/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.seq.io; import java.io.PrintStream; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.StringTokenizer; import org.biojava.bio.seq.Feature; import org.biojava.bio.seq.StrandedFeature; import org.biojava.bio.symbol.Alphabet; import org.biojava.bio.symbol.FuzzyLocation; import org.biojava.bio.symbol.IllegalAlphabetException; import org.biojava.bio.symbol.IllegalSymbolException; import org.biojava.bio.symbol.Location; import org.biojava.bio.symbol.PointLocation; import org.biojava.bio.symbol.RangeLocation; import org.biojava.bio.symbol.Symbol; /** * Formats a sequence into Swissprot/TrEMBL format. Modeled after * EmblFileFormer. * * @author Greg Cox * @since 1.2 * @deprecated Use org.biojavax.bio.seq.io framework instead */ public class SwissprotFileFormer extends AbstractGenEmblFileFormer implements SeqFileFormer { // Main qualifier formatting buffer private StringBuffer qb = new StringBuffer(); // Utility formatting buffer private StringBuffer ub = new StringBuffer(); // Buffers for each possible sequence property line private StringBuffer idb = null; private StringBuffer acb = null; private StringBuffer dtb = null; private StringBuffer deb = null; private StringBuffer svb = null; private StringBuffer kwb = null; private StringBuffer osb = null; private StringBuffer ocb = null; private StringBuffer ccb = null; private StringBuffer ftb = new StringBuffer(); // Static variables static int LOCATION_WIDTH = 6; // Member variables PrintStream mStream; // Constructors and initialization /** * Creates a new SwissprotFileFormer using * System.out stream. */ protected SwissprotFileFormer() { super(); this.mStream = System.out; } /** * Creates a new SwissprotFileFormer using the * specified stream. * * @param theStream a PrintStream object. */ protected SwissprotFileFormer(PrintStream theStream) { super(); this.mStream = theStream; } // Interface implementations // SeqIOListener methods /** * Start the processing of a sequence. This method exists primarily * to enforce the life-cycles of SeqIOListener objects. */ public void startSequence() throws ParseException { } /** * Notify the listener that processing of the sequence is complete. */ public void endSequence() throws ParseException { } /** * The name is printed out as part of the identifier line. It will be * replaced if an ID keyword exists in the annotations. * * @param theName the String that should be returned by getName for the sequence * being parsed */ public void setName(String theName) throws ParseException { idb = new StringBuffer("ID " + theName); } /** * Null implementation. This object formats and prints a sequence. The * URI alone cannot be printed in Swissprot format. Therefore, it's * easiest to ignore it. * @param theURI the new URI of the sequence */ public void setURI(String theURI) throws ParseException { } /** * Prints out the sequences properties in order. * Prints out the symbol array passed in in lines of 60, blocks of 10 * * @param theAlphabet The alphabet of the symbol data * @param theSymbols An array containing symbols * @param theStart The start offset of valid data within the array * @param theLength The number of valid symbols in the array * * @throws IllegalAlphabetException if we can't cope with this * alphabet. */ public void addSymbols(Alphabet theAlphabet, Symbol[] theSymbols, int theStart, int theLength) throws IllegalAlphabetException { PrintStream stream = this.getPrintStream(); // Print out all of the sequence properties in order if (idb != null) {stream.println(idb); stream.println("XX");} if (acb != null) {stream.println(acb); stream.println("XX");} if (svb != null) {stream.println(svb); stream.println("XX");} if (dtb != null) {stream.println(dtb); stream.println("XX");} if (deb != null) {stream.println(deb); stream.println("XX");} if (kwb != null) {stream.println(kwb); stream.println("XX");} if (osb != null) {stream.println(osb);} if (ocb != null) {stream.println(ocb); stream.println("XX");} if (ccb != null) {stream.println(ccb); stream.println("XX");} if (ftb.length() != 0) { stream.print(ftb); } this.printOutSequenceHeaderLine(theAlphabet, theSymbols, theStart, theLength); List brokenLines = this.breakSymbolArray(theAlphabet, theSymbols, theStart, theLength); java.util.Iterator iterator = brokenLines.iterator(); String leader = " "; while(iterator.hasNext()) { stream.print(leader + iterator.next() + nl); } stream.println("//"); } /** * Formats sequence properties into form suitable for printing to * file. * * @param key The key of the sequence property * @param value The value of the sequence property * * @returns Properly formated string */ private String sequenceBufferCreator(Object key, Object value) { StringBuffer temp = new StringBuffer(); if (value == null) { temp.append((String) key); } else if (value instanceof ArrayList) { Iterator iter = ((ArrayList) value).iterator(); while (iter.hasNext()) { temp.append((String) key + " " + iter.next()); if (iter.hasNext()) temp.append(nl); } } else { StringTokenizer valueToke = new StringTokenizer((String) value, " "); int fullline = 80; int length = 0; String token = valueToke.nextToken(); while (true) { temp.append((String) key + " "); length = (temp.length() % (fullline + 1)) + token.length() + 1; if (temp.length() % (fullline + 1) == 0) length = 81 + token.length(); while (length <= fullline && valueToke.hasMoreTokens()) { temp.append(" " + token); token = valueToke.nextToken(); length = (temp.length() % (fullline + 1)) + token.length() + 1; if (temp.length() % (fullline + 1) == 0) length = 81 + token.length(); } if (valueToke.hasMoreTokens()) { for(int i = length-token.length(); i < fullline; i++) { temp.append(" "); } temp.append(nl); } else if (length <= fullline) { temp.append(" " + token); break; } else { temp.append(nl); temp.append((String) key + " " + token); break; } } } return temp.toString(); } /** * Notify the listener of a sequence-wide property. This might * be stored as an entry in the sequence's annotation bundle. * Checks for possible known properties to be shown in the file. * * @param key Key the property will be stored under * @param value Value stored under the key */ public void addSequenceProperty(Object key, Object value) throws ParseException { if (key.equals("ID")) { idb.setLength(0); idb.append("ID " + (String) value); } else if (key.equals("DT") || key.equals("MDAT")) { dtb = new StringBuffer(sequenceBufferCreator("DT", value)); } else if (key.equals("DE") || key.equals("DEFINITION")) { deb = new StringBuffer(sequenceBufferCreator("DE", value)); } else if (key.equals("SV") || key.equals("VERSION")) { svb = new StringBuffer(sequenceBufferCreator("SV", value)); } else if (key.equals("KW") || key.equals("KEYWORDS")) { kwb = new StringBuffer(sequenceBufferCreator("KW", value)); } else if (key.equals("OS") || key.equals("SOURCE")) { osb = new StringBuffer(sequenceBufferCreator("OS", value)); } else if (key.equals("OC") || key.equals("ORGANISM")) { ocb = new StringBuffer(sequenceBufferCreator("OC", value)); } else if (key.equals("CC") || key.equals("COMMENT")) { ccb = new StringBuffer(sequenceBufferCreator("CC", value)); } else if (key.equals(SwissprotProcessor.PROPERTY_SWISSPROT_ACCESSIONS)) { acb = new StringBuffer(); acb.append("AC "); for (Iterator ai = ((List) value).iterator(); ai.hasNext();) { acb.append((String) ai.next()); acb.append(";"); } } } /** * Null implementation. * * @param templ The template for this new feature object */ public void startFeature(Feature.Template templ) throws ParseException { // There are 19 spaces in the leader String leader = "FT "; ub.setLength(0); ub.append(leader); StringBuffer lb = formatLocation(ub, templ.location); lb.replace(5, 5 + templ.type.length(), templ.type); ftb.append(lb + nl); } /** * Null implementation. */ public void endFeature() throws ParseException { } /** * Null implementation * * @param key Key the property will be stored under * @param value Value stored under the key */ public void addFeatureProperty(Object key, Object value) throws ParseException { // There are 19 spaces in the leader String leader = "FT "; // Don't print internal data structures if (key.equals(Feature.PROPERTY_DATA_KEY)) return; // The value may be a collection if several qualifiers of the // same type are present in a feature if (Collection.class.isInstance(value)) { for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();) { qb.setLength(0); ub.setLength(0); StringBuffer fb = formatQualifierBlock(qb, formatQualifier(ub, key, vi.next()).toString(), leader, 80); ftb.append(fb + nl); } } else { qb.setLength(0); ub.setLength(0); StringBuffer fb = formatQualifierBlock(qb, formatQualifier(ub, key, value).toString(), leader, 80); ftb.append(fb + nl); } } // SeqFileFormer methods /** * getPrintStream returns the * PrintStream to which an instance of SwissprotFileFormer * will write the formatted data. The default is System.out * * @return the PrintStream which will be written to. */ public PrintStream getPrintStream() { return(this.mStream); } /** * setPrintStream informs an instance which * PrintStream to use. * * @param theStream a PrintStream to write to. */ public void setPrintStream(PrintStream theStream) { this.mStream = theStream; } /** * formatLocation creates a String representation of * a Location. Strand information is ignored, as Swissprot * files represent proteins. An alternative form of this function does not * take a Strand; that form is available only on SwissprotFileFormer; it * is not part of the SeqFileFormer interface. * * @param theBuffer a StringBuffer to append the location * to. * @param theLocation a Location to format. * @param theStrand a StrandedFeature.Strand indicating nothing * of relevance * * @return a StringBuffer with the location appended. */ public StringBuffer formatLocation(StringBuffer theBuffer, Location theLocation, StrandedFeature.Strand theStrand) { return(this.formatLocation(theBuffer, theLocation)); } /** * Creates a string representation of the location of a feature * * @param theFeature The feature with the location to format * @return String The formatted location */ public String formatLocation(Feature theFeature) { StringBuffer toReturn = this.formatLocation(new StringBuffer(), theFeature.getLocation()); return toReturn.toString(); } // Public methods /** * formatLocation creates a String representation of * a Location. The stringbuffer returned represents columns * 15-27 of the Swissprot feature table entry. An alternative form of this * function takes a Strand; that form is part of the SeqFileFormer * interface. * * @param theBuffer a StringBuffer to append the location * to. * @param theLocation a Location to format. * * @return a StringBuffer with the location appended. */ public StringBuffer formatLocation(StringBuffer theBuffer, Location theLocation) { // Five Location cases, each treated seperately: // Point Location: " 5 5" // Range Location: " 5 10" // Fuzzy Location: " <5 10" // Fuzzy Location: " ? 10" // Fuzzy Location: " ?24 35" (Not in the current // specification, but used anyways StringBuffer startPoint = new StringBuffer(LOCATION_WIDTH); StringBuffer endPoint = new StringBuffer(LOCATION_WIDTH); if((theLocation instanceof PointLocation) || (theLocation instanceof RangeLocation)) { // Point Location: " 5 5" // Range Location: " 5 10" startPoint = formatPoint(theLocation.getMin(), theLocation.getMin(), false); endPoint = formatPoint(theLocation.getMax(), theLocation.getMax(), false); } else if(theLocation instanceof FuzzyLocation) { // Handle all fuzzy location types through the magic of delegation. // If you pass things around long enough, someone's bound to do it // for you FuzzyLocation tempLocation = (FuzzyLocation)theLocation; //System.out.println("OuterMin: " + tempLocation.getOuterMin()); //System.out.println("InnerMin: " + tempLocation.getInnerMin()); //System.out.println("InnerMax: " + tempLocation.getInnerMax()); //System.out.println("OuterMax: " + tempLocation.getOuterMax()); startPoint = this.formatPoint(tempLocation.getOuterMin(), tempLocation.getInnerMin(), tempLocation.isMinFuzzy()); endPoint = this.formatPoint(tempLocation.getInnerMax(), tempLocation.getOuterMax(), tempLocation.isMaxFuzzy()); } return new StringBuffer(startPoint.toString() + " " + endPoint.toString()); } // Protected methods /** * Prints out sequence header with only length data. * * @param theAlphabet The alphabet of the symbol data * @param theSymbols An array containing symbols * @param theStart The start offset of valid data within the array * @param theLength The number of valid symbols in the array * * @throws IllegalAlphabetException if we can't cope with this * alphabet. */ protected void printOutSequenceHeaderLine(Alphabet theAlphabet, Symbol[] theSymbols, int theStart, int theLength) throws IllegalAlphabetException { this.getPrintStream().println("SQ SEQUENCE " + theLength + " AA; "); } /** * Converts the symbol list passed in into an array of strings. The * strings will be blocks of ten, with six blocks on a line. * * @param theAlphabet The alphabet of the symbol data * @param theSymbols An array containing symbols * @param theStart The start offset of valid data within the array * @param theLength The number of valid symbols in the array * @return The symbol list passed in broken into blocks of ten * characters, six to a string. * * @throws IllegalAlphabetException if we can't cope with this * alphabet. */ protected List breakSymbolArray(Alphabet theAlphabet, Symbol[] theSymbols, int theStart, int theLength) throws IllegalAlphabetException { List returnList = new ArrayList(theLength / 60 + 1); int blockCount = 0; int blockIndex = 0; StringBuffer tempString = new StringBuffer(); SymbolTokenization tokenization; try { tokenization = theAlphabet.getTokenization("token"); } catch (Exception ex) { throw new IllegalAlphabetException(ex, "Couldn't get tokenization for this alphabet"); } for(int i = theStart; i < theStart + theLength; i++) { try { theAlphabet.validate(theSymbols[i]); } catch (IllegalSymbolException e) { throw new IllegalAlphabetException(e); } // Every six completed blocks, put on the stack to return if(blockIndex == 10) { tempString.append(' '); blockIndex = 0; blockCount++; } if(blockCount == 6) { returnList.add(tempString.substring(0)); tempString.setLength(0); blockCount = 0; blockIndex = 0; } try { tempString.append(tokenization.tokenizeSymbol(theSymbols[i])); } catch (IllegalSymbolException ex) { throw new IllegalAlphabetException(ex, "Couldn't tokenize symbols"); } blockIndex++; } // Add the last line on if(tempString.length() != 0) { returnList.add(tempString.substring(0)); } return returnList; } /** * Simple method that adds spaces onto the buffer passed in. This method * exists to refactor some code used in location formatting. It isn't * intended to be generally used. * * @param theBuffer Buffer to append whitespace to. * @param theLength Ammount of whitespace to append. */ protected void fillBuffer(StringBuffer theBuffer, int theLength) { for(int i = 0; i < theLength; i++) { theBuffer.append(' '); } } /** * Formats the points from fuzzy locations. This is called easily with * this.formatPoint(FuzzyLocation.getInnerMax(), FuzzyLocation.getOuterMax(), FuzzyLocation.isFuzzyMax()) * * @param theMaxIndex Inner index of the fuzzy point * @param theMinIndex Outer index of the fuzzy point * @param isFuzzy Indicates if this point is fuzzy */ protected StringBuffer formatPoint(int theMinIndex, int theMaxIndex, boolean isFuzzy) { StringBuffer bufferToReturn = new StringBuffer(LOCATION_WIDTH); if(isFuzzy == false) { String tempString = Integer.toString(theMinIndex); int offset = LOCATION_WIDTH - tempString.length(); this.fillBuffer(bufferToReturn, offset); bufferToReturn.append(tempString); } else { // MIN_VALUE to MAX_VALUE is the ? location regardless of which end is which if((theMinIndex == Integer.MIN_VALUE) && (theMaxIndex == Integer.MAX_VALUE)) { int offset = LOCATION_WIDTH - 1; this.fillBuffer(bufferToReturn, offset); bufferToReturn.append('?'); } // If the outer index is MIN_VALUE, that's n else if(theMaxIndex == Integer.MAX_VALUE) { String tempString = Integer.toString(theMinIndex); int offset = LOCATION_WIDTH - tempString.length() - 1; this.fillBuffer(bufferToReturn, offset); bufferToReturn.append('>'); bufferToReturn.append(tempString); } // The only swissprot location left is ?nn else if(theMinIndex == theMaxIndex) { String tempString = Integer.toString(theMinIndex); int offset = LOCATION_WIDTH - tempString.length() - 1; this.fillBuffer(bufferToReturn, offset); bufferToReturn.append('?'); bufferToReturn.append(tempString); } else { // The location cannot be formatted in Swissprot format // Revisit System.out.println("Error in formatPoint"); System.out.println("\tInner: " + theMinIndex); System.out.println("\tOuter: " + theMaxIndex); System.out.println("\tFuzzy: " + isFuzzy); } } return bufferToReturn; } // Private methods }