/*
 * BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence. This should
 * be distributed with the code. If you do not have a copy,
 * see:
 *
 * http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors. These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 * http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.io;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.biojava.bio.BioError;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.StrandedFeature;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;

/**
 * GenbankFileFormer performs the detailed formatting of
 * Genbank entries for writing to a PrintStream. There is
 * some code duplication with EmblFileFormer which could
 * be factored out.
 *
 * <p>Sequence properties and features are accumulated in internal
 * buffers and written out, followed by the sequence itself, when
 * {@link #addSymbols} is called. None of the property buffers is
 * reset afterwards, so an instance appears to be intended for
 * formatting a single entry.</p>
 *
 * @author Keith James
 * @since 1.2
 * @deprecated Use org.biojavax.bio.seq.io framework instead
 */
public class GenbankFileFormer extends AbstractGenEmblFileFormer
    implements SeqFileFormer
{
    // Width of the blank leader placed in front of feature locations
    // and qualifiers
    private static final int FEATURE_LEADER_WIDTH = 21;

    // Built programmatically so that the width cannot be lost to
    // whitespace normalisation of the source file
    private static final String FEATURE_LEADER = blanks(FEATURE_LEADER_WIDTH);

    // NOTE(review): the keyword literals in this class ("LOCUS ",
    // "VERSION ", "DEFINITION ", ...) carry a single trailing space.
    // Some arithmetic (the locus line offsets, the wrap logic in
    // sequenceBufferCreator) looks as if it was written for wider,
    // column-aligned headers - confirm against the GenBank flat file
    // specification before relying on exact column positions.
    private static final String VERSION_HEADER = "VERSION ";

    private PrintStream stream;

    // Main sequence formatting buffer
    private StringBuffer sq = new StringBuffer();
    // Main qualifier formatting buffer
    private StringBuffer qb = new StringBuffer();
    // Utility formatting buffer
    private StringBuffer ub = new StringBuffer();

    // Buffers for each possible sequence property line
    private StringBuffer idb = null;
    private StringBuffer acb = null;
    private StringBuffer deb = null;
    private StringBuffer svb = null;
    private StringBuffer kwb = null;
    private StringBuffer osb = null;
    private StringBuffer ocb = null;
    private StringBuffer ccb = null;
    // One StringBuffer per REFERENCE block; null until a REFERENCE
    // property has been received
    private List rfb = null;
    private StringBuffer ftb = new StringBuffer();

    // Locusline buffers
    private StringBuffer typeb = new StringBuffer();
    private StringBuffer strb = new StringBuffer();
    private StringBuffer sizeb = new StringBuffer();
    private StringBuffer circb = new StringBuffer();
    private StringBuffer mdatb = new StringBuffer();
    private StringBuffer divb = new StringBuffer();

    private SymbolTokenization dnaTokenization;

    // vector NTI requires a slightly different flavour of Genbank
    private boolean vecNTISupport = false;

    {
        try {
            dnaTokenization = DNATools.getDNA().getTokenization("token");
        } catch (BioException ex) {
            throw new BioError("Couldn't initialize tokenizer for the DNA alphabet",ex);
        }
    }

    /**
     * Creates a new <code>GenbankFileFormer</code> using
     * <code>System.out</code> stream.
     */
    protected GenbankFileFormer()
    {
        this(System.out);
    }

    /**
     * Creates a new <code>GenbankFileFormer</code> using the
     * specified stream.
     *
     * @param stream a <code>PrintStream</code>.
     */
    protected GenbankFileFormer(PrintStream stream)
    {
        this.stream = stream;
    }

    /**
     * Returns the stream the entry is written to.
     *
     * @return the current <code>PrintStream</code>.
     */
    public PrintStream getPrintStream()
    {
        return stream;
    }

    /**
     * Sets the stream the entry is written to.
     *
     * @param stream a <code>PrintStream</code>.
     */
    public void setPrintStream(PrintStream stream)
    {
        this.stream = stream;
    }

    /**
     * Starts the LOCUS line with the given identifier, replacing any
     * locus text collected so far.
     *
     * @param id the entry name.
     */
    public void setName(String id) throws ParseException
    {
        idb = new StringBuffer("LOCUS " + id);
    }

    /** Does nothing. */
    public void startSequence() throws ParseException { }

    /** Does nothing. */
    public void endSequence() throws ParseException { }

    /** Does nothing; the URI is not written. */
    public void setURI(String uri) throws ParseException { }

    /**
     * Writes the complete entry: the buffered sequence properties and
     * features, the base count summary, the sequence in lines of 60
     * symbols (six blocks of ten) and the terminating "//".
     *
     * @param alpha the alphabet of the symbols; not consulted, the
     * symbols are always tokenized with the DNA tokenization.
     * @param syms the array holding the symbols.
     * @param start index of the first symbol to write.
     * @param length number of symbols to write.
     *
     * @exception IllegalAlphabetException if a symbol cannot be
     * tokenized as DNA.
     */
    public void addSymbols(Alphabet alpha,
                           Symbol [] syms,
                           int start,
                           int length)
        throws IllegalAlphabetException
    {
        try {
            int aCount = 0;
            int cCount = 0;
            int gCount = 0;
            int tCount = 0;
            int oCount = 0;

            int end = start + length - 1;

            for (int i = start; i <= end; i++) {
                char c = dnaTokenization.tokenizeSymbol(syms[i]).charAt(0);

                switch (c) {
                    case 'a': case 'A':
                        aCount++;
                        break;
                    case 'c': case 'C':
                        cCount++;
                        break;
                    case 'g': case 'G':
                        gCount++;
                        break;
                    case 't': case 'T':
                        tCount++;
                        break;
                    default:
                        oCount++;
                }
            }

            // FIXME: (kj) shouldn't be printing sequence properties
            // in addSymbols method. If you filter out symbols you
            // lose all sequence properties too.

            // Print out sequence properties in order
            locusLineCreator(length);
            printSequenceProperties();

            sq.setLength(0);
            sq.append("BASE COUNT ");
            sq.append(aCount + " a ");
            sq.append(cCount + " c ");
            sq.append(gCount + " g ");
            sq.append(tCount + " t ");
            sq.append(oCount + " others");
            sq.append(nl);
            sq.append("ORIGIN");

            // Print sequence summary header
            stream.println(sq);

            // Every line is laid out on an 80 character wide blank
            // template; the sequence occupies a subset of it
            char [] blankLine = new char [80];
            Arrays.fill(blankLine, ' ');

            // All lines hold 60 symbols, except possibly the last
            for (int offset = 0; offset < length; offset += 60) {
                int len = Math.min(60, length - offset);

                sq.setLength(0);
                ub.setLength(0);
                sq.append(blankLine);

                // Copy out this line's symbols and format them into
                // blocks of ten tokens
                Symbol [] sa = new Symbol [len];
                System.arraycopy(syms, start + offset, sa, 0, len);

                String blocks = (formatTokenBlock(ub, sa, 10, dnaTokenization)).toString();
                sq.replace(10, blocks.length() + 10, blocks);

                // Running residue count, right-aligned to end at
                // column 9
                String count = Integer.toString(offset + 1);
                sq.replace((9 - count.length()), 9, count);

                // Print formatted sequence line
                stream.println(sq);
            }

            // Print end of entry
            stream.println("//");
        } catch (IllegalSymbolException ex) {
            throw new IllegalAlphabetException(ex, "DNA not tokenizing");
        }
    }

    /**
     * Buffers a sequence property for later output by
     * {@link #addSymbols}. Recognised keys are LOCUS, TYPE, DIVISION,
     * CIRCULAR, DT/MDAT, DE/DEFINITION, SV/VERSION, GI, KW/KEYWORDS,
     * OS/SOURCE, OC/ORGANISM, CC/COMMENT, the Genbank accessions
     * property, REFERENCE and the reference sub-fields AUTHORS, TITLE,
     * JOURNAL, PUBMED and MEDLINE. Any other key is ignored.
     *
     * @param key the property key.
     * @param value the property value; for several keys this may be a
     * <code>List</code> of values.
     *
     * @exception ParseException if a reference sub-field is supplied
     * without a REFERENCE to attach it to.
     */
    public void addSequenceProperty(Object key, Object value)
        throws ParseException
    {
        if (key.equals("LOCUS")) {
            // The LOCUS property may arrive without a prior setName
            if (idb == null) {
                idb = new StringBuffer();
            } else {
                idb.setLength(0);
            }
            idb.append("LOCUS " + value);
        }
        else if (key.equals("TYPE")) {
            typeb.append(value);
        }
        else if (key.equals("DIVISION")) {
            divb.append(value);
        }
        else if (key.equals("CIRCULAR")) {
            circb.append(value);
        }
        else if (key.equals("DT") || key.equals("MDAT")) {
            // Only the first of several dates is used
            if (value instanceof List) {
                List dates = (List) value;
                if (! dates.isEmpty()) {
                    mdatb.append(dates.get(0));
                }
            } else {
                mdatb.append(value);
            }
        }
        else if (key.equals("DE") || key.equals("DEFINITION")) {
            deb = new StringBuffer(sequenceBufferCreator("DEFINITION ", value));
        }
        else if (key.equals("SV") || key.equals("VERSION")) {
            if (svb != null) {
                // A GI has already started the line: place the version
                // directly after the header, in front of it
                svb.insert(VERSION_HEADER.length(), value + " ");
            } else {
                svb = new StringBuffer(VERSION_HEADER + value);
            }
        }
        else if (key.equals("GI")) {
            if (svb != null) {
                svb.append(" GI:" + value);
            } else {
                svb = new StringBuffer(VERSION_HEADER + "GI:" + value);
            }
        }
        else if (key.equals("KW") || key.equals("KEYWORDS")) {
            kwb = new StringBuffer(sequenceBufferCreator("KEYWORDS ", value));
        }
        else if (key.equals("OS") || key.equals("SOURCE")) {
            osb = new StringBuffer(sequenceBufferCreator("SOURCE ", value));
        }
        else if (key.equals("OC") || key.equals("ORGANISM")) {
            ocb = new StringBuffer(sequenceBufferCreator(" ORGANISM ", value));
        }
        else if (key.equals("CC") || key.equals("COMMENT")) {
            ccb = new StringBuffer(sequenceBufferCreator("COMMENT ", value));
        }
        else if (key.equals(GenbankProcessor.PROPERTY_GENBANK_ACCESSIONS)) {
            ub.setLength(0);
            ub.append("ACCESSION ");
            if (value instanceof List) {
                for (Iterator ai = ((List) value).iterator(); ai.hasNext();) {
                    ub.append(ai.next());
                    // Keep successive accessions apart
                    if (ai.hasNext()) {
                        ub.append(" ");
                    }
                }
            } else {
                ub.append(value);
            }
            acb = new StringBuffer(ub.substring(0));
        }
        // GenBank-style References by RichardH
        // FIXME: (rh) Understand EMBL-style references and ReferenceAnnotation objects here too.
        else if (key.equals("REFERENCE")) {
            List blocks = new ArrayList();
            if (value instanceof List) {
                for (Iterator ri = ((List) value).iterator(); ri.hasNext();) {
                    blocks.add(new StringBuffer(sequenceBufferCreator("REFERENCE ", ri.next())));
                }
            } else {
                blocks.add(new StringBuffer(sequenceBufferCreator("REFERENCE ", value)));
            }
            rfb = blocks;
        }
        else if (key.equals("AUTHORS")) {
            appendReferenceField(" AUTHORS ", value);
        }
        else if (key.equals("TITLE")) {
            appendReferenceField(" TITLE ", value);
        }
        else if (key.equals("JOURNAL")) {
            appendReferenceField(" JOURNAL ", value);
        }
        else if (key.equals("PUBMED")) {
            appendReferenceField(" PUBMED ", value);
        }
        else if (key.equals("MEDLINE")) {
            appendReferenceField(" MEDLINE ", value);
        }
    }

    /**
     * Buffers the location line of a feature: the feature type written
     * from column 5 of the leader, followed by the formatted location.
     *
     * @param templ the feature template supplying type, location and,
     * for stranded features, the strand.
     */
    public void startFeature(Feature.Template templ)
        throws ParseException
    {
        int strand = 0;

        if (templ instanceof StrandedFeature.Template) {
            strand = ((StrandedFeature.Template) templ).strand.getValue();
        }

        ub.setLength(0);
        ub.append(FEATURE_LEADER);

        StringBuffer lb = formatLocationBlock(ub,
                                              templ.location,
                                              strand,
                                              FEATURE_LEADER,
                                              80);

        // Overwrite part of the blank leader with the feature type
        lb.replace(5, 5 + templ.type.length(), templ.type);
        ftb.append(lb + nl);
    }

    /** Does nothing. */
    public void endFeature() throws ParseException { }

    /**
     * Buffers one qualifier line per value for the current feature.
     *
     * @param key the qualifier name; the internal
     * <code>Feature.PROPERTY_DATA_KEY</code> is skipped.
     * @param value the qualifier value, or a <code>Collection</code>
     * of values when the qualifier occurs several times.
     */
    public void addFeatureProperty(Object key, Object value)
        throws ParseException
    {
        // Don't print internal data structures
        if (key.equals(Feature.PROPERTY_DATA_KEY)) {
            return;
        }

        // The value may be a collection if several qualifiers of the
        // same type are present in a feature
        if (value instanceof Collection) {
            for (Iterator vi = ((Collection) value).iterator(); vi.hasNext();) {
                appendQualifier(key, vi.next());
            }
        } else {
            appendQualifier(key, value);
        }
    }

    /**
     * VectorNTI requires GenBank format to be a little more specific than
     * required by the GenBank definition. By setting this to true the produced
     * output should be parsable by VectorNTI. By default this is false.
     *
     * @param b to support or not to support.
     */
    public void setVectorNTISupport(boolean b){
        vecNTISupport = b;
    }

    /**
     * Is VectorNTI compatible output being produced?
     *
     * @return false by default.
     */
    public boolean getVectorNTISupport(){
        return vecNTISupport;
    }

    // Returns a String of n space characters
    private static String blanks(int n)
    {
        char [] filler = new char [n];
        Arrays.fill(filler, ' ');
        return new String(filler);
    }

    // Writes the buffered property lines, references and feature
    // table to the stream, in Genbank order
    private void printSequenceProperties()
    {
        if (idb != null) { stream.println(idb); }
        if (acb != null) { stream.println(acb); }
        if (svb != null) { stream.println(svb); }
        if (deb != null) { stream.println(deb); }
        if (kwb != null) { stream.println(kwb); }
        if (osb != null) { stream.println(osb); }
        if (ocb != null) { stream.println(ocb); }
        if (ccb != null) { stream.println(ccb); }

        if (rfb != null) {
            for (Iterator ri = rfb.iterator(); ri.hasNext();) {
                stream.println(ri.next());
            }
        }

        if (ftb.length() != 0) {
            // The header is printed rather than inserted into ftb so
            // that the feature buffer itself is left unchanged
            stream.print("FEATURES Location/Qualifiers" + nl);
            stream.print(ftb);
        }
    }

    // Formats a single qualifier and appends it to the feature buffer
    private void appendQualifier(Object key, Object value)
    {
        qb.setLength(0);
        ub.setLength(0);
        StringBuffer fb = formatQualifierBlock(qb,
                                               formatQualifier(ub, key, value).substring(0),
                                               FEATURE_LEADER,
                                               80);
        ftb.append(fb + nl);
    }

    // Appends a reference sub-field (AUTHORS, TITLE, ...) to the
    // buffered REFERENCE blocks. A List of values is matched to the
    // references in order; a single value goes to the first reference.
    private void appendReferenceField(String label, Object value)
        throws ParseException
    {
        if (value instanceof List) {
            Iterator targets = (rfb == null)
                ? new ArrayList().iterator()
                : rfb.iterator();

            for (Iterator vi = ((List) value).iterator(); vi.hasNext();) {
                Object v = vi.next();
                if (! targets.hasNext()) {
                    throw new ParseException("No REFERENCE available for "
                                             + label.trim() + " value: " + v);
                }
                ((StringBuffer) targets.next()).append(nl + sequenceBufferCreator(label, v));
            }
        } else {
            if (rfb == null || rfb.isEmpty()) {
                throw new ParseException("No REFERENCE available for "
                                         + label.trim() + " value: " + value);
            }
            ((StringBuffer) rfb.get(0)).append(nl + sequenceBufferCreator(label, value));
        }
    }

    // Builds the text of one property line. A null value (or empty
    // List) yields just the key; a List yields one line per element;
    // anything else is treated as text and word-wrapped.
    private String sequenceBufferCreator(Object key, Object value) {
        StringBuffer temp = new StringBuffer();
        String header = key.toString();

        if (value == null) {
            temp.append(header);
        }
        else if (value instanceof List) {
            Iterator iter = ((List) value).iterator();
            if (! iter.hasNext()) {
                temp.append(header);
            } else {
                temp.append(header + " " + iter.next());
                while (iter.hasNext()) {
                    // VectorNTI wants the key repeated on every line
                    if (vecNTISupport) {
                        temp.append(nl + header + " " + iter.next());
                    } else {
                        temp.append(nl + " " + iter.next());
                    }
                }
            }
        }
        else {
            temp.append(header);
            appendWrapped(temp, value.toString());
        }

        return temp.toString();
    }

    // Appends the space-separated words of text to temp, breaking
    // lines at 80 characters
    private void appendWrapped(StringBuffer temp, String text)
    {
        final int fullline = 80;
        StringTokenizer valueToke = new StringTokenizer(text, " ");

        if (! valueToke.hasMoreTokens()) {
            temp.append(" ");
            return;
        }

        String token = valueToke.nextToken();
        while (true) {
            // A word this long can never satisfy the fit test below;
            // write it as it is instead of wrapping forever
            if (token.length() + 2 > fullline && valueToke.hasMoreTokens()) {
                temp.append(" " + token);
                token = valueToke.nextToken();
                continue;
            }

            int length = projectedLength(temp, token, fullline);

            // Add words while the next one still fits
            while (length <= fullline && valueToke.hasMoreTokens()) {
                temp.append(" " + token);
                token = valueToke.nextToken();
                length = projectedLength(temp, token, fullline);
            }

            if (valueToke.hasMoreTokens()) {
                // Pad out the current line and start a new one; the
                // pending word is retried on the next pass
                for (int i = length - token.length(); i < fullline; i++) {
                    temp.append(" ");
                }
                temp.append(nl + " ");
            }
            else if (length <= fullline) {
                temp.append(" " + token);
                break;
            }
            else {
                temp.append(nl);
                temp.append(" " + token);
                break;
            }
        }
    }

    // Length the current line would reach if token were appended. The
    // position within the line is estimated as the buffer length
    // modulo (fullline + 1); an estimate of zero is counted as a full
    // line.
    private int projectedLength(StringBuffer temp, String token, int fullline)
    {
        int used = temp.length() % (fullline + 1);
        if (used == 0) {
            return 81 + token.length();
        }
        return used + token.length() + 1;
    }

    // Pads temp with trailing spaces up to the given length
    private StringBuffer fixLength(StringBuffer temp, int length) {
        // FIXME: (kj) check performance
        while (temp.length() < length) {
            temp.append(" ");
        }
        return temp;
    }

    // Assembles the LOCUS line in idb from the locus-line buffers.
    // Does nothing when no locus name has been supplied.
    private void locusLineCreator(int size) {
        if (idb == null) {
            return;
        }

        idb = fixLength(idb, 30);
        typeb = fixLength(typeb, 8);

        // Right-align the size in 12 characters
        sizeb.insert(0, size);
        while (sizeb.length() < 12) {
            sizeb.insert(0, " ");
        }
        sizeb.append(" bp ");

        if (strb.length() > 0) {
            strb.append("-");
        }
        strb = fixLength(strb, 3);
        circb = fixLength(circb, 9);
        mdatb = fixLength(mdatb, 11);
        divb = fixLength(divb, 4);

        idb.insert(29, sizeb);
        idb.insert(44, strb);
        idb.insert(47, typeb);
        idb.insert(55, circb);
        idb.insert(64, divb);
        idb.insert(68, mdatb);
        idb.setLength(79);
    }
}