/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.seq.io; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.List; import org.biojava.bio.BioException; import org.biojava.bio.seq.Feature; import org.biojava.bio.seq.StrandedFeature; import org.biojava.bio.symbol.Alphabet; import org.biojava.bio.symbol.IllegalAlphabetException; import org.biojava.bio.symbol.Symbol; import org.biojava.bio.taxa.EbiFormat; import org.biojava.bio.taxa.Taxon; /** *
 * <p><code>EmblFileFormer</code> performs the detailed formatting of
 * EMBL entries for writing to a <code>PrintStream</code>. Currently
 * the formatting of the header is not correct. This really needs to
 * be addressed in the parser, which is merging fields that should
 * remain separate.</p>
 *
 * <p>The event generator used to feed events to this class should
 * enforce ordering of those events. This class will stream data
 * directly to the <code>PrintStream</code>.</p>
 *
 * <p>This implementation requires that all the symbols be added in
 * one block, as it does not buffer the tokenized symbols between
 * calls.</p>
* * @author Keith James * @author Len Trigg (Taxon output) * @author Lorna Morris * @since 1.2 * @deprecated Use org.biojavax.bio.seq.io framework instead */ public class EmblFileFormer extends AbstractGenEmblFileFormer implements SeqFileFormer { // Tags which are special cases, not having "XX" after them private static List NON_SEPARATED_TAGS = new ArrayList(); static { NON_SEPARATED_TAGS.add(EmblLikeFormat.SOURCE_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.REFERENCE_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.COORDINATE_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.REF_ACCESSION_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.AUTHORS_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.TITLE_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.FEATURE_TAG); NON_SEPARATED_TAGS.add(EmblLikeFormat.JOURNAL_TAG);//Lorna: added NON_SEPARATED_TAGS.add(EmblLikeFormat.REF_XREF_TAG);//RichardH: added NON_SEPARATED_TAGS.add(EmblLikeFormat.SEPARATOR_TAG);//Lorna: added } // 19 spaces private static String FT_LEADER = EmblLikeFormat.FEATURE_TABLE_TAG + " "; // 3 spaces private static String SQ_LEADER = " "; // 80 spaces private static String EMPTY_LINE = " " + " "; private PrintStream stream; private String accLine; /** * Creates a newEmblFileFormer
 * using the <code>System.out</code> stream.
*/
protected EmblFileFormer() {
    // Delegate to the stream-taking constructor with standard output.
    this(System.out);
}
/**
 * Creates a new <code>EmblFileFormer</code> which writes to the
 * specified stream.
 *
 * @param stream the <code>PrintStream</code> that receives all
 * output printed by this former.
 */
protected EmblFileFormer(PrintStream stream) {
    // The superclass no-argument constructor is invoked implicitly.
    this.stream = stream;
}
/**
 * Returns the stream this former currently prints to.
 *
 * @return the current <code>PrintStream</code>.
 */
public PrintStream getPrintStream() {
    return this.stream;
}
/**
 * Replaces the stream this former prints to. Subsequent output goes
 * to the new stream.
 *
 * @param stream the <code>PrintStream</code> to print to from now on.
 */
public void setPrintStream(PrintStream stream) {
    this.stream = stream;
}
/**
 * Does nothing: this former prints no output for the sequence name.
 *
 * @param id the sequence name (ignored).
 * @throws ParseException declared in the signature; never thrown by
 * this implementation.
 */
public void setName(String id) throws ParseException {
    // No output is produced for the name.
}
/**
 * Resets the symbol counters (A, C, G, T and "other") that
 * <code>addSymbols</code> increments, so that a new sequence starts
 * counting from zero.
 *
 * @throws ParseException declared in the signature; never thrown by
 * this implementation.
 */
public void startSequence() throws ParseException {
    // The five counters are independent of one another.
    oCount = 0;
    tCount = 0;
    gCount = 0;
    cCount = 0;
    aCount = 0;
}
/**
 * Prints the end-of-sequence tag
 * (<code>EmblLikeFormat.END_SEQUENCE_TAG</code>) on a line of its own.
 *
 * @throws ParseException declared in the signature; never thrown by
 * this implementation.
 */
public void endSequence() throws ParseException {
    this.stream.println(EmblLikeFormat.END_SEQUENCE_TAG);
}
/**
 * Does nothing: this former prints no output for the sequence URI.
 *
 * @param uri the sequence URI (ignored).
 * @throws ParseException declared in the signature; never thrown by
 * this implementation.
 */
public void setURI(String uri) throws ParseException {
    // No output is produced for the URI.
}
/**
 * Prints the sequence summary line followed by the symbols
 * themselves.
 *
 * <p>The symbols <code>syms[start]</code> to
 * <code>syms[start + length - 1]</code> are first tallied into the
 * A/C/G/T/other counters. A separator line and the "SQ" summary are
 * then printed, followed by one line per chunk of up to 60 symbols.
 * Each sequence line is built on a copy of <code>EMPTY_LINE</code>:
 * the token blocks are written from column index 5 and the running
 * symbol count is right-aligned so that it ends at index 80.</p>
 *
 * @param alpha the alphabet whose "token" tokenization is used to
 * render the symbols.
 * @param syms the array holding the symbols.
 * @param start the index of the first symbol to print.
 * @param length the number of symbols to print.
 * @throws IllegalAlphabetException if a <code>BioException</code> is
 * raised while tokenizing or formatting the symbols.
 */
public void addSymbols(Alphabet alpha,
                       Symbol [] syms,
                       int start,
                       int length)
    throws IllegalAlphabetException
{
    try {
        // Tally the composition of the block (compared by identity).
        int last = start + length - 1;
        int pos = start;
        while (pos <= last) {
            Symbol current = syms[pos];
            if (current == a) {
                aCount++;
            } else if (current == c) {
                cCount++;
            } else if (current == g) {
                gCount++;
            } else if (current == t) {
                tCount++;
            } else {
                oCount++;
            }
            pos++;
        }

        // Separator line followed by the sequence summary header
        StringBuffer header = new StringBuffer(EmblLikeFormat.SEPARATOR_TAG);
        header.append(nl);
        header.append("SQ Sequence ");
        header.append(length).append(" BP; ");
        header.append(aCount).append(" A; ");
        header.append(cCount).append(" C; ");
        header.append(gCount).append(" G; ");
        header.append(tCount).append(" T; ");
        header.append(oCount).append(" other;");
        stream.println(header);

        // Work out the chunk sizes: 60 per line, remainder on the last
        int remainder = length % 60;
        int lineTotal = length / 60;
        if (remainder > 0) {
            lineTotal++;
        }

        int [] chunkSizes = new int [lineTotal];
        for (int k = 0; k < chunkSizes.length; k++) {
            chunkSizes[k] = 60;
        }
        if (remainder > 0) {
            chunkSizes[lineTotal - 1] = remainder;
        }

        // Number of symbols covered by the lines before the current one
        int offset = 0;
        for (int line = 0; line < chunkSizes.length; line++) {
            int chunk = chunkSizes[line];

            // Copy out this line's symbols and format them into blocks of 10
            Symbol [] lineSyms = new Symbol [chunk];
            System.arraycopy(syms, start + offset, lineSyms, 0, chunk);

            String blocks = (formatTokenBlock(new StringBuffer(), lineSyms, 10,
                                              alpha.getTokenization("token"))).toString();

            // Overlay the blocks onto a blank line, starting at index 5
            StringBuffer out = new StringBuffer(EMPTY_LINE);
            out.replace(5, blocks.length() + 5, blocks);

            // Right-align the running symbol count so it ends at index 80
            String running = Integer.toString(offset + chunk);
            out.replace((80 - running.length()), 80, running);

            stream.println(out);
            offset += 60;
        }
    } catch (BioException ex) {
        throw new IllegalAlphabetException(ex, "Alphabet not tokenizing");
    }
}
/**
 * Prints one sequence-level property as header line(s) of the entry.
 *
 * <p>Three keys are handled before any output is produced:</p>
 * <ul>
 * <li><code>EmblProcessor.PROPERTY_EMBL_ACCESSIONS</code>: the
 * accessions are joined with ";" (with a trailing ";") and cached;
 * nothing is printed.</li>
 * <li><code>EmblLikeFormat.ACCESSION_TAG</code>: the cached accession
 * line is used as the text, unless <code>value</code> is itself a
 * String, Collection or Taxon that supplies the text.</li>
 * <li><code>OrganismParser.PROPERTY_ORGANISM</code>: the taxon is
 * re-dispatched to this method as the source, organism (using the
 * taxon's parent) and organism cross-reference properties.</li>
 * </ul>
 *
 * <p>If no text results (empty or <code>null</code>), the bare tag is
 * printed. Otherwise the text is formatted by
 * <code>formatSequenceProperty</code> and printed. A separator line
 * follows unless the key is listed in <code>NON_SEPARATED_TAGS</code>,
 * and the feature tag is printed once more after a feature header.</p>
 *
 * @param key the property key; its <code>toString()</code> is used as
 * the line tag.
 * @param value the property value. Strings are used as-is, Collections
 * are joined and Taxon values are serialized by <code>EbiFormat</code>;
 * any other value contributes no text.
 * @throws ParseException declared in the signature; no statement in
 * this method throws it directly.
 */
public void addSequenceProperty(Object key, Object value)
    throws ParseException
{
    // Separators are no longer filtered here; per the original note they
    // are ignored upstream (in SeqIOEventEmitter).

    String tag = key.toString();
    String leader = tag + SQ_LEADER;
    String line = "";
    // NOTE(review): 85 exceeds the 80 columns used elsewhere in this
    // class - confirm against formatSequenceProperty before changing.
    int wrapWidth = 85 - leader.length();

    // Special case: accession number
    if (key.equals(EmblProcessor.PROPERTY_EMBL_ACCESSIONS))
    {
        accLine = buildPropertyLine((Collection) value, ";", true);
        return;
    }
    else if (key.equals(EmblLikeFormat.ACCESSION_TAG))
    {
        // May be null if no accessions have been cached yet; this is
        // handled by the null check before printing.
        line = accLine;
    }
    else if (key.equals(OrganismParser.PROPERTY_ORGANISM))
    {
        Taxon taxon = (Taxon) value;
        addSequenceProperty(EmblLikeFormat.SOURCE_TAG, taxon);
        addSequenceProperty(EmblLikeFormat.ORGANISM_TAG, taxon.getParent());
        addSequenceProperty(EmblLikeFormat.ORGANISM_XREF_TAG, taxon);
        return;
    }

    if (value instanceof String)
    {
        line = (String) value;
    }
    else if (value instanceof Collection)
    {
        // Special cases: date, DR, author and reference accession
        // entries each go onto a separate line and are not re-wrapped.
        boolean onePerLine = key.equals(EmblLikeFormat.DATE_TAG)
            || key.equals(EmblLikeFormat.DR_TAG)
            || key.equals(EmblLikeFormat.AUTHORS_TAG)
            || key.equals(EmblLikeFormat.REF_ACCESSION_TAG);

        if (onePerLine)
        {
            line = buildPropertyLine((Collection) value, nl + leader, false);
            wrapWidth = Integer.MAX_VALUE;
        }
        else
        {
            line = buildPropertyLine((Collection) value, " ", false);
        }
    }
    else if (value instanceof Taxon)
    {
        if (key.equals(EmblLikeFormat.ORGANISM_TAG))
        {
            line = EbiFormat.getInstance().serialize((Taxon) value);
        }
        else if (key.equals(EmblLikeFormat.SOURCE_TAG))
        {
            line = EbiFormat.getInstance().serializeSource((Taxon) value);
        }
        else if (key.equals(EmblLikeFormat.ORGANISM_XREF_TAG))
        {
            line = EbiFormat.getInstance().serializeXRef((Taxon) value);
        }
    }

    // A null line (e.g. accession tag seen before any accessions were
    // cached) is treated like an empty one instead of failing with a
    // NullPointerException.
    if (line == null || line.length() == 0)
    {
        stream.println(tag);
    }
    else
    {
        StringBuffer sb = formatSequenceProperty(new StringBuffer(), line, leader, wrapWidth);
        stream.println(sb);
    }

    // Special case: those which don't get separated
    if (! NON_SEPARATED_TAGS.contains(key))
        stream.println(EmblLikeFormat.SEPARATOR_TAG);

    // Special case: feature header
    if (key.equals(EmblLikeFormat.FEATURE_TAG))
        stream.println(EmblLikeFormat.FEATURE_TAG);
}
/**
 * Prints the opening line(s) of a feature table entry: the location
 * block produced by <code>formatLocationBlock</code>, with the feature
 * type written over the leader starting at index 5.
 *
 * @param templ the feature template supplying the type, the location
 * and, for a <code>StrandedFeature.Template</code>, the strand.
 * @throws ParseException declared in the signature; no statement in
 * this method throws it directly.
 */
public void startFeature(Feature.Template templ)
    throws ParseException
{
    // Unstranded templates are formatted with a strand value of 0
    int strand = (templ instanceof StrandedFeature.Template)
        ? ((StrandedFeature.Template) templ).strand.getValue()
        : 0;

    StringBuffer block = formatLocationBlock(new StringBuffer(FT_LEADER),
                                             templ.location,
                                             strand,
                                             FT_LEADER,
                                             80);

    // Overlay the feature type onto the leading part of the first line
    int typeEnd = 5 + templ.type.length();
    block.replace(5, typeEnd, templ.type);

    stream.println(block);
}
/**
 * Does nothing: no output is printed when a feature ends.
 *
 * @throws ParseException declared in the signature; never thrown by
 * this implementation.
 */
public void endFeature() throws ParseException {
    // No output is produced at the end of a feature.
}
/**
 * Prints a feature qualifier. When <code>value</code> is a
 * <code>Collection</code>, one qualifier block is printed per element,
 * all under the same key; otherwise a single block is printed.
 * The property stored under <code>Feature.PROPERTY_DATA_KEY</code> is
 * skipped.
 *
 * @param key the qualifier key.
 * @param value the qualifier value, or a <code>Collection</code> of
 * values.
 */
public void addFeatureProperty(Object key, Object value)
{
    // Don't print internal data structures
    if (key.equals(Feature.PROPERTY_DATA_KEY)) {
        return;
    }

    // Simple case: a single qualifier value
    if (!(value instanceof Collection)) {
        String qualifier = formatQualifier(new StringBuffer(), key, value).substring(0);
        StringBuffer block = formatQualifierBlock(new StringBuffer(),
                                                  qualifier,
                                                  FT_LEADER,
                                                  80);
        stream.println(block);
        return;
    }

    // Several qualifiers of the same type are present in the feature:
    // print each one as its own block
    Iterator values = ((Collection) value).iterator();
    while (values.hasNext()) {
        String qualifier = formatQualifier(new StringBuffer(), key, values.next()).substring(0);
        StringBuffer block = formatQualifierBlock(new StringBuffer(),
                                                  qualifier,
                                                  FT_LEADER,
                                                  80);
        stream.println(block);
    }
}
/**
 * Joins the string forms of the elements of <code>property</code>,
 * placing <code>separator</code> between consecutive elements.
 *
 * <p>An empty collection yields the empty string for either value of
 * <code>terminate</code>. (Previously an empty collection with
 * <code>terminate == false</code> failed with a
 * <code>StringIndexOutOfBoundsException</code>, because the code tried
 * to strip a trailing separator that had never been appended.)</p>
 *
 * @param property the values to join; each element's
 * <code>toString()</code> is used, so elements must not be
 * <code>null</code>.
 * @param separator the text placed between elements.
 * @param terminate if <code>true</code>, a separator is also appended
 * after the last element.
 * @return the joined line.
 */
private static String buildPropertyLine(Collection property,
                                        String separator,
                                        boolean terminate)
{
    StringBuffer sb = new StringBuffer();

    for (Iterator pi = property.iterator(); pi.hasNext();)
    {
        sb.append(pi.next().toString());

        // Separate from the next element, or terminate the last one
        // when requested; nothing is ever appended only to be removed.
        if (terminate || pi.hasNext())
        {
            sb.append(separator);
        }
    }

    return sb.toString();
}
}