/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.program.phred; import java.io.BufferedReader; import java.io.IOException; import java.io.PrintStream; import java.io.Serializable; import java.util.NoSuchElementException; import java.util.Vector; import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.io.ParseException; import org.biojava.bio.seq.io.SeqIOListener; import org.biojava.bio.seq.io.SequenceFormat; import org.biojava.bio.seq.io.StreamParser; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.IllegalSymbolException; import org.biojava.bio.symbol.IntegerAlphabet; import org.biojava.utils.ParseErrorEvent; import org.biojava.utils.ParseErrorListener; import org.biojava.utils.ParseErrorSource; /** * Format object representing Phred Quality files. * The only `sequence property' reported by this parser * is PROPERTY_DESCRIPTIONLINE, which is the contents of the * sequence's description line (the line starting with a '>' * character). * * Essentially a rework of FastaFormat to cope with the quirks of Phred Quality data.
* Copyright (c) 2001
* Company: AgResearch
* * @author Mark Schreiber * @author Greg Cox * @author Frans Verhoef * @since 1.1 */ public class PhredFormat implements SequenceFormat, ParseErrorSource, ParseErrorListener, Serializable { public static final String DEFAULT = "PHRED"; private Vector mListeners = new Vector(); /** * Constant string which is the property key used to notify * listeners of the description lines of Phred sequences. */ public final static String PROPERTY_DESCRIPTIONLINE = "description_line"; /** * The line width for output. */ private int lineWidth = 60; /** * Retrieve the current line width. * * @return the line width */ public int getLineWidth() { return lineWidth; } /** * Set the line width. *
* When writing, the lines of sequence will never be longer than the line
* width.
*
* @param width the new line width
*/
public void setLineWidth(int width) {
  // Stored unchecked; writeSequence compares against this value when
  // deciding where to break an output line.
  lineWidth = width;
}
/**
 * Reads a single Phred quality record from the reader and reports it to
 * the listener.
 *
 * The first line read must be a description line beginning with '>'. Its
 * remainder (trimmed) is reported under PROPERTY_DESCRIPTIONLINE, after
 * which the quality values are streamed to the listener.
 *
 * @param reader    the source of Phred formatted text
 * @param symParser the tokenization used to turn text into symbols
 * @param siol      the listener notified of the record's contents
 * @return false if the end of the stream was reached while reading this
 *         record's data, true otherwise
 * @throws IOException if the stream is already exhausted or the first
 *         line does not start with '>'
 * @throws IllegalSymbolException declared for the calls this method makes
 * @throws ParseException not thrown directly by this method; declared so
 *         that failures from the calls it makes can propagate
 */
public boolean readSequence(BufferedReader reader,
                            SymbolTokenization symParser,
                            SeqIOListener siol)
    throws IllegalSymbolException, IOException, ParseException {
  final String header = reader.readLine();
  if (header == null) {
    throw new IOException("Premature stream end");
  }

  final boolean looksLikePhred = header.startsWith(">");
  if (!looksLikePhred) {
    throw new IOException("Stream does not appear to contain Phred formatted data: " + header);
  }

  siol.startSequence();
  // Everything after the leading '>' is the description.
  siol.addSequenceProperty(PROPERTY_DESCRIPTIONLINE, header.substring(1).trim());

  final boolean hitEndOfStream = readSequenceData(reader, symParser, siol);
  siol.endSequence();

  return !hitEndOfStream;
}
/**
 * Streams the quality values of the current record to a StreamParser
 * obtained from the tokenization.
 *
 * The text is consumed in chunks. The reader is marked before every chunk
 * so that, when a '>' (the start of the next record) is found inside the
 * chunk, the reader can be rewound and left positioned on that '>'.
 *
 * A chunk must never end in the middle of a number, otherwise the number
 * would be handed to the parser as two separate tokens. A chunk that ends
 * in a digit is therefore extended until it ends in a non-digit or the
 * stream is exhausted. The test is made on the last character actually
 * read, because BufferedReader.read may return fewer characters than
 * requested without being at end of stream.
 *
 * @param br       the reader positioned just after the description line
 * @param parser   the tokenization that supplies the StreamParser
 * @param listener the listener the StreamParser reports to
 * @return true if the end of the stream was reached, false if reading
 *         stopped at the start of another record
 * @throws IOException if reading fails or the reader cannot be
 *         repositioned on the next record
 * @throws IllegalSymbolException declared for the StreamParser calls
 */
private boolean readSequenceData(BufferedReader br,
                                 SymbolTokenization parser,
                                 SeqIOListener listener)
    throws IOException, IllegalSymbolException {
  char[] buffer = new char[256];
  StreamParser sparser = parser.parseStream(listener);
  boolean seenEOF = false;    // reached the end of the file
  boolean reachedEnd = false; // reached the end of this sequence

  while (!reachedEnd) {
    br.mark(buffer.length); // read-ahead limit covers one full buffer
    int charsRead = br.read(buffer, 0, buffer.length);

    // The chunk may have stopped halfway through a number.
    while (charsRead > 0 && Character.isDigit(buffer[charsRead - 1])) {
      if (charsRead < buffer.length) {
        // Short read: top the buffer up. The total stays within the
        // read-ahead limit, so the mark remains valid.
        int extra = br.read(buffer, charsRead, buffer.length - charsRead);
        if (extra <= 0) {
          break; // end of stream: the trailing number is complete
        }
        charsRead += extra;
      } else {
        // Buffer is full: rewind and read again into a larger one.
        br.reset();
        buffer = new char[buffer.length + 64];
        br.mark(buffer.length);
        charsRead = br.read(buffer, 0, buffer.length);
      }
    }

    if (charsRead < 0) { // -1 indicates end of file
      seenEOF = reachedEnd = true;
    } else {
      // Everything before a '>' belongs to the current record.
      int parseEnd = 0;
      while (parseEnd < charsRead && buffer[parseEnd] != '>') {
        ++parseEnd;
      }
      sparser.characters(buffer, 0, parseEnd);

      // The scan stopped early only if it hit the '>' of a new record.
      if (parseEnd < charsRead) {
        // Rewind, then skip exactly the consumed part so the next read
        // starts on the '>'.
        br.reset();
        if (br.skip(parseEnd) != parseEnd) {
          throw new IOException("Couldn't reset to start of next sequence");
        }
        reachedEnd = true;
      }
    }
  }

  sparser.close();
  return seenEOF;
}
/**
 * Produces the text to place after '>' on a sequence's description line.
 * The value stored under PROPERTY_DESCRIPTIONLINE in the sequence's
 * annotation is used when it can be retrieved; if the lookup raises
 * NoSuchElementException the sequence's name is used instead.
 *
 * @param seq the sequence to describe
 * @return the description line text
 */
protected String describeSequence(Sequence seq) {
  try {
    Object stored = seq.getAnnotation().getProperty(PROPERTY_DESCRIPTIONLINE);
    return stored.toString();
  } catch (NoSuchElementException notAnnotated) {
    // No description recorded for this sequence: fall back to its name.
    return seq.getName();
  }
}
/**
 * Writes a sequence as a '>' description line followed by its integer
 * quality values, separated by single spaces and wrapped into lines.
 *
 * NOTE that an integer symbol does not always correspond to one
 * character, so the line width limits characters, not the number of
 * integers per line. A value is moved to a new line when appending it
 * would take the current line past the line width; the separator space
 * that follows each value is not counted. A single value wider than the
 * line width is written on a line of its own.
 *
 * The final, partially filled line is written as well.
 *
 * @param seq the sequence to write; its symbols are read as
 *            IntegerAlphabet.IntegerSymbol
 * @param os  the stream to write to
 * @throws IOException declared by the signature; no statement in this
 *         method raises it directly
 */
public void writeSequence(Sequence seq, PrintStream os)
    throws IOException {
  os.print(">");
  os.println(describeSequence(seq));

  StringBuffer line = new StringBuffer();
  int seqLen = seq.length();
  for (int i = 1; i <= seqLen; i++) {
    int val = ((IntegerAlphabet.IntegerSymbol) seq.symbolAt(i)).intValue();
    String s = Integer.toString(val);

    // Break before this value if it does not fit; never emit an empty
    // line just because one value alone exceeds the width.
    if (line.length() > 0 && (line.length() + s.length()) > lineWidth) {
      os.println(line.toString());
      line.setLength(0);
    }
    line.append(s).append(' ');
  }

  // Flush whatever is left, otherwise the tail of the sequence is lost.
  if (line.length() > 0) {
    os.println(line.toString());
  }
}
/**
 * Writes a sequence to the specified {@code PrintStream} using the named
 * sub-format. The name is compared, ignoring case, with the value of
 * {@code getDefaultFormat()}; any other name is rejected.
 *
 * @param seq a {@code Sequence} to write out.
 * @param format a {@code String} indicating which sub-format to use
 * when writing.
 * @param os a {@code PrintStream} object.
 *
 * @exception IOException if an error occurs.
 * @exception IllegalArgumentException if the format name is not the
 * default format.
 * @deprecated use writeSequence(Sequence seq, PrintStream os)
 */
public void writeSequence(Sequence seq, String format, PrintStream os)
    throws IOException {
  final boolean recognised = format.equalsIgnoreCase(getDefaultFormat());
  if (!recognised) {
    throw new IllegalArgumentException("Unknown format '" + format + "'");
  }
  writeSequence(seq, os);
}
/**
 * Returns the {@code String} identifier of the default format, which is
 * the value of the {@code DEFAULT} constant.
 *
 * @return a {@code String}.
 * @deprecated
 */
public String getDefaultFormat() {
  return PhredFormat.DEFAULT;
}
/**
 * Registers a parse error listener. A listener that is already
 * registered is not added a second time.
 *
 * @param theListener Listener to be added.
 */
public synchronized void addParseErrorListener(ParseErrorListener theListener) {
  if (mListeners.contains(theListener)) {
    return; // already registered
  }
  mListeners.addElement(theListener);
}
/**
 * Unregisters a parse error listener. Nothing happens if the listener
 * is not currently registered.
 *
 * @param theListener Listener to be removed.
 */
public synchronized void removeParseErrorListener(ParseErrorListener theListener) {
  if (!mListeners.contains(theListener)) {
    return; // not registered
  }
  mListeners.removeElement(theListener);
}
/**
* This method determines the behaviour when a bad line is processed.
* Some options are to log the error, throw an exception, ignore it
* completely, or pass the event through.
*
* This method should be overwritten when different behavior is desired. * * @param theEvent The event that contains the bad line and token. */ public void BadLineParsed(org.biojava.utils.ParseErrorEvent theEvent) { notifyParseErrorEvent(theEvent); } // Protected methods /** * Passes the event on to all the listeners registered for ParseErrorEvents. * * @param theEvent The event to be handed to the listeners. */ protected void notifyParseErrorEvent(ParseErrorEvent theEvent) { Vector listeners; synchronized(this) { listeners = (Vector)mListeners.clone(); } for (int index = 0; index < listeners.size(); index++) { ParseErrorListener client = (ParseErrorListener)listeners.elementAt(index); client.BadLineParsed(theEvent); } } }