/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.program.gff; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.StringTokenizer; import org.biojava.bio.BioException; import org.biojava.bio.seq.StrandedFeature; import org.biojava.utils.ParserException; import org.biojava.utils.SmallMap; /** * Parse a stream of GFF text into a stream of records and comments. * * @author Matthew Pocock * @author Thomas Down * @author Keith James (docs) */ public class GFFParser { private GFFErrorHandler errors = GFFErrorHandler.ABORT_PARSING; /** * Set the error handler used by this parser. */ public void setErrorHandler(GFFErrorHandler errors) { this.errors = errors; } /** * Find the error handler used by this parser. */ public GFFErrorHandler getErrorHandler() { return errors; } /** * Informs handler of each line of * gff read from bReader. This form * of the method should only be used if no locator string is * available for the resource being parsed. * * @param bReader the BufferedReader to parse * @param handler the GFFDocumentHandler that will * listen for 'stuff' * * @throws IOException if for any reason * bReader throws one * @throws BioException if * handler can not correct a parse error */ public void parse(BufferedReader bReader, GFFDocumentHandler handler) throws IOException, BioException, ParserException { parse(bReader, handler, "unknown:"); } /** * Informs handler of each line of * GFF read from bReader * * @param bReader the BufferedReader to parse * @param handler the GFFDocumentHandler that will * listen for 'stuff' * * @throws IOException if for any reason * bReader throws one * @throws BioException if * handler can not correct a parse error */ public void parse(BufferedReader bReader, GFFDocumentHandler handler, String locator) throws IOException, BioException, ParserException { handler.startDocument(locator); ArrayList aList = new ArrayList(); int lineNum = 0; for(String line = bReader.readLine(); line != null; line = bReader.readLine()) { ++lineNum; try { aList.clear(); if(line.startsWith("#")) { handler.commentLine(line.substring(1)); } else if (line.length() == 0) { } else { StringTokenizer st = new StringTokenizer(line, "\t", false); while(st.hasMoreTokens() && aList.size() < 8) { String token = st.nextToken(); aList.add(token); } if(aList.size() < 7) { throw new ParserException( "Line doesn't look like GFF", locator, lineNum, line ); } String rest = null; String comment = null; if(st.hasMoreTokens()) { try { rest = st.nextToken(((char) 0) + ""); } catch (NoSuchElementException nsee) { } } if(rest != null) { int ci = rest.indexOf("#"); if (ci != -1) { comment = rest.substring(ci); rest = rest.substring(0, ci); } } GFFRecord record = createRecord(handler, aList, rest, comment); handler.recordLine(record); } } catch (ParserException ex) { throw new ParserException(ex.getMessage(), locator, lineNum, line); } catch (IgnoreRecordException ex) { // Silently skip any more work on this record } } handler.endDocument(); } /** * Actually turns a list of tokens, some value string and a comment into a * GFFRecord and informs * handler. * * @param handler a GFFDocumentHandler to inform of * any parse errors, and the completed GFFRecord * @param aList a List containing the 8 mandatory GFF columns * @param rest a String representing the unparsed * attribute-value text, or null if there is none * @param comment a String containing the comment (without the * leading '#' character. * @throws BioException if handler * could not correct a parse error */ protected GFFRecord createRecord(GFFDocumentHandler handler, List aList, String rest, String comment) throws BioException, ParserException, IgnoreRecordException { SimpleGFFRecord record = new SimpleGFFRecord(); record.setSeqName((String) aList.get(0)); record.setSource((String) aList.get(1)); record.setFeature((String) aList.get(2)); int start = -1; try { start = Integer.parseInt( (String) aList.get(3)); } catch (NumberFormatException nfe) { start = errors.invalidStart((String) aList.get(3)); } record.setStart(start); int end = -1; try { end = Integer.parseInt( (String) aList.get(4)); } catch (NumberFormatException nfe) { end = errors.invalidEnd((String) aList.get(3)); } record.setEnd(end); String score = (String) aList.get(5); if(score == null || score.equals("") || score.equals(".") || score.equals("0") ) { record.setScore(GFFTools.NO_SCORE); } else { double sc = 0.0; try { sc = Double.parseDouble(score); } catch (NumberFormatException nfe) { sc = errors.invalidScore(score); } record.setScore(sc); } String strand = (String) aList.get(6); if(strand == null || strand.equals("") || strand.equals(".")) { record.setStrand(StrandedFeature.UNKNOWN); } else { if(strand.equals("+")) { record.setStrand(StrandedFeature.POSITIVE); } else if(strand.equals("-")) { record.setStrand(StrandedFeature.NEGATIVE); } else { record.setStrand(errors.invalidStrand(strand)); } } String frame = (String) aList.get(7); if(frame.equals(".")) { record.setFrame(GFFTools.NO_FRAME); } else { int fr = 0; try { fr = Integer.parseInt(frame); } catch (NumberFormatException nfe) { fr = errors.invalidFrame(frame); } record.setFrame(fr); } if (rest != null) record.setGroupAttributes(parseAttribute(rest)); else record.setGroupAttributes(new SmallMap()); record.setComment(comment); return record; } /** * Parse attValList into a * Map of attributes and value lists. *

* The resulting Map will have * String keys, with * List values. If there are no values * associated with a key, then it will have an empty * List, not null as * its value. * * @param attValList the String to parse * @return a Map of parsed attributes and value lists */ protected Map parseAttribute(String attValList) { Map attMap = new SmallMap(); StringTokenizer sTok = new StringTokenizer(attValList, ";", false); while(sTok.hasMoreTokens()) { String attVal = sTok.nextToken().trim(); String attName; List valList = new ArrayList(); int spaceIndx = attVal.indexOf(" "); if(spaceIndx == -1) { attName = attVal; } else { attName = attVal.substring(0, spaceIndx); attValList = attVal.substring(spaceIndx).trim(); while(attValList.length() > 0) { if(attValList.startsWith("\"")) { // System.out.println("Quoted"); int quoteIndx = 0; do { quoteIndx++; quoteIndx = attValList.indexOf("\"", quoteIndx); } while(quoteIndx != -1 && attValList.charAt(quoteIndx-1) == '\\'); if(quoteIndx > 0){ valList.add(attValList.substring(1, quoteIndx)); attValList = attValList.substring(quoteIndx+1).trim(); }else{ valList.add(attValList); attValList = ""; } } else { spaceIndx = attValList.indexOf(" "); if(spaceIndx == -1) { valList.add(attValList); attValList = ""; } else { valList.add(attValList.substring(0, spaceIndx)); attValList = attValList.substring(spaceIndx).trim(); } } } } attMap.put(attName, valList); } return attMap; } }