/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.program.gff;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.biojava.bio.BioError;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.db.IllegalIDException;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.utils.ChangeVetoException;
import org.biojava.utils.ParserException;

/**
 * Static convenience methods for reading GFF into a {@link GFFEntrySet},
 * writing a {@link GFFEntrySet} back out, annotating sequences from GFF and
 * producing GFF from the features of sequences.
 *
 * @author Mark Schreiber
 * @author Matthew Pocock
 * @since 1.2
 */
public class GFFTools {

  /**
   * Flag to indicate that there is no score info.
   */
  public static double NO_SCORE = Double.NEGATIVE_INFINITY;

  /**
   * Flag to indicate that there is no frame info.
   */
  public static int NO_FRAME = -1;

  /**
   * Reads a GFFEntrySet from a file with no filtering.
   *
   * @param fileName the file containing the GFF
   * @throws FileNotFoundException if file is not found
   * @throws ParserException if format is wrong
   * @throws BioException if format is wrong
   * @throws IOException if file reading error occurs
   * @return a GFFEntrySet encapsulating the records read from the file
   * @deprecated use: {@link #readGFF(File)}
   */
  public static GFFEntrySet readGFF(String fileName)
    throws FileNotFoundException, ParserException, BioException, IOException
  {
    return readGFF(fileName, GFFRecordFilter.ACCEPT_ALL);
  }

  /**
   * Reads a GFFEntrySet from a file with the specified filter.
   * <p>
   * The file is opened by this method and is closed again before it returns,
   * whether or not parsing succeeded.
   * </p>
   *
   * @param fileName the file containing the GFF
   * @param recFilt the filter to use
   * @throws FileNotFoundException if file is not found
   * @throws ParserException if format is wrong
   * @throws BioException if format is wrong
   * @throws IOException if file reading error occurs
   * @return a GFFEntrySet encapsulating the records read from the file
   * @deprecated use: {@link #readGFF(File, GFFRecordFilter)}
   */
  public static GFFEntrySet readGFF(String fileName, GFFRecordFilter recFilt)
    throws FileNotFoundException, ParserException, BioException, IOException
  {
    BufferedReader gffIn = new BufferedReader(new FileReader(fileName));
    try {
      return readGFF(gffIn, recFilt);
    } finally {
      // We opened the reader, so we must release the file handle.
      gffIn.close();
    }
  }

  /**
   * Reads a GFFEntrySet from a file with no filtering.
   *
   * @param inFile the File containing the GFF
   * @throws FileNotFoundException if file is not found
   * @throws ParserException if format is wrong
   * @throws BioException if format is wrong
   * @throws IOException if file reading error occurs
   * @return a GFFEntrySet encapsulating the records read from the file
   */
  public static GFFEntrySet readGFF(File inFile)
    throws FileNotFoundException, ParserException, BioException, IOException
  {
    return readGFF(inFile, GFFRecordFilter.ACCEPT_ALL);
  }

  /**
   * Reads a GFFEntrySet from a file with the specified filter.
   * <p>
   * The file is opened by this method and is closed again before it returns,
   * whether or not parsing succeeded.
   * </p>
   *
   * @param inFile the File containing the GFF
   * @param recFilt the filter to use
   * @throws FileNotFoundException if file is not found
   * @throws ParserException if format is wrong
   * @throws BioException if format is wrong
   * @throws IOException if file reading error occurs
   * @return a GFFEntrySet encapsulating the records read from the file
   */
  public static GFFEntrySet readGFF(File inFile, GFFRecordFilter recFilt)
    throws FileNotFoundException, ParserException, BioException, IOException
  {
    BufferedReader gffIn = new BufferedReader(new FileReader(inFile));
    try {
      return readGFF(gffIn, recFilt);
    } finally {
      // We opened the reader, so we must release the file handle.
      gffIn.close();
    }
  }

  /**
   * Read all GFF entries from a buffered reader.
   * <p>
   * This will read up until the end of the reader. This method does not
   * itself close the reader.
   * </p>
   *
   * @param gffIn the BufferedReader to read text from
   * @return a GFFEntrySet containing all of the GFF that could be read
   * @throws ParserException if the text could not be parsed as GFF
   * @throws BioException if there was some error reading the GFF
   * @throws IOException if there was an error with the reader
   */
  public static GFFEntrySet readGFF(BufferedReader gffIn)
    throws ParserException, BioException, IOException
  {
    return readGFF(gffIn, GFFRecordFilter.ACCEPT_ALL);
  }

  /**
   * Read all GFF entries matching a filter from a buffered reader.
   * <p>
   * This will read up until the end of the reader. This method does not
   * itself close the reader.
   * </p>
   *
   * @param gffIn the BufferedReader to read text from
   * @param recFilt the filter that records are passed through before being
   *        added to the returned entry set
   * @return a GFFEntrySet containing all of the GFF that could be read
   * @throws ParserException if the text could not be parsed as GFF
   * @throws BioException if there was some error reading the GFF
   * @throws IOException if there was an error with the reader
   */
  public static GFFEntrySet readGFF(BufferedReader gffIn, GFFRecordFilter recFilt)
    throws ParserException, BioException, IOException
  {
    GFFEntrySet gffEntries = new GFFEntrySet();
    // parser -> filterer -> the entry set's add handler
    GFFFilterer filterer = new GFFFilterer(gffEntries.getAddHandler(), recFilt);
    GFFParser parser = new GFFParser();
    parser.parse(gffIn, filterer);
    return gffEntries;
  }

  /**
   * Writes a GFFEntrySet to a file.
   * <p>
   * The file is closed before this method returns, whether or not writing
   * succeeded.
   * </p>
   *
   * @param fileName the file to write to
   * @param ents the entries to write
   * @throws IOException if file writing fails
   */
  public static void writeGFF(String fileName, GFFEntrySet ents)
    throws IOException
  {
    PrintWriter pw = new PrintWriter(new FileWriter(fileName));
    try {
      writeGFF(pw, ents);
      // PrintWriter never throws on write; checkError() flushes and reports
      // whether any underlying write failed.
      if (pw.checkError()) {
        throw new IOException("Error writing GFF to " + fileName);
      }
    } finally {
      pw.close();
    }
  }

  /**
   * Writes a GFFEntrySet to a file.
   * <p>
   * The file is closed before this method returns, whether or not writing
   * succeeded.
   * </p>
   *
   * @param outFile the file to write to
   * @param ents the entry set to write
   * @throws IOException if writing to the file fails
   */
  public static void writeGFF(File outFile, GFFEntrySet ents)
    throws IOException
  {
    PrintWriter pw = new PrintWriter(new FileWriter(outFile));
    try {
      writeGFF(pw, ents);
      // PrintWriter never throws on write; checkError() flushes and reports
      // whether any underlying write failed.
      if (pw.checkError()) {
        throw new IOException("Error writing GFF to " + outFile);
      }
    } finally {
      pw.close();
    }
  }

  /**
   * Writes a GFFEntrySet to a PrintWriter.
   * <p>
   * The writer belongs to the caller: this method neither flushes nor closes
   * it explicitly.
   * </p>
   *
   * @param pw the PrintWriter to write to
   * @param ents the entries to write
   * @throws IOException if file writing fails
   */
  public static void writeGFF(PrintWriter pw, GFFEntrySet ents)
    throws IOException
  {
    GFFWriter writer = new GFFWriter(pw);
    ents.streamRecords(writer);
  }

  /**
   * Annotates a sequence with the features from a GFF entry set with sequence
   * name matching this sequence.
   *
   * @param seq the Sequence to annotate.
   * @param ents the GFF features to annotate it with.
   * @return a reference to a newly annotated sequence.
   * @throws BioError if the annotator reports a ChangeVetoException or a
   *         BioException
   */
  public static Sequence annotateSequence(Sequence seq, GFFEntrySet ents) {
    Sequence annotated;
    try {
      annotated = ents.getAnnotator().annotate(seq);
    } catch (ChangeVetoException ex) {
      throw new BioError("Assertion Error: Unable to annotate sequence", ex);
    } catch (BioException ex) {
      throw new BioError("Assertion Error: Unable to annotate sequence", ex);
    }
    return annotated;
  }

  /**
   * Annotates a sequence with the features from a GFF entry set.
   *
   * @param seq the Sequence to annotate.
   * @param ents the GFF features to annotate it with.
   * @param checkSeqName boolean flag, if true only annotate sequence with
   *        features that have matching sequence names, otherwise annotate
   *        all features
   * @return a reference to a newly annotated sequence.
   * @throws BioError if the annotator reports a ChangeVetoException or a
   *         BioException
   */
  public static Sequence annotateSequence(
    Sequence seq,
    GFFEntrySet ents,
    boolean checkSeqName
  ) {
    Sequence annotated;
    try {
      annotated = ents.getAnnotator(checkSeqName).annotate(seq);
    } catch (ChangeVetoException ex) {
      throw new BioError("Assertion Error: Unable to annotate sequence", ex);
    } catch (BioException ex) {
      throw new BioError("Assertion Error: Unable to annotate sequence", ex);
    }
    return annotated;
  }

  /**
   * Annotates all sequences in a sequence DB with features from a GFF entry set.
   * <p>
   * Every distinct sequence name found among the GFF records is looked up in
   * the database, and the sequence returned is annotated with the records
   * carrying that name.
   * </p>
   *
   * @param seqs the SequenceDB to annotate
   * @param ents the GFFEntrySet to annotate with
   * @return the same SequenceDB instance that was passed in
   * @throws IllegalIDException declared for the sequence lookup by name
   * @throws BioException declared for the sequence lookup by name
   */
  public static SequenceDB annotateSequences(SequenceDB seqs, GFFEntrySet ents)
    throws IllegalIDException, BioException
  {
    // Collect the distinct sequence names; the Set discards duplicates itself.
    Set names = new HashSet();
    for (Iterator i = ents.lineIterator(); i.hasNext(); ) {
      Object line = i.next();
      // only process GFFRecords, not comments
      if (line instanceof GFFRecord) {
        names.add(((GFFRecord) line).getSeqName());
      }
    }

    // Filter the entry set into subsets with the same name and use each
    // subset to annotate the correspondingly named sequence.
    for (Iterator i = names.iterator(); i.hasNext(); ) {
      final String name = (String) i.next();
      GFFRecordFilter sameName = new GFFRecordFilter() {
        public boolean accept(GFFRecord rec) {
          return rec.getSeqName().equals(name);
        }
      };
      GFFEntrySet filtered = ents.filter(sameName);
      Sequence seq = seqs.getSequence(name);
      // NOTE(review): the annotated sequence returned here is not stored back
      // into seqs; this relies on the annotator modifying seq in place --
      // confirm against the annotator implementation.
      annotateSequence(seq, filtered);
    }

    return seqs;
  }

  /**
   * Creates a GFFEntrySet containing one entry for each feature on a sequence.
   *
   * @param seq the Sequence to create features for
   * @return a new GFFEntrySet with gff records for each feature on the sequence
   * @throws BioException if something went wrong GFF-ifying the sequences
   *         features
   */
  public static GFFEntrySet gffFromSequence(Sequence seq)
    throws BioException
  {
    SequencesAsGFF sagff = new SequencesAsGFF();
    GFFEntrySet gffES = new GFFEntrySet();
    sagff.processSequence(seq, gffES.getAddHandler());
    return gffES;
  }

  /**
   * Creates a GFFEntrySet containing one entry for each feature on each
   * sequence of a SequenceDB.
   *
   * <p>
   * <em>Note:</em> This converts all features in the whole database to
   * in-memory GFFRecord instances. This will take up considerable memory for
   * large databases.
   * </p>
   *
   * @param seqDB the SequenceDB to create features for
   * @return a new GFFEntrySet with gff records for each feature on the database
   * @throws BioException if something went wrong GFF-ifying the sequences
   *         features
   */
  public static GFFEntrySet gffFromSeqDB(SequenceDB seqDB)
    throws BioException
  {
    GFFEntrySet gffES = new GFFEntrySet();
    for (SequenceIterator si = seqDB.sequenceIterator(); si.hasNext(); ) {
      Sequence seq = si.nextSequence();
      SequencesAsGFF sagff = new SequencesAsGFF();
      sagff.processSequence(seq, gffES.getAddHandler());
    }
    return gffES;
  }
}