/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojavax.bio.seq; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.TreeSet; import org.biojava.bio.BioError; import org.biojava.bio.BioException; import org.biojava.bio.seq.DNATools; import org.biojava.bio.seq.Feature; import org.biojava.bio.seq.NucleotideTools; import org.biojava.bio.seq.ProteinTools; import org.biojava.bio.seq.RNATools; import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.SequenceIterator; import org.biojava.bio.seq.StrandedFeature; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.Alphabet; import org.biojava.bio.symbol.AlphabetManager; import org.biojava.bio.symbol.SimpleSymbolList; import org.biojava.bio.symbol.SymbolList; import org.biojava.utils.ChangeType; import org.biojava.utils.ChangeVetoException; import org.biojavax.Namespace; import org.biojavax.Note; import org.biojavax.RichObjectFactory; import org.biojavax.SimpleNamespace; import org.biojavax.SimpleNote; import org.biojavax.bio.BioEntry; import org.biojavax.bio.seq.io.EMBLFormat; import org.biojavax.bio.seq.io.EMBLxmlFormat; import org.biojavax.bio.seq.io.FastaFormat; import org.biojavax.bio.seq.io.FastaHeader; import 
org.biojavax.bio.seq.io.GenbankFormat; import org.biojavax.bio.seq.io.HashedFastaIterator; import org.biojavax.bio.seq.io.INSDseqFormat; import org.biojavax.bio.seq.io.RichSequenceBuilderFactory; import org.biojavax.bio.seq.io.RichSequenceFormat; import org.biojavax.bio.seq.io.RichStreamReader; import org.biojavax.bio.seq.io.RichStreamWriter; import org.biojavax.bio.seq.io.UniProtFormat; import org.biojavax.bio.seq.io.UniProtXMLFormat; import org.biojavax.ontology.ComparableTerm; /** * A rich sequence is a combination of a org.biojavax.bio.Bioentry and a * Sequence. It inherits and merges the methods of both. The RichSequence is * based on the BioSQL model and provides a richer array of methods to access * information than Sequence does. Whenever possible RichSequence should be used * in preference to Sequence. * * @author Mark Schreiber * @author Richard Holland * @author George Waldon * @since 1.5 */ public interface RichSequence extends BioEntry, Sequence { public static final ChangeType SYMLISTVERSION = new ChangeType( "This sequences's symbollist version has changed", "org.biojavax.bio.seq.RichSequence", "SYMLISTVERSION"); public static final ChangeType CIRCULAR = new ChangeType( "This sequences's circularity has changed", "org.biojavax.bio.seq.RichSequence", "CIRCULAR"); /** * The version of the associated symbol list. Note the use of an object for * the value means that it can be nulled. * * @return the version */ public Double getSeqVersion(); /** * Sets the version of the associated symbol list. Note the use of an object * for the value means that it can be nulled. * * @param seqVersion * the version to set. * @throws ChangeVetoException * if it doesn't want to change. */ public void setSeqVersion(Double seqVersion) throws ChangeVetoException; /** * The features for this sequence. * * @return a set of RichFeature objects. */ public Set getFeatureSet(); /** * Sets the features of this sequence. 
Note that it is not checked to see if * the features actually belong to this sequence, you'd best check that * yourself and make changes using feature.setParent() if necessary. * * @param features * the features to assign to this sequence, replacing all others. * Must be a set of RichFeature objects. * @throws ChangeVetoException * if they could not be assigned. */ public void setFeatureSet(Set features) throws ChangeVetoException; /** * Circularises the Sequence. The circular length can then be * said to be the length of the sequence itself. * * @param circular * set to true if you want it to be circular * @throws ChangeVetoException * if the change is blocked. Some implementations may choose not * to support circularisation and should throw an exception * here. Some implementations may only support this method for * certain Alphabets. */ public void setCircular(boolean circular) throws ChangeVetoException; /** * Is the sequence circular? Circularity has implications for work with * locations and any coordinate work eg symbolAt(int i). Classes that allow * it should test this method when working with coordinates or locations / * features. * * @return true if the this is circular else false. */ public boolean getCircular(); /** * A special function that returns the SymbolList that this RichSequence is * based around. This should _not_ be the RichSequence object itself, as * this function is used to perform actions on the symbol list without * referring to the RichSequence object directly. * * @return the internal SymbolList of the RichSequence, NOT the RichSequence * object itself. */ public SymbolList getInternalSymbolList(); /** * Stores a number of useful terms used across many sequence formats for * consistency's sake. 
*/ public static class Terms { public static String SPECIES_KEY = "SPECIES"; public static String STRAIN_KEY = "STRAIN"; public static String TISSUE_KEY = "TISSUE"; public static String TRANSPOSON_KEY = "TRANSPOSON"; public static String PLASMID_KEY = "PLASMID"; /** * Holds a reference to the key that must be used to store PubMed * references. */ public static final String PUBMED_KEY = "PUBMED"; /** * Holds a reference to the key that must be used to store Medline * references. */ public static final String MEDLINE_KEY = "MEDLINE"; /** * Holds a reference to the key that must be used to store DOI * references. */ public static final String DOI_KEY = "DOI"; /** * Getter for the secondary/tertiary/additional accession term * * @return A Term that represents the secondary accession tag */ public static ComparableTerm getAdditionalAccessionTerm() { return RichObjectFactory.getDefaultOntology() .getOrCreateTerm("acc"); } /** * Getter for the keyword term * * @return a Term that represents the Keyword tag */ public static ComparableTerm getKeywordTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm("kw"); } /** * Getter for the date created term * * @return a Term */ public static ComparableTerm getDateCreatedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "cdat"); } /** * Getter for the date updated term * * @return a Term */ public static ComparableTerm getDateUpdatedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "udat"); } /** * Getter for the date annotated term * * @return a Term */ public static ComparableTerm getDateAnnotatedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "adat"); } /** * Getter for the release created term * * @return a Term */ public static ComparableTerm getRelCreatedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "crel"); } /** * Getter for the release updated term * * @return a Term */ public static ComparableTerm 
getRelUpdatedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "urel"); } /** * Getter for the release annotated term * * @return a Term */ public static ComparableTerm getRelAnnotatedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "arel"); } /** * getter for the MolType term * * @return a Term that represents the molecule type */ public static ComparableTerm getMolTypeTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "moltype"); } /** * Getter for the Strand term; legal values are "single", "double", and * "mixed". * * @return a Term that represents the Strand tag */ public static ComparableTerm getStrandedTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "stranded"); } /** * Getter for the Organelle term * * @return a Term that represents the Organelle tag */ public static ComparableTerm getOrganelleTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "organelle"); } /** * Getter for the GeneName term * * @return The GeneName Term */ public static ComparableTerm getGeneNameTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "gene_name"); } /** * Getter for the GeneSynonym term * * @return The GeneSynonym Term */ public static ComparableTerm getGeneSynonymTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "gene_synonym"); } /** * Getter for the OrderedLocusName term * * @return The OrderedLocusName Term */ public static ComparableTerm getOrderedLocusNameTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "gene_ordloc"); } /** * Getter for the ORFName term * * @return The ORFName Term */ public static ComparableTerm getORFNameTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "gene_orf"); } /** * Getter for the Strain term * * @return The Strain Term */ public static ComparableTerm getStrainTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "strain"); } /** 
* Getter for the Species term * * @return The Species Term */ public static ComparableTerm getSpeciesTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "species"); } /** * Getter for the Tissue term * * @return The Tissue Term */ public static ComparableTerm getTissueTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "tissue"); } /** * Getter for the Transposon term * * @return The Transposon Term */ public static ComparableTerm getTransposonTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "transposon"); } /** * Getter for the Plasmid term * * @return The plasmid Term */ public static ComparableTerm getPlasmidTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "plasmid"); } /** * Getter for the DataClass term * * @return The DataClass Term */ public static ComparableTerm getDataClassTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "dataclass"); } /** * Getter for the FTId term * * @return The FTId Term */ public static ComparableTerm getFTIdTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "feature_id"); } /** * Getter for the FeatureDesc term * * @return The FeatureDesc Term */ public static ComparableTerm getFeatureDescTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "feature_desc"); } /** * Getter for the copyright term * * @return The copyright Term */ public static ComparableTerm getCopyrightTerm() { return RichObjectFactory.getDefaultOntology().getOrCreateTerm( "copyright"); } } /** * Some useful tools for working with RichSequence objects. * * @since 1.5 */ public static class Tools { // because we are static we don't want any instances private Tools() { } /** * Create a new RichSequence in the default namespace. * * @param name * The name for the sequence. Will also be used for the * accession. 
* @param seqString * The sequence string * @param alpha * The Alphabet for the sequence * @throws org.biojava.bio.BioException * If the symbols in seqString are not valid in * alpha * @return A new RichSequence. All versions are 1 or 1.0 */ public static RichSequence createRichSequence(String name, String seqString, Alphabet alpha) throws BioException { SymbolList syms = new SimpleSymbolList(alpha .getTokenization("token"), seqString); return createRichSequence(name, syms); } /** * Create a new RichSequence in the specified namespace. * * @return A new RichSequence. All versions are 1 or 1.0 * @param namespace * the namespace to create the sequence in. A singleton * Namespace will be created or retrieved as * appropriate. * @param name * The name for the sequence. Will also be used for the * accession. * @param seqString * The sequence string * @param alpha * The Alphabet for the sequence * @throws org.biojava.bio.BioException * If the symbols in seqString are not valid in * alpha */ public static RichSequence createRichSequence(String namespace, String name, String seqString, Alphabet alpha) throws BioException { SymbolList syms = new SimpleSymbolList(alpha .getTokenization("token"), seqString); Namespace ns = (Namespace) RichObjectFactory.getObject( SimpleNamespace.class, new Object[] { namespace }); return createRichSequence(ns, name, syms); } /** * Create a new RichSequence in the specified namespace. * * @return A new RichSequence. All versions are 1 or 1.0 * @param ns * The namespace to create the sequence in. * @param name * The name for the sequence. Will also be used for the * accession. 
* @param seqString * The sequence string * @param alpha * The Alphabet for the sequence * @throws org.biojava.bio.BioException * If the symbols in seqString are not valid in * alpha */ public static RichSequence createRichSequence(Namespace ns, String name, String seqString, Alphabet alpha) throws BioException { SymbolList syms = new SimpleSymbolList(alpha .getTokenization("token"), seqString); return createRichSequence(ns, name, syms); } /** * Create a new RichSequence in the default namespace. * * @return A new RichSequence. All versions are 1 or 1.0 * @param syms * The symbols to add to the sequence. * @param name * The name for the sequence. Will also be used for the * accession. */ public static RichSequence createRichSequence(String name, SymbolList syms) { Namespace ns = RichObjectFactory.getDefaultNamespace(); return createRichSequence(ns, name, syms); } /** * Create a new RichSequence in the specified namespace. * * @return A new RichSequence. All versions are 1 or 1.0 * @param ns * the namespace to create the sequence in. * @param syms * The symbols to add to the sequence. * @param name * The name for the sequence. Will also be used for the * accession. */ public static RichSequence createRichSequence(Namespace ns, String name, SymbolList syms) { return new SimpleRichSequence(ns, name, name, 1, syms, new Double( 1.0)); } /** * Boldly attempts to convert a Sequence into a * RichSequence. Sequences will be assigned to * the default namespace. The accession will be assumed to be the name * of the old sequence. The version of the sequence will be set to 0 and * the seqversion set to 0.0. Features are converted to * RichFeatures. The old Annotation bundle is * converted to a RichAnnotation * * @param s * The Sequence to enrich * @throws ChangeVetoException * if s is locked or the conversion fails. 
* @return a new RichSequence */ public static RichSequence enrich(Sequence s) throws ChangeVetoException { if (s instanceof RichSequence) return (RichSequence) s; String name = s.getName(); RichSequence rs = new SimpleRichSequence(RichObjectFactory .getDefaultNamespace(), name == null ? "UnknownName" : name, name == null ? "UnknownAccession" : name, 0, s, new Double( 0.0)); // Transfer features for (Iterator i = s.features(); i.hasNext();) { Feature f = (Feature) i.next(); try { rs.createFeature(f.makeTemplate()); } catch (BioException e) { throw new ChangeVetoException("They hates us!", e); } } // Transfer annotations for (Iterator i = s.getAnnotation().keys().iterator(); i.hasNext();) { Object key = i.next(); Object value = s.getAnnotation().getProperty(key); rs.getAnnotation().setProperty(key, value); } return rs; } /** *

* Creates a new sequence from a subregion of another sequence. The * sequence is not a view. The sequence can be given a new Namespace, * Accession, Name, Identifier etc. or you can copy over the old values. * For unique identification in databases we recommend you change at * least the name and identifier. *

*

* The new sequence will retain all features that are fully contained by * the new subsequence, the note set (annotation), Taxon, and * description, modified to reflect the subsequence as follows: * *

		 * seq.setDescription("subsequence (" + from + ":" + to + ") of "
		 * 		+ s.getDescription());
		 * 
* * No other properties are copied. * * @param newVersion * the new version number * @param seqVersion * the new sequence version * @param s * the original RichSequence. * @param from * the 1st subsequence coordinate (inclusive) * @param to * the last subsequence coordinate (inclusive) * @param newNamespace * the new Namespace * @param newName * the new name * @param newAccession * the new accession number * @param newIdentifier * the new identifier * @throws java.lang.IndexOutOfBoundsException * if from or to lie outside of * the bounds of s. * @return A new RichSequence */ public static RichSequence subSequence(RichSequence s, int from, int to, Namespace newNamespace, String newName, String newAccession, String newIdentifier, int newVersion, Double seqVersion) throws IndexOutOfBoundsException { SymbolList symList = s.subList(from, to); SimpleRichSequence seq = new SimpleRichSequence(newNamespace, newName, newAccession, newVersion, symList, seqVersion); RichLocation subLoc = new SimpleRichLocation(new SimplePosition( from), new SimplePosition(to), 0); RichLocation subLocComplement = new SimpleRichLocation( new SimplePosition(from), new SimplePosition(to), 0, RichLocation.Strand.NEGATIVE_STRAND); try { // copy features if appropriate for (Iterator i = s.features(); i.hasNext();) { RichFeature f = (RichFeature) i.next(); if (f.getStrand().equals(StrandedFeature.POSITIVE)) { if (subLoc.contains(f.getLocation())) { RichFeature.Template templ = (RichFeature.Template) f .makeTemplate(); // change the location Position min = new SimplePosition(templ.location .getMin() - from + 1); // System.out.println("getMin " + // templ.location.getMin()); Position max = new SimplePosition(templ.location .getMax() - from + 1); // System.out.println("getMax " + // templ.location.getMax()); templ.location = new SimpleRichLocation(min, max, 0); seq.createFeature(templ); } } else { if (subLocComplement.contains(f.getLocation())) { RichFeature.Template templ = (RichFeature.Template) f 
.makeTemplate(); // change the location Position min = new SimplePosition(templ.location .getMin() - from + 1); // System.out.println("getMin " + // templ.location.getMin()); Position max = new SimplePosition(templ.location .getMax() - from + 1); // System.out.println("getMax " + // templ.location.getMax()); templ.location = new SimpleRichLocation(min, max, 0, RichLocation.Strand.NEGATIVE_STRAND); seq.createFeature(templ); } } } // clone Notes if (s.getNoteSet() != null) { Set notes = s.getNoteSet(); Iterator it = notes.iterator(); Set ns = new TreeSet(); while (it.hasNext()) { Note note = (Note) it.next(); ns.add(new SimpleNote( note.getTerm(), note.getValue(), note.getRank())); } seq.setNoteSet(ns); } // copy other cruft if (s.getTaxon() != null) seq.setTaxon(s.getTaxon()); if (s.getDescription() != null) { seq.setDescription("subsequence (" + from + ":" + to + ") of " + s.getDescription()); } if (s.getDivision() != null) { seq.setDivision(s.getDivision()); } } catch (ChangeVetoException ex) { throw new BioError(ex); // something is rotten in Denmark! } catch (BioException ex) { throw new BioError(ex); // something is rotten in Denmark! } return seq; } } /** * A set of convenience methods for handling common file formats. * * @author Mark Schreiber * @author Richard Holland * @since 1.5 */ public final class IOTools { private static RichSequenceBuilderFactory factory = RichSequenceBuilderFactory.FACTORY; // This can't be instantiated. private IOTools() { } /** * Register a new format with IOTools for auto-guessing. * * @param formatClass * the RichSequenceFormat object to register. */ public static void registerFormat(Class formatClass) { Object o; try { o = formatClass.newInstance(); } catch (Exception e) { throw new BioError(e); } if (!(o instanceof RichSequenceFormat)) throw new BioError("Class " + formatClass + " is not an implementation of RichSequenceFormat!"); formatClasses.add(formatClass); } // Private reference to the formats we know about. 
private static List<Class<?>> formatClasses = new ArrayList<Class<?>>();

        /**
         * Guess which format a stream is then attempt to read it. Every
         * registered format class is instantiated in registration order and
         * asked whether it can read the stream; the first one that can is
         * used to build the iterator.
         *
         * @param stream
         *            the BufferedInputStream to attempt to read.
         * @param seqFactory
         *            a factory used to build a RichSequence
         * @param ns
         *            a Namespace to load the sequences into. Null implies
         *            that it should use the namespace specified in the file.
         *            If no namespace is specified in the file, then
         *            RichObjectFactory.getDefaultNamespace() is used.
         * @return a RichSequenceIterator over each sequence in the stream
         * @throws IOException
         *             in case the stream is unrecognisable or problems occur
         *             in reading it.
         */
        public static RichSequenceIterator readStream(
                BufferedInputStream stream,
                RichSequenceBuilderFactory seqFactory, Namespace ns)
                throws IOException {
            // The typed list lets the loop compile without a raw-iterator
            // assignment.
            for (Class<?> formatClass : formatClasses) {
                RichSequenceFormat format;
                try {
                    format = (RichSequenceFormat) formatClass.newInstance();
                } catch (Exception e) {
                    throw new BioError(e);
                }
                if (format.canRead(stream)) {
                    SymbolTokenization sTok = format
                            .guessSymbolTokenization(stream);
                    BufferedReader br = new BufferedReader(
                            new InputStreamReader(stream));
                    return new RichStreamReader(br, format, sTok, seqFactory,
                            ns);
                }
            }
            throw new IOException("Could not recognise format of stream.");
        }

        /**
         * Guess which format a stream is then attempt to read it, building
         * sequences with the default factory of this class.
         *
         * @return a RichSequenceIterator over each sequence in the stream
         * @param stream
         *            the BufferedInputStream to attempt to read.
         * @param ns
         *            a Namespace to load the sequences into. Null implies
         *            that it should use the namespace specified in the file.
         *            If no namespace is specified in the file, then
         *            RichObjectFactory.getDefaultNamespace() is used.
         * @throws java.io.IOException
         *             If the file cannot be read.
*/ public static RichSequenceIterator readStream( BufferedInputStream stream, Namespace ns) throws IOException { return readStream(stream, factory, ns); } /** * Guess which format a file is then attempt to read it. * * @param file * the File to attempt to read. * @param seqFactory * a factory used to build a RichSequence * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * file * @throws IOException * in case the file is unrecognisable or problems occur in * reading it. */ public static RichSequenceIterator readFile(File file, RichSequenceBuilderFactory seqFactory, Namespace ns) throws IOException { for (Iterator i = formatClasses.iterator(); i.hasNext();) { Class formatClass = i.next(); RichSequenceFormat format; try { format = (RichSequenceFormat) formatClass.newInstance(); } catch (Exception e) { throw new BioError(e); } if (format.canRead(file)) { SymbolTokenization sTok = format .guessSymbolTokenization(file); BufferedReader br = new BufferedReader(new FileReader(file)); return new RichStreamReader(br, format, sTok, seqFactory, ns); } } throw new IOException("Could not recognise format of file: " + file.getName()); } /** * Guess which format a file is then attempt to read it. * * @return a RichSequenceIterator over each sequence in the * file * @param file * the File to attempt to read. * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @throws java.io.IOException * If the file cannot be read. */ public static RichSequenceIterator readFile(File file, Namespace ns) throws IOException { return readFile(file, factory, ns); } /** * Read a fasta file. 
* * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readFasta(BufferedReader br, SymbolTokenization sTok, Namespace ns) { return new RichStreamReader(br, new FastaFormat(), sTok, factory, ns); } /** * Read a fasta file building a custom type of RichSequence * . For example, use RichSequenceBuilderFactory.FACTORY to * emulate readFasta(BufferedReader, SymbolTokenization) * and RichSequenceBuilderFactory.PACKED to force all * symbols to be encoded using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a RichSequence * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readFasta(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new FastaFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an FASTA-format stream of DNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. 
* @return a RichSequenceIterator over each sequence in the * fasta file * @see #readHashedFastaDNA(BufferedInputStream, Namespace) for a * speeded up version that can access sequences from memory. */ public static RichSequenceIterator readFastaDNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new FastaFormat(), getDNAParser(), factory, ns); } /** * Iterate over the sequences in an FASTA-format stream of DNA * sequences. In contrast to readFastaDNA, this provides a speeded up * implementation where all sequences are accessed from memory. * * @param is * the BufferedInputStream to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file * @throws BioException * if somethings goes wrong while reading the file. * @see #readFastaDNA */ public static RichSequenceIterator readHashedFastaDNA( BufferedInputStream is, Namespace ns) throws BioException { Alphabet alpha = AlphabetManager.alphabetForName("DNA"); return new HashedFastaIterator(is, alpha, ns); } /** * Iterate over the sequences in an FASTA-format stream of RNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readFastaRNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new FastaFormat(), getRNAParser(), factory, ns); } /** * Iterate over the sequences in an FASTA-format stream of Protein * sequences. 
* * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readFastaProtein(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new FastaFormat(), getProteinParser(), factory, ns); } /** * Read a GenBank file using a custom type of SymbolList. For example, * use RichSequenceBuilderFactory.FACTORY to emulate * readFasta(BufferedReader, SymbolTokenization) and * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded * using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a SymbolList * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readGenbank(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new GenbankFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an GenBank-format stream of DNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. 
* @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readGenbankDNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new GenbankFormat(), getDNAParser(), factory, ns); } /** * Iterate over the sequences in an GenBank-format stream of RNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readGenbankRNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new GenbankFormat(), getRNAParser(), factory, ns); } /** * Iterate over the sequences in an GenBank-format stream of Protein * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readGenbankProtein( BufferedReader br, Namespace ns) { return new RichStreamReader(br, new GenbankFormat(), getProteinParser(), factory, ns); } /** * Read a INSDseq file using a custom type of SymbolList. For example, * use RichSequenceBuilderFactory.FACTORY to emulate * readFasta(BufferedReader, SymbolTokenization) and * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded * using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a SymbolList * @param ns * a Namespace to load the sequences into. 
Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readINSDseq(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new INSDseqFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an INSDseq-format stream of DNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readINSDseqDNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new INSDseqFormat(), getDNAParser(), factory, ns); } /** * Iterate over the sequences in an INSDseq-format stream of RNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readINSDseqRNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new INSDseqFormat(), getRNAParser(), factory, ns); } /** * Iterate over the sequences in an INSDseq-format stream of Protein * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. 
If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readINSDseqProtein( BufferedReader br, Namespace ns) { return new RichStreamReader(br, new INSDseqFormat(), getProteinParser(), factory, ns); } /** * Read a EMBLxml file using a custom type of SymbolList. For example, * use RichSequenceBuilderFactory.FACTORY to emulate * readFasta(BufferedReader, SymbolTokenization) and * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded * using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a SymbolList * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLxml(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new EMBLxmlFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an EMBLxml-format stream of DNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. 
* @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLxmlDNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new EMBLxmlFormat(), getDNAParser(), factory, ns); } /** * Iterate over the sequences in an EMBLxml-format stream of RNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLxmlRNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new EMBLxmlFormat(), getRNAParser(), factory, ns); } /** * Iterate over the sequences in an EMBLxml-format stream of Protein * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLxmlProtein( BufferedReader br, Namespace ns) { return new RichStreamReader(br, new EMBLxmlFormat(), getProteinParser(), factory, ns); } /** * Read a EMBL file using a custom type of SymbolList. For example, use * RichSequenceBuilderFactory.FACTORY to emulate * readFasta(BufferedReader, SymbolTokenization) and * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded * using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a SymbolList * @param ns * a Namespace to load the sequences into. 
Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBL(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new EMBLFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an EMBL-format stream of DNA sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLDNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new EMBLFormat(), getDNAParser(), factory, ns); } /** * Iterate over the sequences in an EMBL-format stream of RNA sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLRNA(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new EMBLFormat(), getRNAParser(), factory, ns); } /** * Iterate over the sequences in an EMBL-format stream of Protein * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. 
If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readEMBLProtein(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new EMBLFormat(), getProteinParser(), factory, ns); } /** * Read a UniProt file using a custom type of SymbolList. For example, * use RichSequenceBuilderFactory.FACTORY to emulate * readFasta(BufferedReader, SymbolTokenization) and * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded * using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a SymbolList * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readUniProt(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new UniProtFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an UniProt-format stream of RNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readUniProt(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new UniProtFormat(), getProteinParser(), factory, ns); } /** * Read a UniProt XML file using a custom type of SymbolList. 
For * example, use RichSequenceBuilderFactory.FACTORY to emulate * readFasta(BufferedReader, SymbolTokenization) and * RichSequenceBuilderFactory.PACKED to force all symbols to be encoded * using bit-packing. * * @param br * the BufferedReader to read data from * @param sTok * a SymbolTokenization that understands the * sequences * @param seqFactory * a factory used to build a SymbolList * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readUniProtXML(BufferedReader br, SymbolTokenization sTok, RichSequenceBuilderFactory seqFactory, Namespace ns) { return new RichStreamReader(br, new UniProtXMLFormat(), sTok, seqFactory, ns); } /** * Iterate over the sequences in an UniProt XML-format stream of RNA * sequences. * * @param br * the BufferedReader to read data from * @param ns * a Namespace to load the sequences into. Null * implies that it should use the namespace specified in the * file. If no namespace is specified in the file, then * RichObjectFactory.getDefaultNamespace() is * used. * @return a RichSequenceIterator over each sequence in the * fasta file */ public static RichSequenceIterator readUniProtXML(BufferedReader br, Namespace ns) { return new RichStreamReader(br, new UniProtXMLFormat(), getProteinParser(), factory, ns); } /** * Writes Sequences from a SequenceIterator to * an OutputStream in Fasta Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input RichSequences * @param ns * a Namespace to write the * RichSequences to. Null implies * that it should use the namespace specified in the * individual sequence. 
* @param header * the FastaHeader * @throws java.io.IOException * if there is an IO problem */ public static void writeFasta(OutputStream os, SequenceIterator in, Namespace ns, FastaHeader header) throws IOException { FastaFormat fastaFormat = new FastaFormat(); if (header != null) { fastaFormat.setHeader(header); } RichStreamWriter sw = new RichStreamWriter(os, fastaFormat); sw.writeStream(in, ns); } /** * Writes Sequences from a SequenceIterator to * an OutputStream in Fasta Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input RichSequences * @param ns * a Namespace to write the * RichSequences to. Null implies * that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeFasta(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { writeFasta(os, in, ns, null); } /** * Writes a single Sequence to an OutputStream * in Fasta format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeFasta(OutputStream os, Sequence seq, Namespace ns) throws IOException { writeFasta(os, new SingleRichSeqIterator(seq), ns, null); } /** * Writes a single Sequence to an OutputStream * in Fasta format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @param header * a FastaHeader that controls the fields in the * header. 
* @throws java.io.IOException * if there is an IO problem */ public static void writeFasta(OutputStream os, Sequence seq, Namespace ns, FastaHeader header) throws IOException { writeFasta(os, new SingleRichSeqIterator(seq), ns, header); } /** * Writes sequences from a SequenceIterator to an * OutputStream in GenBank Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input Sequences * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeGenbank(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { RichStreamWriter sw = new RichStreamWriter(os, new GenbankFormat()); sw.writeStream(in, ns); } /** * Writes a single Sequence to an OutputStream * in GenBank format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeGenbank(OutputStream os, Sequence seq, Namespace ns) throws IOException { writeGenbank(os, new SingleRichSeqIterator(seq), ns); } /** * Writes sequences from a SequenceIterator to an * OutputStream in INSDseq Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input Sequences * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. 
* @throws java.io.IOException * if there is an IO problem */ public static void writeINSDseq(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { RichStreamWriter sw = new RichStreamWriter(os, new INSDseqFormat()); sw.writeStream(in, ns); } /** * Writes a single Sequence to an OutputStream * in INSDseq format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeINSDseq(OutputStream os, Sequence seq, Namespace ns) throws IOException { writeINSDseq(os, new SingleRichSeqIterator(seq), ns); } /** * Writes sequences from a SequenceIterator to an * OutputStream in EMBLxml Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input Sequences * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeEMBLxml(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { RichStreamWriter sw = new RichStreamWriter(os, new EMBLxmlFormat()); sw.writeStream(in, ns); } /** * Writes a single Sequence to an OutputStream * in EMBLxml format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. 
* @throws java.io.IOException * if there is an IO problem */ public static void writeEMBLxml(OutputStream os, Sequence seq, Namespace ns) throws IOException { writeEMBLxml(os, new SingleRichSeqIterator(seq), ns); } /** * Writes sequences from a SequenceIterator to an * OutputStream in EMBL Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input Sequences * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeEMBL(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { RichStreamWriter sw = new RichStreamWriter(os, new EMBLFormat()); sw.writeStream(in, ns); } /** * Writes a single Sequence to an OutputStream * in EMBL format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeEMBL(OutputStream os, Sequence seq, Namespace ns) throws IOException { writeEMBL(os, new SingleRichSeqIterator(seq), ns); } /** * Writes sequences from a SequenceIterator to an * OutputStream in UniProt Format. This makes for a useful * format filter where a StreamReader can be sent to the * RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input Sequences * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. 
* @throws java.io.IOException * if there is an IO problem */ public static void writeUniProt(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { RichStreamWriter sw = new RichStreamWriter(os, new UniProtFormat()); sw.writeStream(in, ns); } /** * Writes a single Sequence to an OutputStream * in UniProt format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeUniProt(OutputStream os, Sequence seq, Namespace ns) throws IOException { writeUniProt(os, new SingleRichSeqIterator(seq), ns); } /** * Writes sequences from a SequenceIterator to an * OutputStream in UniProt XML Format. This makes for a * useful format filter where a StreamReader can be sent to * the RichStreamWriter after formatting. * * @param os * The stream to write fasta formatted data to * @param in * The source of input Sequences * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. * @throws java.io.IOException * if there is an IO problem */ public static void writeUniProtXML(OutputStream os, SequenceIterator in, Namespace ns) throws IOException { RichStreamWriter sw = new RichStreamWriter(os, new UniProtXMLFormat()); sw.writeStream(in, ns); } /** * Writes a single Sequence to an OutputStream * in UniProt XML format. * * @param os * the OutputStream. * @param seq * the Sequence. * @param ns * a Namespace to write the sequences to. Null * implies that it should use the namespace specified in the * individual sequence. 
* @throws java.io.IOException
         *             if there is an IO problem
         */
        public static void writeUniProtXML(OutputStream os, Sequence seq,
                Namespace ns) throws IOException {
            writeUniProtXML(os, new SingleRichSeqIterator(seq), ns);
        }

        /**
         * Creates a DNA symbol tokenizer.
         *
         * @return a SymbolTokenization for parsing DNA.
         * @throws BioError
         *             if the "token" tokenization cannot be obtained from the
         *             DNA alphabet
         */
        public static SymbolTokenization getDNAParser() {
            try {
                return DNATools.getDNA().getTokenization("token");
            } catch (BioException ex) {
                throw new BioError("Assertion failing:"
                        + " Couldn't get DNA token parser", ex);
            }
        }

        /**
         * Creates a RNA symbol tokenizer.
         *
         * @return a SymbolTokenization for parsing RNA.
         * @throws BioError
         *             if the "token" tokenization cannot be obtained from the
         *             RNA alphabet
         */
        public static SymbolTokenization getRNAParser() {
            try {
                return RNATools.getRNA().getTokenization("token");
            } catch (BioException ex) {
                throw new BioError("Assertion failing:"
                        + " Couldn't get RNA token parser", ex);
            }
        }

        /**
         * Creates a nucleotide symbol tokenizer.
         *
         * @return a SymbolTokenization for parsing nucleotides.
         * @throws BioError
         *             if the "token" tokenization cannot be obtained from the
         *             nucleotide alphabet
         */
        public static SymbolTokenization getNucleotideParser() {
            try {
                return NucleotideTools.getNucleotide()
                        .getTokenization("token");
            } catch (BioException ex) {
                throw new BioError("Assertion failing:"
                        + " Couldn't get nucleotide token parser", ex);
            }
        }

        /**
         * Creates a protein symbol tokenizer.
         *
         * @return a SymbolTokenization for parsing protein.
         * @throws BioError
         *             if the "token" tokenization cannot be obtained from the
         *             alphabet returned by ProteinTools.getTAlphabet()
         */
        public static SymbolTokenization getProteinParser() {
            try {
                return ProteinTools.getTAlphabet().getTokenization("token");
            } catch (BioException ex) {
                throw new BioError("Assertion failing:"
                        + " Couldn't get PROTEIN token parser", ex);
            }
        }

        /**
         * Used to iterate over a single rich sequence. The sequence is handed
         * out exactly once; afterwards hasNext() reports false and the
         * next...() methods throw java.util.NoSuchElementException.
         */
        public static final class SingleRichSeqIterator implements
                RichSequenceIterator {

            // The sequence still to be returned, or null once it has been
            // consumed.
            private RichSequence seq;

            /**
             * Creates an iterator over a single sequence. A sequence that is
             * already a RichSequence is used as is; any other sequence is
             * converted with RichSequence.Tools.enrich(Sequence).
             *
             * @param seq
             *            the sequence to iterate over.
             * @throws RuntimeException
             *             if enriching the sequence is vetoed
             */
            public SingleRichSeqIterator(Sequence seq) {
                try {
                    if (seq instanceof RichSequence) {
                        this.seq = (RichSequence) seq;
                    } else {
                        this.seq = RichSequence.Tools.enrich(seq);
                    }
                } catch (ChangeVetoException e) {
                    throw new RuntimeException("Unable to enrich sequence", e);
                }
            }

            /**
             * {@inheritDoc}
             *
             * @return true if another RichSequence is available
             */
            public boolean hasNext() {
                return seq != null;
            }

            /**
             * {@inheritDoc}
             *
             * @return a RichSequence
             * @throws java.util.NoSuchElementException
             *             if the sequence has already been returned
             */
            public Sequence nextSequence() {
                return this.nextRichSequence();
            }

            /**
             * {@inheritDoc}
             *
             * @return a RichSequence
             * @throws java.util.NoSuchElementException
             *             if the sequence has already been returned
             */
            public BioEntry nextBioEntry() {
                return this.nextRichSequence();
            }

            /**
             * {@inheritDoc}
             *
             * @return a RichSequence
             * @throws java.util.NoSuchElementException
             *             if the sequence has already been returned
             */
            public RichSequence nextRichSequence() {
                // Signal exhaustion explicitly instead of leaking a null to
                // callers that did not check hasNext() first.
                if (this.seq == null) {
                    throw new java.util.NoSuchElementException(
                            "The single sequence has already been returned");
                }
                RichSequence current = this.seq;
                this.seq = null;
                return current;
            }
        }
    }
}