/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.io;

import org.biojava.utils.lsid.LifeScienceIdentifier;

/**
 * <code>SeqIOConstants</code> contains constants used to identify
 * sequence formats, alphabets etc, in the context of reading and
 * writing sequences.
 *
 * <p>An <code>int</code> used to specify symbol alphabet and
 * sequence format type is derived thus:</p>
 *
 * <ul>
 *   <li>The least significant 16-bit word holds the format type
 *   (<code>RAW</code>, <code>FASTA</code>, <code>EMBL</code>, ...).
 *   Format types are plain numbers, not bit flags, so two format
 *   types must never be ORed together.</li>
 *
 *   <li>The most significant 16-bit word holds alphabet and symbol
 *   flags (<code>AMBIGUOUS</code>, <code>DNA</code>,
 *   <code>RNA</code>, <code>AA</code>, <code>INTEGER</code>), one
 *   bit each.</li>
 *
 *   <li>A complete identifier is the bitwise OR of one format type
 *   with any alphabet flags that apply, e.g.
 *   <code>FASTA | DNA | AMBIGUOUS</code>. Some formats
 *   (<code>SWISSPROT</code>, <code>GENPEPT</code>, <code>PDB</code>,
 *   <code>PHRED</code>) are defined with their alphabet flags
 *   already included.</li>
 * </ul>
 *
 * @author Keith James
 */
public final class SeqIOConstants
{
    // ------------------------------------------------------------
    // Alphabet / symbol flags: one bit each in the upper 16-bit word
    // ------------------------------------------------------------

    /**
     * <code>AMBIGUOUS</code> flags a sequence which contains
     * ambiguity symbols. This is the first bit of the most
     * significant word of the <code>int</code> (bit 16).
     */
    public static final int AMBIGUOUS = 0x00010000;

    /**
     * <code>DNA</code> flags a sequence which contains DNA
     * (deoxyribonucleic acid) symbols. This is the second bit of
     * the most significant word of the <code>int</code> (bit 17).
     */
    public static final int DNA = 0x00020000;

    /**
     * <code>RNA</code> flags a sequence which contains RNA
     * (ribonucleic acid) symbols. This is the third bit of the most
     * significant word of the <code>int</code> (bit 18).
     */
    public static final int RNA = 0x00040000;

    /**
     * <code>AA</code> flags a sequence which contains AA (amino
     * acid) symbols. This is the fourth bit of the most significant
     * word of the <code>int</code> (bit 19).
     */
    public static final int AA = 0x00080000;

    /**
     * <code>INTEGER</code> flags a sequence which contains integer
     * alphabet symbols, such as used to describe sequence quality
     * data. This is the fifth bit of the most significant word of
     * the <code>int</code> (bit 20).
     */
    public static final int INTEGER = 0x00100000;

    // ------------------------------------------------------------
    // Format types: plain numbers in the lower 16-bit word
    // ------------------------------------------------------------

    /**
     * <code>UNKNOWN</code>: the sequence format is not known.
     */
    public static final int UNKNOWN = 0;

    /**
     * <code>RAW</code>: the sequence format is raw (symbols only).
     */
    public static final int RAW = 1;

    /**
     * <code>FASTA</code>: the sequence format is Fasta.
     */
    public static final int FASTA = 2;

    /**
     * <code>NBRF</code>: the sequence format is NBRF.
     */
    public static final int NBRF = 3;

    /**
     * <code>IG</code>: the sequence format is IG.
     */
    public static final int IG = 4;

    /**
     * <code>EMBL</code>: the sequence format is EMBL.
     */
    public static final int EMBL = 10;

    /**
     * <code>SWISSPROT</code>: the sequence format is SWISSPROT.
     * This format is always protein, so the <code>AA</code> bit is
     * already set in this value.
     */
    public static final int SWISSPROT = AA | 11;

    /**
     * <code>GENBANK</code>: the sequence format is GENBANK.
     */
    public static final int GENBANK = 12;

    /**
     * <code>GENPEPT</code>: the sequence format is GENPEPT. This
     * format is always protein, so the <code>AA</code> bit is
     * already set in this value.
     */
    public static final int GENPEPT = AA | 13;

    /**
     * <code>REFSEQ</code>: the sequence format is REFSEQ.
     */
    public static final int REFSEQ = 14;

    /**
     * <code>GCG</code>: the sequence format is GCG.
     */
    public static final int GCG = 15;

    /**
     * <code>GFF</code>: the sequence format is GFF.
     */
    public static final int GFF = 20;

    /**
     * <code>PDB</code>: the sequence format is PDB. This format is
     * always protein, so the <code>AA</code> bit is already set in
     * this value.
     */
    public static final int PDB = AA | 21;

    /**
     * <code>PHRED</code>: the sequence format is PHRED. This format
     * is always DNA, so the <code>DNA</code> bit is already set in
     * this value, as is the <code>INTEGER</code> bit for the
     * quality data.
     */
    public static final int PHRED = DNA | INTEGER | 30;

    // ------------------------------------------------------------
    // Premade format + alphabet combinations
    // ------------------------------------------------------------

    /**
     * <code>EMBL_DNA</code> premade <code>EMBL | DNA</code>.
     */
    public static final int EMBL_DNA = DNA | EMBL;

    /**
     * <code>EMBL_RNA</code> premade <code>EMBL | RNA</code>.
     */
    public static final int EMBL_RNA = RNA | EMBL;

    /**
     * <code>EMBL_AA</code> premade <code>EMBL | AA</code>.
     */
    public static final int EMBL_AA = AA | EMBL;

    /**
     * <code>GENBANK_DNA</code> premade <code>GENBANK | DNA</code>.
     */
    public static final int GENBANK_DNA = DNA | GENBANK;

    /**
     * <code>GENBANK_RNA</code> premade <code>GENBANK | RNA</code>.
     */
    public static final int GENBANK_RNA = RNA | GENBANK;

    /**
     * <code>GENBANK_AA</code> premade <code>GENBANK | AA</code>.
     */
    public static final int GENBANK_AA = AA | GENBANK;

    /**
     * <code>REFSEQ_DNA</code> premade <code>REFSEQ | DNA</code>.
     */
    public static final int REFSEQ_DNA = DNA | REFSEQ;

    /**
     * <code>REFSEQ_RNA</code> premade <code>REFSEQ | RNA</code>.
     */
    public static final int REFSEQ_RNA = RNA | REFSEQ;

    /**
     * <code>REFSEQ_AA</code> premade <code>REFSEQ | AA</code>.
     */
    public static final int REFSEQ_AA = AA | REFSEQ;

    /**
     * <code>FASTA_DNA</code> premade <code>FASTA | DNA</code>.
     */
    public static final int FASTA_DNA = DNA | FASTA;

    /**
     * <code>FASTA_RNA</code> premade <code>FASTA | RNA</code>.
     */
    public static final int FASTA_RNA = RNA | FASTA;

    /**
     * <code>FASTA_AA</code> premade <code>FASTA | AA</code>.
     */
    public static final int FASTA_AA = AA | FASTA;

    // ------------------------------------------------------------
    // Life Science Identifiers for format + alphabet combinations
    // ------------------------------------------------------------

    /**
     * First component shared by every LSID defined in this class.
     */
    private static final String LSID_AUTHORITY = "open-bio.org";

    /**
     * <code>LSID_FASTA_DNA</code> sequence format LSID for Fasta
     * DNA.
     */
    public static final LifeScienceIdentifier LSID_FASTA_DNA =
        lsid("fasta", "dna");

    /**
     * <code>LSID_FASTA_RNA</code> sequence format LSID for Fasta
     * RNA.
     */
    public static final LifeScienceIdentifier LSID_FASTA_RNA =
        lsid("fasta", "rna");

    /**
     * <code>LSID_FASTA_AA</code> sequence format LSID for Fasta
     * AA.
     */
    public static final LifeScienceIdentifier LSID_FASTA_AA =
        lsid("fasta", "protein");

    /**
     * <code>LSID_EMBL_DNA</code> sequence format LSID for EMBL
     * DNA.
     */
    public static final LifeScienceIdentifier LSID_EMBL_DNA =
        lsid("embl", "dna");

    /**
     * <code>LSID_EMBL_RNA</code> sequence format LSID for EMBL
     * RNA.
     */
    public static final LifeScienceIdentifier LSID_EMBL_RNA =
        lsid("embl", "rna");

    /**
     * <code>LSID_EMBL_AA</code> sequence format LSID for EMBL
     * AA.
     */
    public static final LifeScienceIdentifier LSID_EMBL_AA =
        lsid("embl", "protein");

    /**
     * <code>LSID_GENBANK_DNA</code> sequence format LSID for
     * Genbank DNA.
     */
    public static final LifeScienceIdentifier LSID_GENBANK_DNA =
        lsid("genbank", "dna");

    /**
     * <code>LSID_GENBANK_RNA</code> sequence format LSID for
     * Genbank RNA.
     */
    public static final LifeScienceIdentifier LSID_GENBANK_RNA =
        lsid("genbank", "rna");

    /**
     * <code>LSID_GENBANK_AA</code> sequence format LSID for
     * Genbank AA.
     */
    public static final LifeScienceIdentifier LSID_GENBANK_AA =
        lsid("genbank", "protein");

    /**
     * <code>LSID_SWISSPROT</code> sequence format LSID for
     * Swissprot.
     */
    public static final LifeScienceIdentifier LSID_SWISSPROT =
        lsid("swiss", "protein");

    /**
     * Builds one of the LSID constants above by passing
     * <code>"open-bio.org"</code> and the two supplied strings,
     * unchanged and in this order, to
     * <code>LifeScienceIdentifier.valueOf(String, String, String)</code>.
     *
     * @param format the second component; in this class always a
     * lower case format name such as <code>"fasta"</code>.
     * @param alphabet the third component; in this class always
     * <code>"dna"</code>, <code>"rna"</code> or
     * <code>"protein"</code>.
     *
     * @return the value returned by
     * <code>LifeScienceIdentifier.valueOf</code>.
     */
    private static LifeScienceIdentifier lsid(String format, String alphabet)
    {
        return LifeScienceIdentifier.valueOf(LSID_AUTHORITY, format, alphabet);
    }
}