/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.bio.seq.io;
import org.biojava.utils.lsid.LifeScienceIdentifier;
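/**
 * {@code SeqIOConstants} contains constants used to identify
 * sequence formats, alphabets etc, in the context of reading and
 * writing sequences.
 *
 * <p>An {@code int} used to specify symbol alphabet and
 * sequence format type is derived thus:</p>
 *
 * <ul>
 *   <li>The two least significant bytes hold the format type, such
 *   as RAW, FASTA or EMBL.</li>
 *   <li>The two most significant bytes hold alphabet and symbol
 *   information, such as AMBIGUOUS, DNA, RNA, AA or INTEGER.</li>
 *   <li>Bitwise OR combinations of each component {@code int} are used
 *   to specify combinations of format type and symbol information. To
 *   derive an {@code int} identifier for DNA with ambiguity codes
 *   in Fasta format, bitwise OR the AMBIGUOUS, DNA and FASTA values.</li>
 * </ul>
 */
/**
 * {@code AMBIGUOUS} indicates that a sequence contains
 * ambiguity symbols. The first bit of the most significant word
 * of the int is set.
 */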
public static final int AMBIGUOUS = 0x00010000; // 1 << 16

/**
 * Alphabet flag: the sequence contains DNA (deoxyribonucleic acid)
 * symbols. This is the second bit of the most significant word of
 * the {@code int}, i.e. {@code 1 << 17}.
 */
public static final int DNA = 0x00020000;

/**
 * Alphabet flag: the sequence contains RNA (ribonucleic acid)
 * symbols. This is the third bit of the most significant word of
 * the {@code int}, i.e. {@code 1 << 18}.
 */
public static final int RNA = 0x00040000;

/**
 * Alphabet flag: the sequence contains AA (amino acid) symbols.
 * This is the fourth bit of the most significant word of the
 * {@code int}, i.e. {@code 1 << 19}.
 */
public static final int AA = 0x00080000;

/**
 * Alphabet flag: the sequence contains integer alphabet symbols,
 * such as those used to describe sequence quality data. This is the
 * fifth bit of the most significant word of the {@code int}, i.e.
 * {@code 1 << 20}.
 */
public static final int INTEGER = 0x00100000;
/**
 * Format type: the sequence format is unknown.
 */
public static final int UNKNOWN = 0;

/**
 * Format type: the sequence format is raw (symbols only).
 */
public static final int RAW = 1;

/**
 * Format type: the sequence format is Fasta.
 */
public static final int FASTA = 2;

/**
 * Format type: the sequence format is NBRF.
 */
public static final int NBRF = 3;

/**
 * Format type: the sequence format is IG.
 */
public static final int IG = 4;

/**
 * Format type: the sequence format is EMBL.
 */
public static final int EMBL = 10;

/**
 * Format type: the sequence format is SWISSPROT. The format is
 * always protein, so the {@link #AA} bit is already set in this
 * value.
 */
public static final int SWISSPROT = AA | 11;

/**
 * Format type: the sequence format is GENBANK.
 */
public static final int GENBANK = 12;

/**
 * Format type: the sequence format is GENPEPT. The format is
 * always protein, so the {@link #AA} bit is already set in this
 * value.
 */
public static final int GENPEPT = AA | 13;

/**
 * Format type: the sequence format is REFSEQ.
 */
public static final int REFSEQ = 14;

/**
 * Format type: the sequence format is GCG.
 */
public static final int GCG = 15;

/**
 * Format type: the sequence format is GFF.
 */
public static final int GFF = 20;

/**
 * Format type: the sequence format is PDB. The format is always
 * protein, so the {@link #AA} bit is already set in this value.
 */
public static final int PDB = AA | 21;

/**
 * Format type: the sequence format is PHRED. The format is always
 * DNA, so the {@link #DNA} bit is already set; the {@link #INTEGER}
 * bit is set as well, for the quality data.
 */
public static final int PHRED = DNA | INTEGER | 30;
/**
 * Ready-made combination of the {@link #EMBL} format with the
 * {@link #DNA} alphabet flag ({@code EMBL | DNA}).
 */
public static final int EMBL_DNA = DNA | EMBL;

/**
 * Ready-made combination of the {@link #EMBL} format with the
 * {@link #RNA} alphabet flag ({@code EMBL | RNA}).
 */
public static final int EMBL_RNA = RNA | EMBL;

/**
 * Ready-made combination of the {@link #EMBL} format with the
 * {@link #AA} alphabet flag ({@code EMBL | AA}).
 */
public static final int EMBL_AA = AA | EMBL;
/**
 * Ready-made combination of the {@link #GENBANK} format with the
 * {@link #DNA} alphabet flag ({@code GENBANK | DNA}).
 */
public static final int GENBANK_DNA = DNA | GENBANK;

/**
 * Ready-made combination of the {@link #GENBANK} format with the
 * {@link #RNA} alphabet flag ({@code GENBANK | RNA}).
 */
public static final int GENBANK_RNA = RNA | GENBANK;

/**
 * Ready-made combination of the {@link #GENBANK} format with the
 * {@link #AA} alphabet flag ({@code GENBANK | AA}).
 */
public static final int GENBANK_AA = AA | GENBANK;
/**
 * Ready-made combination of the {@link #REFSEQ} format with the
 * {@link #DNA} alphabet flag ({@code REFSEQ | DNA}).
 */
public static final int REFSEQ_DNA = DNA | REFSEQ;

/**
 * Ready-made combination of the {@link #REFSEQ} format with the
 * {@link #RNA} alphabet flag ({@code REFSEQ | RNA}).
 */
public static final int REFSEQ_RNA = RNA | REFSEQ;

/**
 * Ready-made combination of the {@link #REFSEQ} format with the
 * {@link #AA} alphabet flag ({@code REFSEQ | AA}).
 */
public static final int REFSEQ_AA = AA | REFSEQ;
/**
 * Ready-made combination of the {@link #FASTA} format with the
 * {@link #DNA} alphabet flag ({@code FASTA | DNA}).
 */
public static final int FASTA_DNA = DNA | FASTA;

/**
 * Ready-made combination of the {@link #FASTA} format with the
 * {@link #RNA} alphabet flag ({@code FASTA | RNA}).
 */
public static final int FASTA_RNA = RNA | FASTA;

/**
 * Ready-made combination of the {@link #FASTA} format with the
 * {@link #AA} alphabet flag ({@code FASTA | AA}).
 */
public static final int FASTA_AA = AA | FASTA;
/**
 * Sequence format LSID (life science identifier) for Fasta DNA.
 */
public static final LifeScienceIdentifier LSID_FASTA_DNA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "fasta", "dna");

/**
 * Sequence format LSID (life science identifier) for Fasta RNA.
 */
public static final LifeScienceIdentifier LSID_FASTA_RNA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "fasta", "rna");

/**
 * Sequence format LSID (life science identifier) for Fasta AA.
 * Note that the last component is spelled "protein", not "aa".
 */
public static final LifeScienceIdentifier LSID_FASTA_AA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "fasta", "protein");
/**
 * Sequence format LSID (life science identifier) for EMBL DNA.
 */
public static final LifeScienceIdentifier LSID_EMBL_DNA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "embl", "dna");

/**
 * Sequence format LSID (life science identifier) for EMBL RNA.
 */
public static final LifeScienceIdentifier LSID_EMBL_RNA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "embl", "rna");

/**
 * Sequence format LSID (life science identifier) for EMBL AA.
 * Note that the last component is spelled "protein", not "aa".
 */
public static final LifeScienceIdentifier LSID_EMBL_AA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "embl", "protein");
/**
 * Sequence format LSID (life science identifier) for Genbank DNA.
 */
public static final LifeScienceIdentifier LSID_GENBANK_DNA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "genbank", "dna");

/**
 * Sequence format LSID (life science identifier) for Genbank RNA.
 */
public static final LifeScienceIdentifier LSID_GENBANK_RNA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "genbank", "rna");

/**
 * Sequence format LSID (life science identifier) for Genbank AA.
 * Note that the last component is spelled "protein", not "aa".
 */
public static final LifeScienceIdentifier LSID_GENBANK_AA = LifeScienceIdentifier.valueOf(
    "open-bio.org", "genbank", "protein");
/**
 * Sequence format LSID (life science identifier) for Swissprot.
 * Note that the middle component is spelled "swiss", not "swissprot".
 */
public static final LifeScienceIdentifier LSID_SWISSPROT = LifeScienceIdentifier.valueOf(
    "open-bio.org", "swiss", "protein");
}