# Copyright 2000-2001 by Andrew Dalke.
# Revisions copyright 2008 by Peter Cock.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Standard nucleotide and protein alphabets defined by IUPAC."""

from Bio import Alphabet
from Bio.Data import IUPACData

##################### Protein

# From the IUPAC definition at:
#   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21

# Sanity check: the alphabets below are documented as uppercase, so the
# underlying letter table must already be uppercase.
assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper()


class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    B = "Asx";  Aspartic acid (D) or Asparagine (N)
    X = "Xxx";  Unknown or 'other' amino acid
    Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
    J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    U = "Sec";  Selenocysteine
    O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    letters = IUPACData.extended_protein_letters


# Ready-made module-level instance of the extended protein alphabet.
extended_protein = ExtendedIUPACProtein()

# Same uppercase sanity check for the 20-letter protein table.
assert IUPACData.protein_letters == IUPACData.protein_letters.upper()


class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase IUPAC protein single letter alphabet of the 20 standard amino acids."""

    # Overrides the letters inherited from ExtendedIUPACProtein.
    letters = IUPACData.protein_letters


# Ready-made module-level instance of the standard protein alphabet.
protein = IUPACProtein()

##################### DNA

# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html


class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """Uppercase IUPAC ambiguous DNA."""

    letters = IUPACData.ambiguous_dna_letters


# Ready-made module-level instance of the ambiguous DNA alphabet.
ambiguous_dna = IUPACAmbiguousDNA()


class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC unambiguous DNA (letters GATC only)."""

    # Overrides the letters inherited from IUPACAmbiguousDNA.
    letters = IUPACData.unambiguous_dna_letters


# Ready-made module-level instance of the unambiguous DNA alphabet.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """Extended IUPAC DNA alphabet.

    In addition to the standard letter codes GATC, this includes:

    B = 5-bromouridine
    D = 5,6-dihydrouridine
    S = thiouridine
    W = wyosine
    """

    letters = IUPACData.extended_dna_letters


# Ready-made module-level instance of the extended DNA alphabet.
extended_dna = ExtendedIUPACDNA()

##################### RNA


class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """Uppercase IUPAC ambiguous RNA."""

    letters = IUPACData.ambiguous_rna_letters


# Ready-made module-level instance of the ambiguous RNA alphabet.
ambiguous_rna = IUPACAmbiguousRNA()


class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC unambiguous RNA (letters GAUC only)."""

    # Overrides the letters inherited from IUPACAmbiguousRNA.
    letters = IUPACData.unambiguous_rna_letters


# Ready-made module-level instance of the unambiguous RNA alphabet.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
#class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#    letters = extended_rna_letters
#    #   B == 5-bromouridine
#    #   D == 5,6-dihydrouridine
#    #   S == thiouridine
#    #   W == wyosine


# We need to load the property resolution information, but we need to
# wait until after the systems have been loaded. (There's a nasty loop
# where, eg, translation objects need an alphabet, which need to be
# associated with translators.)

from Bio.PropertyManager import default_manager

# The alphabet classes whose entries in default_manager.class_resolver are
# pointed at _bootstrap below and removed again by _bootstrap itself.
# Order matches the original explicit statements.
_BOOTSTRAPPED_ALPHABETS = (
    IUPACProtein,
    ExtendedIUPACProtein,
    IUPACAmbiguousDNA,
    IUPACUnambiguousDNA,
    ExtendedIUPACDNA,
    IUPACAmbiguousRNA,
    IUPACUnambiguousRNA,
)


def _bootstrap(manager, klass, property):
    """Unregister this resolver for every IUPAC alphabet, then resolve again.

    Arguments:
     - manager - must be the shared ``default_manager`` (asserted).
     - klass - the class being resolved; passed through unchanged.
     - property - the property being resolved; passed through unchanged.
       (The name shadows the builtin but is kept as part of the signature.)

    Returns whatever ``manager.resolve_class(klass, property)`` returns once
    the bootstrap entries have been deleted.

    Raises KeyError if one of the entries has already been removed from
    ``default_manager.class_resolver``.
    """
    assert manager is default_manager
    # Drop all seven registrations so the follow-up resolve_class call no
    # longer finds this function under those keys.
    # NOTE(review): nothing is imported or loaded here before re-resolving;
    # confirm the property information is registered elsewhere.
    for alphabet_class in _BOOTSTRAPPED_ALPHABETS:
        del default_manager.class_resolver[alphabet_class]
    return manager.resolve_class(klass, property)


# Route resolution for each IUPAC alphabet class through _bootstrap.
for _alphabet_class in _BOOTSTRAPPED_ALPHABETS:
    default_manager.class_resolver[_alphabet_class] = _bootstrap
del _alphabet_class