# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Unit tests for Bio.Seq (Seq, MutableSeq, UnknownSeq and module level functions)."""

from __future__ import print_function

import array
import copy
import sys
import warnings

# Remove unittest2 import after dropping support for Python2.6
if sys.version_info < (2, 7):
    try:
        import unittest2 as unittest
    except ImportError:
        from Bio import MissingPythonDependencyError
        raise MissingPythonDependencyError("Under Python 2.6 this test needs the unittest2 library")
else:
    import unittest

from Bio import Alphabet
from Bio import Seq
from Bio.Alphabet import IUPAC, Gapped
from Bio.Data.IUPACData import ambiguous_dna_complement, ambiguous_rna_complement
from Bio.Data.IUPACData import ambiguous_dna_values, ambiguous_rna_values
from Bio.Data.CodonTable import TranslationError
from Bio.Data.CodonTable import standard_dna_table
from Bio.Seq import MutableSeq

# Type code handed to array.array() when building character arrays.
if sys.version_info[0] == 3:
    array_indicator = "u"
else:
    array_indicator = "c"

# Shared fixture. Some tests address entries by position (for example
# index 21, the mixed U/T sequence), so the order must not change.
test_seqs = [
    Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna),
    Seq.Seq("T", IUPAC.ambiguous_dna),
    Seq.Seq("ATGAAACTG"),
    Seq.Seq("ATGAARCTG"),
    Seq.Seq("AWGAARCKG"),  # Note no U or T
    Seq.Seq("".join(ambiguous_rna_values)),
    Seq.Seq("".join(ambiguous_dna_values)),
    Seq.Seq("".join(ambiguous_rna_values), Alphabet.generic_rna),
    Seq.Seq("".join(ambiguous_dna_values), Alphabet.generic_dna),
    Seq.Seq("".join(ambiguous_rna_values), IUPAC.IUPACAmbiguousRNA()),
    Seq.Seq("".join(ambiguous_dna_values), IUPAC.IUPACAmbiguousDNA()),
    Seq.Seq("AWGAARCKG", Alphabet.generic_dna),
    Seq.Seq("AUGAAACUG", Alphabet.generic_rna),
    Seq.Seq("ATGAAACTG", IUPAC.unambiguous_dna),
    Seq.Seq("ATGAAA-CTG", Alphabet.Gapped(IUPAC.unambiguous_dna)),
    Seq.Seq("ATGAAACTGWN", IUPAC.ambiguous_dna),
    Seq.Seq("AUGAAACUG", Alphabet.generic_rna),
    Seq.Seq("AUGAAA==CUG", Alphabet.Gapped(Alphabet.generic_rna, "=")),
    Seq.Seq("AUGAAACUG", IUPAC.unambiguous_rna),
    Seq.Seq("AUGAAACUGWN", IUPAC.ambiguous_rna),
    Seq.Seq("ATGAAACTG", Alphabet.generic_nucleotide),
    Seq.Seq("AUGAAACTG", Alphabet.generic_nucleotide),  # U and T
    Seq.MutableSeq("ATGAAACTG", Alphabet.generic_dna),
    Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna),
    Seq.Seq("ACTGTCGTCT", Alphabet.generic_protein),
]

# Protein sequences used to check that nucleotide-only operations fail.
protein_seqs = [
    Seq.Seq("ATCGPK", IUPAC.protein),
    Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, ".")),
    Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-")),
    Seq.Seq("MEDG-KRXR*", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
    Seq.MutableSeq("ME-K-DRXR*XU", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
    Seq.Seq("MEDG-KRXR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.extended_protein, "-"), "@")),
    Seq.Seq("ME-KR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.protein, "-"), "@")),
    Seq.Seq("MEDG.KRXR@", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "@"), ".")),
]


class TestSeq(unittest.TestCase):
    """Basic construction, indexing, slicing and concatenation of Seq objects."""

    def setUp(self):
        """Create the unambiguous DNA Seq shared by the tests"""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)

    def test_as_string(self):
        """Test converting Seq to string"""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.s))

    def test_construction_using_a_seq_object(self):
        """Test using a Seq object to initialize another Seq object"""
        with self.assertRaises(TypeError):
            Seq.Seq(self.s)

    def test_repr(self):
        """Test representation of Seq object"""
        self.assertEqual("Seq('TCAAAAGGATGCATCATG', IUPACUnambiguousDNA())", repr(self.s))

    def test_truncated_repr(self):
        """Test representation of a long Seq object is truncated"""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "Seq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(Seq.Seq(seq, IUPAC.ambiguous_dna)))

    def test_length(self):
        """Test len method on Seq object"""
        self.assertEqual(18, len(self.s))

    def test_first_nucleotide(self):
        """Test getting first nucleotide of Seq"""
        self.assertEqual("T", self.s[0])

    def test_last_nucleotide(self):
        """Test getting last nucleotide of Seq"""
        self.assertEqual("G", self.s[-1])

    def test_slicing(self):
        """Test slicing of Seq"""
        self.assertEqual("AA", str(self.s[3:5]))

    def test_reverse(self):
        """Test reverse using -1 stride"""
        self.assertEqual("GTACTACGTAGGAAAACT", self.s[::-1])

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual("TAGTAA", str(self.s[0::3]))
        self.assertEqual("CAGGTT", str(self.s[1::3]))
        self.assertEqual("AAACCG", str(self.s[2::3]))

    def test_alphabet_letters(self):
        """Test nucleotides in DNA Seq"""
        self.assertEqual("GATC", self.s.alphabet.letters)

    def test_alphabet(self):
        """Test alphabet of derived Seq object"""
        t = Seq.Seq("T", IUPAC.unambiguous_dna)
        u = self.s + t
        self.assertEqual("IUPACUnambiguousDNA()", str(u.alphabet))

    def test_length_concatenated_unambiguous_seq(self):
        """Test length of concatenated Seq object with unambiguous DNA"""
        t = Seq.Seq("T", IUPAC.unambiguous_dna)
        u = self.s + t
        self.assertEqual(19, len(u))

    def test_concatenation_of_seq(self):
        """Test the letters of two concatenated Seq objects"""
        t = Seq.Seq("T", IUPAC.unambiguous_dna)
        u = self.s + t
        self.assertEqual(str(self.s) + "T", str(u))

    def test_concatenation_error(self):
        """Test DNA Seq objects cannot be concatenated with Protein Seq objects"""
        with self.assertRaises(TypeError):
            self.s + Seq.Seq("T", IUPAC.protein)

    def test_concatenation_of_ambiguous_and_unambiguous_dna(self):
        """Test concatenated Seq object with ambiguous and unambiguous DNA returns ambiguous Seq"""
        t = Seq.Seq("T", IUPAC.ambiguous_dna)
        u = self.s + t
        self.assertEqual("IUPACAmbiguousDNA()", str(u.alphabet))

    def test_ungap(self):
        """Test ungap with a valid, a multi-letter and a missing gap character"""
        self.assertEqual("ATCCCA", str(Seq.Seq("ATC-CCA").ungap("-")))
        with self.assertRaises(ValueError):
            Seq.Seq("ATC-CCA").ungap("--")
        with self.assertRaises(ValueError):
            Seq.Seq("ATC-CCA").ungap()


class TestSeqStringMethods(unittest.TestCase):
    """Compare the string-like methods of Seq with the equivalent str methods."""

    def setUp(self):
        """Create DNA, RNA, generic nucleotide and protein example sequences"""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.dna = [
            Seq.Seq("ATCG", IUPAC.ambiguous_dna),
            Seq.Seq("gtca", Alphabet.generic_dna),
            Seq.MutableSeq("GGTCA", Alphabet.generic_dna),
            Seq.Seq("CTG-CA", Alphabet.Gapped(IUPAC.unambiguous_dna, "-")),
        ]
        self.rna = [
            Seq.Seq("AUUUCG", IUPAC.ambiguous_rna),
            Seq.MutableSeq("AUUCG", IUPAC.ambiguous_rna),
            Seq.Seq("uCAg", Alphabet.generic_rna),
            Seq.MutableSeq("UC-AG", Alphabet.Gapped(Alphabet.generic_rna, "-")),
            Seq.Seq("U.CAG", Alphabet.Gapped(Alphabet.generic_rna, ".")),
        ]
        self.nuc = [Seq.Seq("ATCG", Alphabet.generic_nucleotide)]
        self.protein = [
            Seq.Seq("ATCGPK", IUPAC.protein),
            Seq.Seq("atcGPK", Alphabet.generic_protein),
            Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, ".")),
            Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-")),
            Seq.Seq("MEDG-KRXR*", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
            Seq.MutableSeq("ME-K-DRXR*XU", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
            Seq.Seq("MEDG-KRXR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.extended_protein, "-"), "@")),
            Seq.Seq("ME-KR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.protein, "-"), "@")),
            Seq.Seq("MEDG.KRXR@", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "@"), ".")),
        ]
        self.test_chars = ["-", Seq.Seq("-"), Seq.Seq("*"), "-X@"]

    def test_string_methods(self):
        """Test strip, lstrip, rstrip, lower and upper against str (Seq objects only)"""
        for a in self.dna + self.rna + self.nuc + self.protein:
            if isinstance(a, Seq.Seq):
                self.assertEqual(str(a.strip()), str(a).strip())
                self.assertEqual(str(a.lstrip()), str(a).lstrip())
                self.assertEqual(str(a.rstrip()), str(a).rstrip())
                self.assertEqual(str(a.lower()), str(a).lower())
                self.assertEqual(str(a.upper()), str(a).upper())

    def test_hash(self):
        """Test hash can be called on a Seq (any warnings are captured)"""
        with warnings.catch_warnings(record=True):
            hash(self.s)

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Test __eq__ comparison method"""
        with warnings.catch_warnings(record=True):
            Seq.Seq("TCAAAA", IUPAC.ambiguous_dna) == Seq.Seq("TCAAAA", IUPAC.ambiguous_rna)

    def test_not_equal_comparsion(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(Seq.Seq("TCAAA", IUPAC.ambiguous_dna),
                            Seq.Seq("TCAAAA", IUPAC.ambiguous_dna))

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Test __lt__ comparison method"""
        seq1 = Seq.Seq("TCAAA", IUPAC.ambiguous_dna)
        seq2 = Seq.Seq("UCAAAA", IUPAC.ambiguous_rna)
        with warnings.catch_warnings(record=True):
            self.assertTrue(seq1 < seq2)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Test __le__ comparison method"""
        seq1 = Seq.Seq("TCAAA", IUPAC.ambiguous_dna)
        seq2 = Seq.Seq("UCAAAA", IUPAC.ambiguous_rna)
        with warnings.catch_warnings(record=True):
            self.assertTrue(seq1 <= seq2)

    def test_add_method_using_wrong_object(self):
        """Test adding a dict to a Seq raises TypeError"""
        with self.assertRaises(TypeError):
            self.s + dict()

    def test_radd_method(self):
        """Test __radd__ with a Seq of the same alphabet"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         str(self.s.__radd__(self.s)))

    def test_radd_method_using_incompatible_alphabets(self):
        """Test __radd__ with an RNA Seq raises TypeError"""
        rna_seq = Seq.Seq("UCAAAA", IUPAC.ambiguous_rna)
        with self.assertRaises(TypeError):
            self.s.__radd__(rna_seq)

    def test_radd_method_using_wrong_object(self):
        """Test __radd__ with a dict raises TypeError"""
        with self.assertRaises(TypeError):
            self.s.__radd__(dict())

    def test_to_string_deprecated_method(self):
        """Test tostring can still be called (any warnings are captured)"""
        with warnings.catch_warnings(record=True):
            self.s.tostring()

    def test_contains_method(self):
        """Test the in operator with a string"""
        self.assertTrue("AAAA" in self.s)

    def test_startswith(self):
        """Test startswith with a string and with a tuple plus start offset"""
        self.assertTrue(self.s.startswith("TCA"))
        self.assertTrue(self.s.startswith(("CAA", "CTA"), 1))

    def test_endswith(self):
        """Test endswith with a string and with a tuple"""
        self.assertTrue(self.s.endswith("ATG"))
        self.assertTrue(self.s.endswith(("ATG", "CTA")))

    def test_append_nucleotides(self):
        """Test appending nucleotide Seq objects to the list of test characters"""
        self.test_chars.append(Seq.Seq("A", IUPAC.ambiguous_dna))
        self.test_chars.append(Seq.Seq("A", IUPAC.ambiguous_rna))
        self.test_chars.append(Seq.Seq("A", Alphabet.generic_nucleotide))
        self.assertEqual(7, len(self.test_chars))

    def test_append_proteins(self):
        """Test appending protein Seq objects to the list of test characters"""
        self.test_chars.append(Seq.Seq("K", Alphabet.generic_protein))
        self.test_chars.append(Seq.Seq("K-", Alphabet.Gapped(Alphabet.generic_protein, "-")))
        self.test_chars.append(Seq.Seq("K@", Alphabet.Gapped(IUPAC.protein, "@")))
        self.assertEqual(7, len(self.test_chars))

    def test_exception_when_clashing_alphabets(self):
        """Test by setting up clashing alphabet sequences"""
        b = Seq.Seq("-", Alphabet.generic_nucleotide)
        self.assertRaises(TypeError, self.protein[0].strip, b)

        b = Seq.Seq("-", Alphabet.generic_protein)
        self.assertRaises(TypeError, self.dna[0].strip, b)

    def test_stripping_characters(self):
        """Test strip, lstrip and rstrip with an argument against str (Seq objects only)"""
        for a in self.dna + self.rna + self.nuc + self.protein:
            for char in self.test_chars:
                str_char = str(char)
                if isinstance(a, Seq.Seq):
                    self.assertEqual(str(a.strip(char)), str(a).strip(str_char))
                    self.assertEqual(str(a.lstrip(char)), str(a).lstrip(str_char))
                    self.assertEqual(str(a.rstrip(char)), str(a).rstrip(str_char))

    def test_finding_characters(self):
        """Test find and rfind against str (Seq objects only)"""
        for a in self.dna + self.rna + self.nuc + self.protein:
            for char in self.test_chars:
                str_char = str(char)
                if isinstance(a, Seq.Seq):
                    self.assertEqual(a.find(char), str(a).find(str_char))
                    self.assertEqual(a.find(char, 2, -2), str(a).find(str_char, 2, -2))
                    self.assertEqual(a.rfind(char), str(a).rfind(str_char))
                    self.assertEqual(a.rfind(char, 2, -2), str(a).rfind(str_char, 2, -2))

    def test_counting_characters(self):
        """Test count against str (Seq objects only)"""
        for a in self.dna + self.rna + self.nuc + self.protein:
            for char in self.test_chars:
                str_char = str(char)
                if isinstance(a, Seq.Seq):
                    self.assertEqual(a.count(char), str(a).count(str_char))
                    self.assertEqual(a.count(char, 2, -2), str(a).count(str_char, 2, -2))

    def test_splits(self):
        """Test split and rsplit against str (Seq objects only)"""
        for a in self.dna + self.rna + self.nuc + self.protein:
            for char in self.test_chars:
                str_char = str(char)
                if isinstance(a, Seq.Seq):
                    self.assertEqual([str(x) for x in a.split(char)],
                                     str(a).split(str_char))
                    self.assertEqual([str(x) for x in a.rsplit(char)],
                                     str(a).rsplit(str_char))

                    for max_sep in [0, 1, 2, 999]:
                        self.assertEqual([str(x) for x in a.split(char, max_sep)],
                                         str(a).split(str_char, max_sep))


class TestSeqAddition(unittest.TestCase):
    """Addition of Seq, MutableSeq and plain strings across alphabets.

    Several tests pop entries from the fixture lists by position, so the
    order of the lists built in setUp matters.
    """

    def setUp(self):
        """Create DNA, RNA, generic nucleotide and protein lists, each ending with a plain str"""
        self.dna = [
            Seq.Seq("ATCG", IUPAC.ambiguous_dna),
            Seq.Seq("gtca", Alphabet.generic_dna),
            Seq.MutableSeq("GGTCA", Alphabet.generic_dna),
            Seq.Seq("CTG-CA", Alphabet.Gapped(IUPAC.unambiguous_dna, "-")),
            "TGGTCA",
        ]
        self.rna = [
            Seq.Seq("AUUUCG", IUPAC.ambiguous_rna),
            Seq.MutableSeq("AUUCG", IUPAC.ambiguous_rna),
            Seq.Seq("uCAg", Alphabet.generic_rna),
            Seq.MutableSeq("UC-AG", Alphabet.Gapped(Alphabet.generic_rna, "-")),
            Seq.Seq("U.CAG", Alphabet.Gapped(Alphabet.generic_rna, ".")),
            "UGCAU",
        ]
        self.nuc = [
            Seq.Seq("ATCG", Alphabet.generic_nucleotide),
            "UUUTTTACG",
        ]
        self.protein = [
            Seq.Seq("ATCGPK", IUPAC.protein),
            Seq.Seq("atcGPK", Alphabet.generic_protein),
            Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, ".")),
            Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-")),
            Seq.Seq("MEDG-KRXR*", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
            Seq.MutableSeq("ME-K-DRXR*XU", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-")),
            "TEDDF",
        ]

    def test_addition_dna_rna_with_generic_nucleotides(self):
        """Test adding generic nucleotide entries to DNA and RNA entries"""
        for a in self.dna + self.rna:
            for b in self.nuc:
                c = a + b
                self.assertEqual(str(c), str(a) + str(b))

    def test_addition_rna_with_rna(self):
        """Test adding RNA entries to each other"""
        # Index 3 is the "-" gapped entry; only the "." gapped one is kept.
        self.rna.pop(3)
        for a in self.rna:
            for b in self.rna:
                c = a + b
                self.assertEqual(str(c), str(a) + str(b))

    def test_exception_when_added_rna_has_more_than_one_gap_type(self):
        """Test resulting sequence has gap types '-' and '.'"""
        with self.assertRaises(ValueError):
            self.rna[3] + self.rna[4]

    def test_addition_dna_with_dna(self):
        """Test adding DNA entries to each other"""
        for a in self.dna:
            for b in self.dna:
                c = a + b
                self.assertEqual(str(c), str(a) + str(b))

    def test_addition_dna_with_rna(self):
        """Test adding DNA and RNA sequence objects raises TypeError"""
        # Drop the trailing plain strings from both lists.
        self.dna.pop(4)
        self.rna.pop(5)
        for a in self.dna:
            for b in self.rna:
                with self.assertRaises(TypeError):
                    a + b
                with self.assertRaises(TypeError):
                    b + a

    def test_addition_proteins(self):
        """Test adding protein entries to each other"""
        # Index 2 is the "." gapped entry; the remaining gapped ones use "-".
        self.protein.pop(2)
        for a in self.protein:
            for b in self.protein:
                c = a + b
                self.assertEqual(str(c), str(a) + str(b))

    def test_exception_when_added_protein_has_more_than_one_gap_type(self):
        """Test resulting protein has gap types '-' and '.'"""
        a = Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, "."))
        b = Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-"))
        with self.assertRaises(ValueError):
            a + b

    def test_exception_when_added_protein_has_more_than_one_stop_codon_type(self):
        """Test resulting protein has stop codon types '*' and '@'"""
        a = Seq.Seq("MEDG-KRXR@", Alphabet.HasStopCodon(Alphabet.Gapped(IUPAC.extended_protein, "-"), "@"))
        b = Seq.Seq("MEDG-KRXR*", Alphabet.Gapped(Alphabet.HasStopCodon(IUPAC.extended_protein, "*"), "-"))
        with self.assertRaises(ValueError):
            a + b

    def test_exception_when_adding_protein_with_nucletides(self):
        """Test adding protein and nucleotide sequence objects raises TypeError"""
        for a in self.protein[0:5]:
            for b in self.dna[0:3] + self.rna[0:4]:
                with self.assertRaises(TypeError):
                    a + b

    def test_adding_generic_nucleotide_with_other_nucleotides(self):
        """Test adding DNA, RNA and generic nucleotide entries to generic nucleotide entries"""
        for a in self.nuc:
            for b in self.dna + self.rna + self.nuc:
                c = a + b
                self.assertEqual(str(c), str(a) + str(b))


class TestMutableSeq(unittest.TestCase):
    """Creation, comparison and in-place editing of MutableSeq objects."""

    def setUp(self):
        """Create a Seq and a MutableSeq holding the same letters"""
        self.s = Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)
        self.mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)

    def test_mutableseq_creation(self):
        """Test creating MutableSeqs in multiple ways"""
        mutable_s = MutableSeq("TCAAAAGGATGCATCATG", IUPAC.ambiguous_dna)
        self.assertIsInstance(mutable_s, MutableSeq, "Creating MutableSeq")

        mutable_s = self.s.tomutable()
        self.assertIsInstance(mutable_s, MutableSeq, "Converting Seq to mutable")

        array_seq = MutableSeq(array.array(array_indicator, "TCAAAAGGATGCATCATG"),
                               IUPAC.ambiguous_dna)
        self.assertIsInstance(array_seq, MutableSeq, "Creating MutableSeq using array")

    def test_repr(self):
        """Test representation of MutableSeq object"""
        self.assertEqual("MutableSeq('TCAAAAGGATGCATCATG', IUPACAmbiguousDNA())",
                         repr(self.mutable_s))

    def test_truncated_repr(self):
        """Test representation of a long MutableSeq object is truncated"""
        seq = "TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGA"
        expected = "MutableSeq('TCAAAAGGATGCATCATGTCAAAAGGATGCATCATGTCAAAAGGATGCATCATG...GGA', IUPACAmbiguousDNA())"
        self.assertEqual(expected, repr(MutableSeq(seq, IUPAC.ambiguous_dna)))

    def test_equal_comparison(self):
        """Test __eq__ comparison method"""
        self.assertEqual(self.mutable_s, "TCAAAAGGATGCATCATG")

    def test_equal_comparison_of_incompatible_alphabets(self):
        """Test __eq__ with an RNA MutableSeq (any warnings are captured)"""
        with warnings.catch_warnings(record=True):
            self.mutable_s == MutableSeq('UCAAAAGGA', IUPAC.ambiguous_rna)

    def test_not_equal_comparison(self):
        """Test __ne__ comparison method"""
        self.assertNotEqual(self.mutable_s, "other thing")

    def test_less_than_comparison(self):
        """Test __lt__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] < self.mutable_s)

    def test_less_than_comparison_of_incompatible_alphabets(self):
        """Test __lt__ with an RNA MutableSeq (any warnings are captured)"""
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] < MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_comparison_without_alphabet(self):
        """Test __lt__ against a plain string"""
        self.assertTrue(self.mutable_s[:-1] < "TCAAAAGGATGCATCATG")

    def test_less_than_or_equal_comparison(self):
        """Test __le__ comparison method"""
        self.assertTrue(self.mutable_s[:-1] <= self.mutable_s)

    def test_less_than_or_equal_comparison_of_incompatible_alphabets(self):
        """Test __le__ with an RNA MutableSeq (any warnings are captured)"""
        with warnings.catch_warnings(record=True):
            self.mutable_s[:-1] <= MutableSeq("UCAAAAGGAUGCAUCAUG", IUPAC.ambiguous_rna)

    def test_less_than_or_equal_comparison_without_alphabet(self):
        """Test __le__ against a plain string"""
        self.assertTrue(self.mutable_s[:-1] <= "TCAAAAGGATGCATCATG")

    def test_add_method(self):
        """Test adding wrong type to MutableSeq"""
        with self.assertRaises(TypeError):
            self.mutable_s + 1234

    def test_radd_method(self):
        """Test __radd__ with a MutableSeq of the same alphabet"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.mutable_s))

    def test_radd_method_incompatible_alphabets(self):
        """Test __radd__ with an RNA MutableSeq raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(MutableSeq("UCAAAAGGA", IUPAC.ambiguous_rna))

    def test_radd_method_using_seq_object(self):
        """Test __radd__ with a Seq object"""
        self.assertEqual("TCAAAAGGATGCATCATGTCAAAAGGATGCATCATG",
                         self.mutable_s.__radd__(self.s))

    def test_radd_method_wrong_type(self):
        """Test __radd__ with an int raises TypeError"""
        with self.assertRaises(TypeError):
            self.mutable_s.__radd__(1234)

    def test_as_string(self):
        """Test converting MutableSeq to string"""
        self.assertEqual("TCAAAAGGATGCATCATG", str(self.mutable_s))

    def test_length(self):
        """Test len method on MutableSeq object"""
        self.assertEqual(18, len(self.mutable_s))

    def test_converting_to_immutable(self):
        """Test toseq returns a Seq object"""
        self.assertIsInstance(self.mutable_s.toseq(), Seq.Seq)

    def test_first_nucleotide(self):
        """Test getting first nucleotide of MutableSeq"""
        self.assertEqual('T', self.mutable_s[0])

    def test_setting_slices(self):
        """Test reading a slice and assigning a str, a MutableSeq and an array to a slice"""
        self.assertEqual(MutableSeq('CAAA', IUPAC.ambiguous_dna),
                         self.mutable_s[1:5], "Slice mutable seq")

        self.mutable_s[1:3] = "GAT"
        self.assertEqual(MutableSeq("TGATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s,
                         "Set slice with string and adding extra nucleotide")

        self.mutable_s[1:3] = self.mutable_s[5:7]
        self.assertEqual(MutableSeq("TAATAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Set slice with MutableSeq")

        self.mutable_s[1:3] = array.array(array_indicator, "GAT")
        self.assertEqual(MutableSeq("TGATTAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Set slice with array")

    def test_setting_item(self):
        """Test assigning a single letter by index"""
        self.mutable_s[3] = "G"
        self.assertEqual(MutableSeq("TCAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_slice(self):
        """Test deleting a slice"""
        del self.mutable_s[4:5]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_deleting_item(self):
        """Test deleting a single letter by index"""
        del self.mutable_s[3]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_appending(self):
        """Test append method"""
        self.mutable_s.append("C")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGC", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_inserting(self):
        """Test insert method"""
        self.mutable_s.insert(4, "G")
        self.assertEqual(MutableSeq("TCAAGAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_popping_last_item(self):
        """Test pop method returns the last letter"""
        self.assertEqual("G", self.mutable_s.pop())

    def test_remove_items(self):
        """Test remove with a present and an absent letter"""
        self.mutable_s.remove("G")
        self.assertEqual(MutableSeq("TCAAAAGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s, "Remove first G")

        self.assertRaises(ValueError, self.mutable_s.remove, 'Z')

    def test_count(self):
        """Test count with a single letter and a two letter string"""
        self.assertEqual(7, self.mutable_s.count("A"))
        self.assertEqual(2, self.mutable_s.count("AA"))

    def test_index(self):
        """Test index with a present letter and an absent string"""
        self.assertEqual(2, self.mutable_s.index("A"))
        self.assertRaises(ValueError, self.mutable_s.index, "8888")

    def test_reverse(self):
        """Test using reverse method"""
        self.mutable_s.reverse()
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_reverse_with_stride(self):
        """Test reverse using -1 stride"""
        self.assertEqual(MutableSeq("GTACTACGTAGGAAAACT", IUPAC.ambiguous_dna),
                         self.mutable_s[::-1])

    def test_complement(self):
        """Test in-place complement of DNA"""
        self.mutable_s.complement()
        self.assertEqual("AGTTTTCCTACGTAGTAC", str(self.mutable_s))

    def test_complement_rna(self):
        """Test in-place complement of mixed case RNA"""
        seq = Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna)
        seq.complement()
        self.assertEqual("UACuuuGAC", str(seq))

    def test_complement_mixed_aphabets(self):
        """Test complement of a sequence holding both U and T raises ValueError"""
        seq = Seq.MutableSeq("AUGaaaCTG")
        with self.assertRaises(ValueError):
            seq.complement()

    def test_complement_rna_string(self):
        """Test in-place complement of RNA letters created without an alphabet argument"""
        seq = Seq.MutableSeq("AUGaaaCUG")
        seq.complement()
        self.assertEqual('UACuuuGAC', str(seq))

    def test_complement_dna_string(self):
        """Test in-place complement of DNA letters created without an alphabet argument"""
        seq = Seq.MutableSeq("ATGaaaCTG")
        seq.complement()
        self.assertEqual('TACtttGAC', str(seq))

    def test_reverse_complement(self):
        """Test in-place reverse complement of DNA"""
        self.mutable_s.reverse_complement()
        self.assertEqual("CATGATGCATCCTTTTGA", str(self.mutable_s))

    def test_reverse_complement_of_protein(self):
        """Test reverse complement of a protein raises ValueError"""
        seq = Seq.MutableSeq("ACTGTCGTCT", Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.reverse_complement()

    def test_to_string_method(self):
        """This method is currently deprecated, probably will need to remove this test soon"""
        with warnings.catch_warnings(record=True):
            self.mutable_s.tostring()

    def test_extend_method(self):
        """Test extend with a string"""
        self.mutable_s.extend("GAT")
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGGAT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extend_with_mutable_seq(self):
        """Test extend with a MutableSeq"""
        self.mutable_s.extend(MutableSeq("TTT", IUPAC.ambiguous_dna))
        self.assertEqual(MutableSeq("TCAAAAGGATGCATCATGTTT", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_delete_stride_slice(self):
        """Test deleting the slice 4:6 - 1"""
        # NOTE(review): "4:6 - 1" evaluates to the plain slice 4:5 (no stride),
        # despite the test name - confirm whether a stride was intended.
        del self.mutable_s[4:6 - 1]
        self.assertEqual(MutableSeq("TCAAAGGATGCATCATG", IUPAC.ambiguous_dna),
                         self.mutable_s)

    def test_extract_third_nucleotide(self):
        """Test extracting every third nucleotide (slicing with stride 3)"""
        self.assertEqual(MutableSeq("TAGTAA", IUPAC.ambiguous_dna), self.mutable_s[0::3])
        self.assertEqual(MutableSeq("CAGGTT", IUPAC.ambiguous_dna), self.mutable_s[1::3])
        self.assertEqual(MutableSeq("AAACCG", IUPAC.ambiguous_dna), self.mutable_s[2::3])

    def test_set_wobble_codon_to_n(self):
        """Test setting wobble codon to N (set slice with stride 3)"""
        self.mutable_s[2::3] = "N" * len(self.mutable_s[2::3])
        self.assertEqual(MutableSeq("TCNAANGGNTGNATNATN", IUPAC.ambiguous_dna),
                         self.mutable_s)


class TestUnknownSeq(unittest.TestCase):
    """Construction, slicing, counting and conversions of UnknownSeq objects."""

    def setUp(self):
        """Create an UnknownSeq of length six with default alphabet and character"""
        self.s = Seq.UnknownSeq(6)

    def test_construction(self):
        """Test default characters per alphabet and rejection of invalid arguments"""
        self.assertEqual("??????", str(Seq.UnknownSeq(6)))
        self.assertEqual("NNNNNN", str(Seq.UnknownSeq(6, Alphabet.generic_dna)))
        self.assertEqual("XXXXXX", str(Seq.UnknownSeq(6, Alphabet.generic_protein)))

        self.assertEqual("??????", str(Seq.UnknownSeq(6, character="?")))

        with self.assertRaises(ValueError):
            Seq.UnknownSeq(-10)

        with self.assertRaises(ValueError):
            Seq.UnknownSeq(6, character='??')

    def test_length(self):
        """Test len method on UnknownSeq object"""
        self.assertEqual(6, len(self.s))

    def test_repr(self):
        """Test representation of UnknownSeq object"""
        self.assertEqual("UnknownSeq(6, alphabet = Alphabet(), character = '?')",
                         repr(self.s))

    def test_add_method(self):
        """Test adding UnknownSeq objects with different and with equal alphabets"""
        seq1 = Seq.UnknownSeq(3, Alphabet.generic_dna)
        self.assertEqual("??????NNN", str(self.s + seq1))

        seq2 = Seq.UnknownSeq(3, Alphabet.generic_dna)
        self.assertEqual("NNNNNN", str(seq1 + seq2))

    def test_getitem_method(self):
        """Test indexing and slicing, including a zero stride"""
        self.assertEqual("", self.s[-1:-1])
        self.assertEqual("?", self.s[1])
        self.assertEqual("?", self.s[5:])
        self.assertEqual("?", self.s[:1])
        self.assertEqual("??", self.s[1:3])
        self.assertEqual("???", self.s[1:6:2])
        self.assertEqual("????", self.s[1:-1])
        with self.assertRaises(ValueError):
            self.s[1:6:0]

    def test_count(self):
        """Test count with one and two letter strings and a start offset"""
        self.assertEqual(6, self.s.count("?"))
        self.assertEqual(3, self.s.count("??"))
        self.assertEqual(0, Seq.UnknownSeq(6, character="N").count("?"))
        self.assertEqual(0, Seq.UnknownSeq(6, character="N").count("??"))
        self.assertEqual(4, Seq.UnknownSeq(6, character="?").count("?", start=2))
        self.assertEqual(2, Seq.UnknownSeq(6, character="?").count("??", start=2))

    def test_complement(self):
        """Test complement returns '??????' and leaves the original as '??????'"""
        # Check the returned object as well; previously the result was discarded.
        self.assertEqual("??????", str(self.s.complement()))
        self.assertEqual("??????", str(self.s))

    def test_complement_of_protein(self):
        """Test complement shouldn't work on a protein!"""
        seq = Seq.UnknownSeq(6, Alphabet.generic_protein)
        with self.assertRaises(ValueError):
            seq.complement()

    def test_reverse_complement(self):
        """Test reverse complement returns '??????' and leaves the original as '??????'"""
        # Check the returned object as well; previously the result was discarded.
        self.assertEqual("??????", str(self.s.reverse_complement()))
        self.assertEqual("??????", str(self.s))

    def test_reverse_complement_of_protein(self):
        """Test reverse complement shouldn't work on a protein!"""
        seq = Seq.UnknownSeq(6, Alphabet.generic_protein)
        self.assertRaises(ValueError, seq.reverse_complement)

    def test_transcribe(self):
        """Test transcribe method"""
        self.assertEqual("??????", self.s.transcribe())

    def test_back_transcribe(self):
        """Test back_transcribe method"""
        self.assertEqual("??????", self.s.back_transcribe())

    def test_upper(self):
        """Test upper method"""
        seq = Seq.UnknownSeq(6, Alphabet.generic_dna)
        self.assertEqual("NNNNNN", str(seq.upper()))

    def test_lower(self):
        """Test lower method"""
        seq = Seq.UnknownSeq(6, Alphabet.generic_dna)
        self.assertEqual("nnnnnn", str(seq.lower()))

    def test_translation(self):
        """Test translating six unknown letters gives two unknown residues"""
        self.assertEqual("XX", str(self.s.translate()))

    def test_translation_of_proteins(self):
        """Test translating a protein raises ValueError"""
        seq = Seq.UnknownSeq(6, IUPAC.protein)
        self.assertRaises(ValueError, seq.translate)

    def test_ungap(self):
        """Test ungap with the default character and with the gap as character"""
        seq = Seq.UnknownSeq(7, alphabet=Alphabet.Gapped(Alphabet.DNAAlphabet(), "-"))
        self.assertEqual("NNNNNNN", str(seq.ungap("-")))

        seq = Seq.UnknownSeq(20, alphabet=Alphabet.Gapped(Alphabet.DNAAlphabet(), "-"),
                             character='-')
        self.assertEqual("", seq.ungap("-"))


class TestAmbiguousComplements(unittest.TestCase):
    """Sanity check of the ambiguous DNA value table."""

    def test_ambiguous_values(self):
        """Test that other tests do not introduce characters to our values"""
        self.assertFalse("-" in ambiguous_dna_values)
        self.assertFalse("?" in ambiguous_dna_values)


class TestComplement(unittest.TestCase):
    """Complement of ambiguous nucleotide codes."""

    def test_complement_ambiguous_dna_values(self):
        """Test complementing the letters behind each ambiguous DNA code"""
        for ambig_char, values in sorted(ambiguous_dna_values.items()):
            compl_values = str(Seq.Seq(values, alphabet=IUPAC.ambiguous_dna).complement())
            self.assertEqual(set(compl_values),
                             set(ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]))

    def test_complement_ambiguous_rna_values(self):
        """Test complementing the letters behind each ambiguous RNA code"""
        for ambig_char, values in sorted(ambiguous_rna_values.items()):
            compl_values = str(Seq.Seq(values, alphabet=IUPAC.ambiguous_rna).complement())
            self.assertEqual(set(compl_values),
                             set(ambiguous_rna_values[ambiguous_rna_complement[ambig_char]]))

    def test_complement_incompatible_alphabets(self):
        """Test complement of a sequence holding both U and T raises ValueError"""
        seq = Seq.Seq("CAGGTU")
        with self.assertRaises(ValueError):
            seq.complement()


class TestReverseComplement(unittest.TestCase):
    """Seq.reverse_complement function and the matching Seq methods."""

    def test_reverse_complement(self):
        """Test the function and the Seq methods agree on the non-protein test sequences"""
        test_seqs_copy = copy.copy(test_seqs)
        # Index 21 is the generic nucleotide entry holding both U and T.
        test_seqs_copy.pop(21)

        for nucleotide_seq in test_seqs_copy:
            if (not isinstance(nucleotide_seq.alphabet, Alphabet.ProteinAlphabet) and
                    isinstance(nucleotide_seq, Seq.Seq)):
                expected = Seq.reverse_complement(nucleotide_seq)
                self.assertEqual(repr(expected), repr(nucleotide_seq.reverse_complement()))
                self.assertEqual(repr(expected[::-1]), repr(nucleotide_seq.complement()))
                self.assertEqual(str(nucleotide_seq.complement()),
                                 str(Seq.reverse_complement(nucleotide_seq))[::-1])
                self.assertEqual(str(nucleotide_seq.reverse_complement()),
                                 str(Seq.reverse_complement(nucleotide_seq)))

    def test_reverse_complement_of_mixed_dna_rna(self):
        """Test reverse complement of a string holding both U and T raises ValueError"""
        seq = "AUGAAACTG"  # U and T
        self.assertRaises(ValueError, Seq.reverse_complement, seq)

    def test_reverse_complement_of_rna(self):
        """Test reverse complement of an RNA string"""
        seq = "AUGAAACUG"
        self.assertEqual("CAGUUUCAU", Seq.reverse_complement(seq))

    def test_reverse_complement_of_dna(self):
        """Test reverse complement of a DNA string"""
        seq = "ATGAAACTG"
        self.assertEqual("CAGTTTCAT", Seq.reverse_complement(seq))

    def test_reverse_complement_on_proteins(self):
        """Test reverse complement shouldn't work on a protein!"""
        for s in protein_seqs:
            with self.assertRaises(ValueError):
                Seq.reverse_complement(s)

            with self.assertRaises(ValueError):
                s.reverse_complement()

    def test_complement_on_proteins(self):
        """Test complement shouldn't work on a protein!"""
        for s in protein_seqs:
            with self.assertRaises(ValueError):
                s.complement()


class TestDoubleReverseComplement(unittest.TestCase):
    """Applying reverse_complement twice."""

    def test_reverse_complements(self):
        """Test double reverse complement preserves the sequence"""
        sequences = [
            Seq.Seq("".join(sorted(ambiguous_rna_values))),
            Seq.Seq("".join(sorted(ambiguous_dna_values))),
            Seq.Seq("".join(sorted(ambiguous_rna_values)), Alphabet.generic_rna),
            Seq.Seq("".join(sorted(ambiguous_dna_values)), Alphabet.generic_dna),
            Seq.Seq("".join(sorted(ambiguous_rna_values)).replace("X", ""), IUPAC.IUPACAmbiguousRNA()),
            Seq.Seq("".join(sorted(ambiguous_dna_values)).replace("X", ""), IUPAC.IUPACAmbiguousDNA()),
            Seq.Seq("AWGAARCKG"),  # Note no U or T
        ]
        for sequence in sequences:
            reversed_sequence = sequence.reverse_complement()
            self.assertEqual(str(sequence), str(reversed_sequence.reverse_complement()))


class TestSequenceAlphabets(unittest.TestCase):
    """Consistency of the letters and alphabets in test_seqs."""

    def test_sequence_alphabets(self):
        """Sanity test on the test sequence alphabets (see also enhancement bug 2597)"""
        for nucleotide_seq in test_seqs:
            if "U" in str(nucleotide_seq).upper():
                self.assertNotIsInstance(nucleotide_seq.alphabet, Alphabet.DNAAlphabet)
            if "T" in str(nucleotide_seq).upper():
                self.assertNotIsInstance(nucleotide_seq.alphabet, Alphabet.RNAAlphabet)


class TestTranscription(unittest.TestCase):
    """Seq.transcribe and Seq.back_transcribe functions and the matching Seq methods."""

    def test_transcription_dna_into_rna(self):
        """Test transcribing the DNA test sequences replaces T with U"""
        for nucleotide_seq in test_seqs:
            if isinstance(nucleotide_seq.alphabet, Alphabet.DNAAlphabet):
                expected = Seq.transcribe(nucleotide_seq)
                self.assertEqual(str(nucleotide_seq).replace("t", "u").replace("T", "U"),
                                 str(expected))

    def test_transcription_dna_string_into_rna(self):
        """Test transcribing a DNA string"""
        seq = "ATGAAACTG"
        self.assertEqual("AUGAAACUG", Seq.transcribe(seq))

    def test_seq_object_transcription_method(self):
        """Test the transcribe method agrees with the transcribe function"""
        for nucleotide_seq in test_seqs:
            if (isinstance(nucleotide_seq.alphabet, Alphabet.DNAAlphabet) and
                    isinstance(nucleotide_seq, Seq.Seq)):
                self.assertEqual(repr(Seq.transcribe(nucleotide_seq)),
                                 repr(nucleotide_seq.transcribe()))

    def test_transcription_of_rna(self):
        """Test transcription shouldn't work on RNA!"""
        seq = Seq.Seq("AUGAAACUG", IUPAC.ambiguous_rna)
        with self.assertRaises(ValueError):
            seq.transcribe()

    def test_transcription_of_proteins(self):
        """Test transcription shouldn't work on a protein!"""
        for s in protein_seqs:
            with self.assertRaises(ValueError):
                Seq.transcribe(s)

            if isinstance(s, Seq.Seq):
                with self.assertRaises(ValueError):
                    s.transcribe()

    def test_back_transcribe_rna_into_dna(self):
        """Test back-transcribing the RNA test sequences replaces U with T"""
        for nucleotide_seq in test_seqs:
            if isinstance(nucleotide_seq.alphabet, Alphabet.RNAAlphabet):
                expected = Seq.back_transcribe(nucleotide_seq)
                self.assertEqual(str(nucleotide_seq).replace("u", "t").replace("U", "T"),
                                 str(expected))

    def test_back_transcribe_rna_string_into_dna(self):
        """Test back-transcribing an RNA string"""
        seq = "AUGAAACUG"
        self.assertEqual("ATGAAACTG", Seq.back_transcribe(seq))

    def test_seq_object_back_transcription_method(self):
        """Test the back_transcribe method agrees with the back_transcribe function"""
        for nucleotide_seq in test_seqs:
            if (isinstance(nucleotide_seq.alphabet, Alphabet.RNAAlphabet) and
                    isinstance(nucleotide_seq, Seq.Seq)):
                expected = Seq.back_transcribe(nucleotide_seq)
                self.assertEqual(repr(nucleotide_seq.back_transcribe()), repr(expected))

    def test_back_transcription_of_proteins(self):
        """Test back-transcription shouldn't work on a protein!"""
        for s in protein_seqs:
            with self.assertRaises(ValueError):
                Seq.back_transcribe(s)

            if isinstance(s, Seq.Seq):
                with self.assertRaises(ValueError):
                    s.back_transcribe()

    def test_back_transcription_of_dna(self):
        """Test back-transcription shouldn't work on DNA!"""
        seq = Seq.Seq("ATGAAACTG", IUPAC.ambiguous_dna)
        with self.assertRaises(ValueError):
            seq.back_transcribe()


class TestTranslating(unittest.TestCase):
    def setUp(self):
        # Several tests index into this list, so the order must not change.
        self.test_seqs = [
            Seq.Seq("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna),
            Seq.Seq("ATGAAACTG"),
            Seq.Seq("ATGAARCTG"),
            Seq.Seq("AWGAARCKG"),  # Note no U or T
            Seq.Seq("".join(ambiguous_rna_values)),
            Seq.Seq("".join(ambiguous_dna_values)),
            Seq.Seq("".join(ambiguous_rna_values), Alphabet.generic_rna),
            Seq.Seq("".join(ambiguous_dna_values), Alphabet.generic_dna),
            Seq.Seq("".join(ambiguous_rna_values), IUPAC.IUPACAmbiguousRNA()),
            Seq.Seq("".join(ambiguous_dna_values), IUPAC.IUPACAmbiguousDNA()),
            Seq.Seq("AWGAARCKG", Alphabet.generic_dna),
            Seq.Seq("AUGAAACUG", Alphabet.generic_rna),
            Seq.Seq("ATGAAACTG", IUPAC.unambiguous_dna),
            Seq.Seq("ATGAAACTGWN", IUPAC.ambiguous_dna),
            Seq.Seq("AUGAAACUG", Alphabet.generic_rna),
            Seq.Seq("AUGAAACUG", IUPAC.unambiguous_rna),
            Seq.Seq("AUGAAACUGWN", IUPAC.ambiguous_rna),
            Seq.Seq("ATGAAACTG", Alphabet.generic_nucleotide),
            Seq.MutableSeq("ATGAAACTG", Alphabet.generic_dna),
            Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna),
        ]

    def test_translation(self):
        """Test the translate method agrees with the translate function"""
        for nucleotide_seq in self.test_seqs:
            # Trim to a whole number of codons before translating.
            nucleotide_seq = nucleotide_seq[:3 * (len(nucleotide_seq) // 3)]
            if isinstance(nucleotide_seq, Seq.Seq) and 'X' not in str(nucleotide_seq):
                expected = Seq.translate(nucleotide_seq)
                self.assertEqual(repr(expected), repr(nucleotide_seq.translate()))

    def test_alphabets_of_translated_seqs(self):
        """Test the alphabet of the protein returned by translate"""

        def triple_pad(s):
            """Add N to ensure length is a multiple of three (whole codons)."""
            while len(s) % 3:
                s += "N"
            return s

        self.assertEqual("IUPACProtein()", repr(self.test_seqs[0].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(self.test_seqs[1].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(self.test_seqs[2].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(self.test_seqs[3].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(self.test_seqs[10].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(self.test_seqs[11].translate().alphabet))
        self.assertEqual("IUPACProtein()", repr(self.test_seqs[12].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(triple_pad(self.test_seqs[13]).translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(self.test_seqs[14].translate().alphabet))
        self.assertEqual("IUPACProtein()", repr(self.test_seqs[15].translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(triple_pad(self.test_seqs[16]).translate().alphabet))
        self.assertEqual("ExtendedIUPACProtein()", repr(triple_pad(self.test_seqs[17]).translate().alphabet))

    def test_translation_of_gapped_seq_with_gap_char_given(self):
        """Test translating a gapped sequence with a matching and a non-matching gap argument"""
        seq = Seq.Seq("ATG---AAACTG")
        self.assertEqual("M-KL", seq.translate(gap="-"))
        self.assertRaises(TranslationError, seq.translate, gap="~")

    def test_translation_of_gapped_seq_with_stop_codon_and_gap_char_given(self):
        """Test translating a gapped sequence holding stop codons"""
        seq = Seq.Seq("GTG---GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
        self.assertEqual("V-AIVMGR*KGAR*", seq.translate(gap="-"))
        self.assertRaises(TranslationError, seq.translate)

    def test_translation_of_gapped_seq_with_gap_char_given_and_inferred_from_alphabet(self):
        """Test the gap argument together with a Gapped alphabet"""
        seq = Seq.Seq("ATG---AAACTG", Gapped(IUPAC.unambiguous_dna))
        self.assertEqual("M-KL", seq.translate(gap="-"))
        self.assertRaises(ValueError, seq.translate, gap="~")

        seq = Seq.Seq("ATG~~~AAACTG", Gapped(IUPAC.unambiguous_dna))
        self.assertRaises(ValueError, seq.translate, gap="~")
        self.assertRaises(TranslationError, seq.translate, gap="-")

    def test_translation_of_gapped_seq_with_gap_char_given_and_inferred_from_alphabet2(self):
        """Test using stop codon in sequence"""
        seq = Seq.Seq("ATG---AAACTGTAG", Gapped(IUPAC.unambiguous_dna))
        self.assertEqual("M-KL*", seq.translate(gap="-"))
        self.assertRaises(ValueError, seq.translate, gap="~")

        seq = Seq.Seq("ATG---AAACTGTAG", Gapped(IUPAC.unambiguous_dna))
        self.assertEqual("M-KL@", seq.translate(gap="-", stop_symbol="@"))
        self.assertRaises(ValueError, seq.translate, gap="~")

        seq = Seq.Seq("ATG~~~AAACTGTAG", Gapped(IUPAC.unambiguous_dna))
        self.assertRaises(ValueError, seq.translate, gap="~")
        self.assertRaises(TranslationError, seq.translate, gap="-")

    def test_translation_of_gapped_seq_no_gap_char_given(self):
        """Test translating a gapped sequence without gap argument or Gapped alphabet"""
        seq = Seq.Seq("ATG---AAACTG")
        self.assertRaises(TranslationError, seq.translate)

    def test_translation_of_gapped_seq_no_gap_char_given_and_inferred_from_alphabet(self):
        """Test translating with the gap character taken from the Gapped alphabet"""
        seq = Seq.Seq("ATG---AAACTG", Gapped(IUPAC.unambiguous_dna))
        self.assertEqual("M-KL", seq.translate())

        seq = Seq.Seq("ATG~~~AAACTG", Gapped(IUPAC.unambiguous_dna))
        self.assertRaises(TranslationError, seq.translate)

        seq = Seq.Seq("ATG~~~AAACTG", Gapped(IUPAC.unambiguous_dna, "~"))
        self.assertEqual("M~KL", seq.translate())

    def test_alphabet_of_translated_gapped_seq(self):
        """Test the alphabet of the protein returned for gapped sequences"""
        seq = Seq.Seq("ATG---AAACTG", Gapped(IUPAC.unambiguous_dna))
        self.assertEqual("Gapped(ExtendedIUPACProtein(), '-')", repr(seq.translate().alphabet))

        seq = Seq.Seq("ATG---AAACTG", Gapped(IUPAC.unambiguous_dna, "-"))
        self.assertEqual("Gapped(ExtendedIUPACProtein(), '-')", repr(seq.translate().alphabet))

        seq = Seq.Seq("ATG~~~AAACTG", Gapped(IUPAC.unambiguous_dna, "~"))
        self.assertEqual("Gapped(ExtendedIUPACProtein(), '~')", repr(seq.translate().alphabet))

        seq = Seq.Seq("ATG---AAACTG")
        self.assertEqual("Gapped(ExtendedIUPACProtein(), '-')", repr(seq.translate(gap="-").alphabet))

        seq = Seq.Seq("ATG~~~AAACTG")
        self.assertEqual("Gapped(ExtendedIUPACProtein(), '~')", repr(seq.translate(gap="~").alphabet))

        seq = Seq.Seq("ATG~~~AAACTGTAG")
        self.assertEqual("HasStopCodon(Gapped(ExtendedIUPACProtein(), '~'), '*')",
                         repr(seq.translate(gap="~").alphabet))

        seq = Seq.Seq("ATG---AAACTGTGA")
        self.assertEqual("HasStopCodon(Gapped(ExtendedIUPACProtein(), '-'), '*')",
                         repr(seq.translate(gap="-").alphabet))

        seq = Seq.Seq("ATG---AAACTGTGA")
        self.assertEqual("HasStopCodon(Gapped(ExtendedIUPACProtein(), '-'), '@')",
                         repr(seq.translate(gap="-", stop_symbol="@").alphabet))

    def test_translation_wrong_type(self):
        """Test translation table cannot be CodonTable"""
        seq = Seq.Seq("ATCGTA")
        with self.assertRaises(ValueError):
            seq.translate(table=ambiguous_dna_complement)

    def test_translation_of_string(self):
        seq
= "GTGGCCATTGTAATGGGCCGC" self.assertEqual("VAIVMGR", Seq.translate(seq)) def test_translation_of_gapped_string_with_gap_char_given(self): seq = "GTG---GCCATTGTAATGGGCCGC" expected = "V-AIVMGR" self.assertEqual(expected, Seq.translate(seq, gap="-")) self.assertRaises(TypeError, Seq.translate, seq, gap=[]) self.assertRaises(ValueError, Seq.translate, seq, gap="-*") def test_translation_of_gapped_string_no_gap_char_given(self): seq = "GTG---GCCATTGTAATGGGCCGC" self.assertRaises(TranslationError, Seq.translate, seq) def test_translation_to_stop(self): for nucleotide_seq in self.test_seqs: nucleotide_seq = nucleotide_seq[:3 * (len(nucleotide_seq) // 3)] if isinstance(nucleotide_seq, Seq.Seq) and 'X' not in str(nucleotide_seq): short = Seq.translate(nucleotide_seq, to_stop=True) self.assertEqual(str(short), str(Seq.translate(nucleotide_seq).split('*')[0])) seq = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG" self.assertEqual("VAIVMGRWKGAR", Seq.translate(seq, table=2, to_stop=True)) def test_translation_on_proteins(self): """Test translation shouldn't work on a protein!""" for s in protein_seqs: with self.assertRaises(ValueError): Seq.translate(s) if isinstance(s, Seq.Seq): with self.assertRaises(ValueError): s.translate() def test_translation_of_invalid_codon(self): for codon in ["TA?", "N-N", "AC_", "Ac_"]: with self.assertRaises(TranslationError): Seq.translate(codon) def test_translation_of_glutamine(self): for codon in ['SAR', 'SAG', 'SAA']: self.assertEqual('Z', Seq.translate(codon)) def test_translation_of_asparagine(self): for codon in ['RAY', 'RAT', 'RAC']: self.assertEqual('B', Seq.translate(codon)) def test_translation_of_leucine(self): for codon in ['WTA', 'MTY', 'MTT', 'MTW', 'MTM', 'MTH', 'MTA', 'MTC', 'HTA']: self.assertEqual('J', Seq.translate(codon)) def test_translation_with_bad_table_argument(self): table = dict() with self.assertRaises(ValueError): Seq.translate("GTGGCCATTGTAATGGGCCGC", table=table) def 
test_translation_with_codon_table_as_table_argument(self): table = standard_dna_table self.assertEqual("VAIVMGR", Seq.translate("GTGGCCATTGTAATGGGCCGC", table=table)) def test_translation_incomplete_codon(self): with warnings.catch_warnings(record=True): Seq.translate("GTGGCCATTGTAATGGGCCG") def test_translation_extra_stop_codon(self): seq = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAGTAG" with self.assertRaises(TranslationError): Seq.translate(seq, table=2, cds=True) def test_translation_using_cds(self): seq = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG" self.assertEqual("MAIVMGRWKGAR", Seq.translate(seq, table=2, cds=True)) seq = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCG" # not multiple of three with self.assertRaises(TranslationError): Seq.translate(seq, table=2, cds=True) seq = "GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA" # no stop codon with self.assertRaises(TranslationError): Seq.translate(seq, table=2, cds=True) seq = "GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG" # no start codon with self.assertRaises(TranslationError): Seq.translate(seq, table=2, cds=True) class TestStopCodons(unittest.TestCase): def setUp(self): self.misc_stops = "TAATAGTGAAGAAGG" def test_stops(self): for nucleotide_seq in [self.misc_stops, Seq.Seq(self.misc_stops), Seq.Seq(self.misc_stops, Alphabet.generic_nucleotide), Seq.Seq(self.misc_stops, Alphabet.DNAAlphabet()), Seq.Seq(self.misc_stops, IUPAC.unambiguous_dna)]: self.assertEqual("***RR", str(Seq.translate(nucleotide_seq))) self.assertEqual("***RR", str(Seq.translate(nucleotide_seq, table=1))) self.assertEqual("***RR", str(Seq.translate(nucleotide_seq, table="SGC0"))) self.assertEqual("**W**", str(Seq.translate(nucleotide_seq, table=2))) self.assertEqual("**WRR", str(Seq.translate(nucleotide_seq, table='Yeast Mitochondrial'))) self.assertEqual("**WSS", str(Seq.translate(nucleotide_seq, table=5))) self.assertEqual("**WSS", str(Seq.translate(nucleotide_seq, table=9))) self.assertEqual("**CRR", str(Seq.translate(nucleotide_seq, table='Euplotid Nuclear'))) 
self.assertEqual("***RR", str(Seq.translate(nucleotide_seq, table=11))) self.assertEqual("***RR", str(Seq.translate(nucleotide_seq, table='Bacterial'))) def test_translation_of_stops(self): self.assertEqual(Seq.translate("TAT"), "Y") self.assertEqual(Seq.translate("TAR"), "*") self.assertEqual(Seq.translate("TAN"), "X") self.assertEqual(Seq.translate("NNN"), "X") self.assertEqual(Seq.translate("TAt"), "Y") self.assertEqual(Seq.translate("TaR"), "*") self.assertEqual(Seq.translate("TaN"), "X") self.assertEqual(Seq.translate("nnN"), "X") self.assertEqual(Seq.translate("tat"), "Y") self.assertEqual(Seq.translate("tar"), "*") self.assertEqual(Seq.translate("tan"), "X") self.assertEqual(Seq.translate("nnn"), "X") if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity=2) unittest.main(testRunner=runner)