# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Tests for SearchIO blast-xml indexing."""

import unittest

from search_tests_common import CheckRaw, CheckIndex


class BlastXmlRawCases(CheckRaw):
    """Check BLAST XML get_raw method."""

    fmt = 'blast-xml'

    def test_blastxml_2226_multiple_first(self):
        """Raw retrieval from blast-xml, BLAST 2.2.26+, first of multiple queries (xml_2226_blastp_001.xml)"""
        xml_path = 'Blast/xml_2226_blastp_001.xml'
        expected = """ 1 Query_1 random_s00 32 20 6406 7 156650 0.041 0.267 0.14 No hits found """
        self.check_raw(xml_path, "random_s00", expected)

    def test_blastxml_2226_multiple_middle(self):
        """Raw retrieval from blast-xml, BLAST 2.2.26+, middle of multiple queries (xml_2226_blastp_001.xml)"""
        xml_path = 'Blast/xml_2226_blastp_001.xml'
        # NOTE: the line break inside this literal is part of the expected value.
        expected = """ 2 Query_2 gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] 102 1 gnl|BL_ORD_ID|1 gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7] 1 100 1 139.428 350 1.99275e-46 1 102 1 100 0 0 69 81 2 102 MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEGTYVGLADTHTIEVTVDHEPVSFDITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN MKK LFFILLL+GCGV ++KSQGED + TKEGTYVGLADTHTIEVTVD+EPVS DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN 2 gnl|BL_ORD_ID|2 gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. 
plantarum CAU B946] 2 105 1 88.9669 219 6.94052e-27 1 101 1 104 0 0 48 69 5 105 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA MKKTIAASFLILLFSVVLAACGTAEQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVDDQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A 3 gnl|BL_ORD_ID|3 gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42] 3 105 1 88.9669 219 8.41012e-27 1 101 1 104 0 0 48 69 5 105 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA MKKTIAASFLILLFSVVLAACGTADQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVDDQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A 4 gnl|BL_ORD_ID|4 gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942] 4 105 1 83.1889 204 1.37847e-24 1 100 1 103 0 0 45 66 5 104 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIER MKKNVASSFLILLFSIILAACGTAEQSKEG-NGSSSSQVQNETAYYVGMADTHTIEVKIDDQPVSFEFTDDFSEILNEFEENDKVNISYLTNDKGQKELTEIEK MKK +A F ILL L+ CG Q +G + S S ++ + YVG+ADTHTIEV +D++PVS + T++ + L++F DKV I+Y ND+GQ L +IE+ 5 gnl|BL_ORD_ID|15 gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys] 15 132 1 15.779 29 7.12269 60 84 80 104 0 0 7 11 0 25 VSLDITEESTSDLDKFNSGDKVTIT VEMGFLHVGQAGLELVTSGDPPTLT V + + L+ SGD T+T 20 6406 38 361344 0.041 0.267 0.14 """
        self.check_raw(xml_path, "gi|16080617|ref|NP_391444.1|", expected)

    def test_blastxml_2226_multiple_last(self):
        """Raw retrieval from blast-xml, BLAST 2.2.26+, last of multiple queries (xml_2226_blastp_001.xml)"""
        xml_path = 'Blast/xml_2226_blastp_001.xml'
        # NOTE: the line breaks inside this literal are part of the expected value.
        expected = """ 3 Query_3 gi|11464971:4-101 pleckstrin [Mus musculus] 98 1 gnl|BL_ORD_ID|5 gi|11464971|ref|NP_062422.1| pleckstrin [Mus musculus] 5 350 1 205.682 522 
2.24956e-69 1 98 4 101 0 0 98 98 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 43.5134 101 2.90061e-09 3 96 246 345 0 0 29 48 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A 2 gnl|BL_ORD_ID|6 gi|354480464|ref|XP_003502426.1| PREDICTED: pleckstrin-like [Cricetulus griseus] 6 350 1 205.297 521 3.2078e-69 1 98 4 101 0 0 98 98 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 43.8986 102 1.81272e-09 3 96 246 345 0 0 30 50 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDF-GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAIHLRGCVVTSVESNHDGKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA I++G L+K+G WK +L ED + +Y P G I L+G +TS + GK+ + +I T + ++ QAA +ER W++ I+ A 3 gnl|BL_ORD_ID|7 gi|156616273|ref|NP_002655.2| pleckstrin [Homo sapiens] 7 350 1 204.142 518 1.081e-68 1 98 4 101 0 0 97 97 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 47.3654 111 1.50729e-10 3 96 246 345 0 0 31 48 6 100 
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIRAIQMA I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W+R I+ A 4 gnl|BL_ORD_ID|8 gi|297667453|ref|XP_002811995.1| PREDICTED: pleckstrin-like [Pongo abelii] 8 350 1 204.142 518 1.10449e-68 1 98 4 101 0 0 97 97 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 45.4394 106 6.1425e-10 3 96 246 345 0 0 30 48 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIKAIQVA I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W++ I+ A 5 gnl|BL_ORD_ID|9 gi|350596020|ref|XP_003360649.2| PREDICTED: pleckstrin-like [Sus scrofa] 9 228 1 199.519 506 1.97058e-68 1 98 4 101 0 0 94 96 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK 2 32.3426 72 1.12281e-05 30 96 153 223 0 0 21 32 4 71 IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFV---LKITTTKQQDHFFQAAFLEERDAWVRDIKKA LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA + +Y P G I L+G +TS GK F+ T + +F QAA +ER W++ I+ A 20 6406 37 345626 0.041 0.267 0.14 """
        self.check_raw(xml_path, "gi|11464971:4-101", expected)

    def test_blastxml_2226_single(self):
        """Raw retrieval from blast-xml, BLAST 2.2.26+, single query (xml_2226_blastp_004.xml)"""
        xml_path = 'Blast/xml_2226_blastp_004.xml'
        # NOTE: the line break inside this literal is part of the expected value.
        expected = """ 1 Query_1 gi|11464971:4-101 pleckstrin [Mus musculus] 98 1 gnl|BL_ORD_ID|5 gi|11464971|ref|NP_062422.1| pleckstrin [Mus musculus] 5 350 1 205.682 522 2.24956e-69 1 98 4 101 0 0 98 98 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 43.5134 101 2.90061e-09 3 96 246 345 0 0 29 48 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A 2 gnl|BL_ORD_ID|6 gi|354480464|ref|XP_003502426.1| PREDICTED: pleckstrin-like [Cricetulus griseus] 6 350 1 205.297 521 3.2078e-69 1 98 4 101 0 0 98 98 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 43.8986 102 1.81272e-09 3 96 246 345 0 0 30 50 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDF-GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAIHLRGCVVTSVESNHDGKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA I++G L+K+G WK +L ED + +Y P G I L+G +TS + GK+ + +I T + ++ QAA +ER W++ I+ A 3 gnl|BL_ORD_ID|7 gi|156616273|ref|NP_002655.2| pleckstrin [Homo sapiens] 7 350 1 204.142 518 1.081e-68 1 98 4 101 0 0 97 97 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 47.3654 111 1.50729e-10 3 96 246 345 0 0 31 48 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIRAIQMA I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W+R I+ A 4 gnl|BL_ORD_ID|8 gi|297667453|ref|XP_002811995.1| PREDICTED: pleckstrin-like [Pongo abelii] 8 350 1 204.142 518 1.10449e-68 1 98 4 101 0 0 97 97 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK 2 45.4394 106 6.1425e-10 3 96 246 345 0 0 30 48 6 100 IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIKAIQVA I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W++ I+ A 5 gnl|BL_ORD_ID|9 gi|350596020|ref|XP_003360649.2| PREDICTED: pleckstrin-like [Sus scrofa] 9 228 1 199.519 506 1.97058e-68 1 98 4 101 0 0 94 96 0 98 KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK 2 32.3426 72 1.12281e-05 30 96 153 223 0 0 21 32 4 71 IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFV---LKITTTKQQDHFFQAAFLEERDAWVRDIKKA LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA + +Y P G I L+G +TS GK F+ T + +F QAA +ER W++ I+ A 20 6406 37 345626 0.041 0.267 0.14 """
        self.check_raw(xml_path, "gi|11464971:4-101", expected)


class BlastXmlIndexCases(CheckIndex):
    """Check BLAST XML indexing via check_index across several BLAST output files."""

    fmt = 'blast-xml'

    def test_blastxml_2212L_blastp_001(self):
        """Index a blast-xml file, BLAST 2.2.12"""
        xml_path = 'Blast/xml_2212L_blastp_001.xml'
        self.check_index(xml_path, self.fmt)

    def test_blastxml_2218_blastp_001(self):
        """Index a blast-xml file, BLAST 2.2.18+"""
        xml_path = 'Blast/xml_2218_blastp_001.xml'
        self.check_index(xml_path, self.fmt)

    def test_blastxml_2222_blastx_001(self):
        """Index a blast-xml file, BLAST 2.2.22+"""
        xml_path = 'Blast/xml_2222_blastx_001.xml'
        self.check_index(xml_path, self.fmt)

    def test_blastxml_2226_tblastn_001(self):
        """Index a blast-xml file, BLAST 2.2.26+, multiple queries"""
        xml_path = 'Blast/xml_2226_tblastn_001.xml'
        self.check_index(xml_path, self.fmt)

    def test_blastxml_2226_tblastn_002(self):
        """Index a blast-xml file, BLAST 2.2.26+, single query, no hits"""
        xml_path = 'Blast/xml_2226_tblastn_002.xml'
        self.check_index(xml_path, self.fmt)

    def test_blastxml_2226_tblastn_004(self):
        """Index a blast-xml file, BLAST 2.2.26+, single query, multiple hits"""
        xml_path = 'Blast/xml_2226_tblastn_004.xml'
        self.check_index(xml_path, self.fmt)


if __name__ == "__main__":
    # Run verbosely so each test's description is printed.
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)