# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for SearchIO blast-xml indexing."""
import unittest
from search_tests_common import CheckRaw, CheckIndex
class BlastXmlRawCases(CheckRaw):
    """Check raw record retrieval (get_raw) for BLAST XML files.

    Every test hands ``check_raw`` a fixture path, the ID of the query to
    look up and the text that is expected for that query.
    """

    # NOTE(review): the expected strings below hold only element text (no
    # XML tags or indentation) -- TODO confirm they match what get_raw
    # returns for these fixture files.
    fmt = 'blast-xml'

    def test_blastxml_2226_multiple_first(self):
        """Test blast-xml raw string retrieval, BLAST 2.2.26+, multiple queries, first (xml_2226_blastp_001.xml)"""
        query_id = "random_s00"
        expected = """
1
Query_1
random_s00
32
20
6406
7
156650
0.041
0.267
0.14
No hits found
"""
        self.check_raw('Blast/xml_2226_blastp_001.xml', query_id, expected)

    def test_blastxml_2226_multiple_middle(self):
        """Test blast-xml raw string retrieval, BLAST 2.2.26+, multiple queries, middle (xml_2226_blastp_001.xml)"""
        query_id = "gi|16080617|ref|NP_391444.1|"
        expected = """
2
Query_2
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]
102
1
gnl|BL_ORD_ID|1
gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]
1
100
1
139.428
350
1.99275e-46
1
102
1
100
0
0
69
81
2
102
MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN
MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEGTYVGLADTHTIEVTVDHEPVSFDITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN
MKK LFFILLL+GCGV ++KSQGED + TKEGTYVGLADTHTIEVTVD+EPVS DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN
2
gnl|BL_ORD_ID|2
gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946]
2
105
1
88.9669
219
6.94052e-27
1
101
1
104
0
0
48
69
5
105
MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA
MKKTIAASFLILLFSVVLAACGTAEQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVDDQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA
MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A
3
gnl|BL_ORD_ID|3
gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42]
3
105
1
88.9669
219
8.41012e-27
1
101
1
104
0
0
48
69
5
105
MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA
MKKTIAASFLILLFSVVLAACGTADQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVDDQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA
MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A
4
gnl|BL_ORD_ID|4
gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942]
4
105
1
83.1889
204
1.37847e-24
1
100
1
103
0
0
45
66
5
104
MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIER
MKKNVASSFLILLFSIILAACGTAEQSKEG-NGSSSSQVQNETAYYVGMADTHTIEVKIDDQPVSFEFTDDFSEILNEFEENDKVNISYLTNDKGQKELTEIEK
MKK +A F ILL L+ CG Q +G + S S ++ + YVG+ADTHTIEV +D++PVS + T++ + L++F DKV I+Y ND+GQ L +IE+
5
gnl|BL_ORD_ID|15
gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]
15
132
1
15.779
29
7.12269
60
84
80
104
0
0
7
11
0
25
VSLDITEESTSDLDKFNSGDKVTIT
VEMGFLHVGQAGLELVTSGDPPTLT
V + + L+ SGD T+T
20
6406
38
361344
0.041
0.267
0.14
"""
        self.check_raw('Blast/xml_2226_blastp_001.xml', query_id, expected)

    def test_blastxml_2226_multiple_last(self):
        """Test blast-xml raw string retrieval, BLAST 2.2.26+, multiple queries, last (xml_2226_blastp_001.xml)"""
        query_id = "gi|11464971:4-101"
        expected = """
3
Query_3
gi|11464971:4-101 pleckstrin [Mus musculus]
98
1
gnl|BL_ORD_ID|5
gi|11464971|ref|NP_062422.1| pleckstrin [Mus musculus]
5
350
1
205.682
522
2.24956e-69
1
98
4
101
0
0
98
98
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
43.5134
101
2.90061e-09
3
96
246
345
0
0
29
48
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA
I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A
2
gnl|BL_ORD_ID|6
gi|354480464|ref|XP_003502426.1| PREDICTED: pleckstrin-like [Cricetulus griseus]
6
350
1
205.297
521
3.2078e-69
1
98
4
101
0
0
98
98
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
43.8986
102
1.81272e-09
3
96
246
345
0
0
30
50
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDF-GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAIHLRGCVVTSVESNHDGKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA
I++G L+K+G WK +L ED + +Y P G I L+G +TS + GK+ + +I T + ++ QAA +ER W++ I+ A
3
gnl|BL_ORD_ID|7
gi|156616273|ref|NP_002655.2| pleckstrin [Homo sapiens]
7
350
1
204.142
518
1.081e-68
1
98
4
101
0
0
97
97
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
47.3654
111
1.50729e-10
3
96
246
345
0
0
31
48
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIRAIQMA
I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W+R I+ A
4
gnl|BL_ORD_ID|8
gi|297667453|ref|XP_002811995.1| PREDICTED: pleckstrin-like [Pongo abelii]
8
350
1
204.142
518
1.10449e-68
1
98
4
101
0
0
97
97
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
45.4394
106
6.1425e-10
3
96
246
345
0
0
30
48
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIKAIQVA
I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W++ I+ A
5
gnl|BL_ORD_ID|9
gi|350596020|ref|XP_003360649.2| PREDICTED: pleckstrin-like [Sus scrofa]
9
228
1
199.519
506
1.97058e-68
1
98
4
101
0
0
94
96
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK
KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK
2
32.3426
72
1.12281e-05
30
96
153
223
0
0
21
32
4
71
IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFV---LKITTTKQQDHFFQAAFLEERDAWVRDIKKA
LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA
+ +Y P G I L+G +TS GK F+ T + +F QAA +ER W++ I+ A
20
6406
37
345626
0.041
0.267
0.14
"""
        self.check_raw('Blast/xml_2226_blastp_001.xml', query_id, expected)

    def test_blastxml_2226_single(self):
        """Test blast-xml raw string retrieval, BLAST 2.2.26+, single query (xml_2226_blastp_004.xml)"""
        query_id = "gi|11464971:4-101"
        expected = """
1
Query_1
gi|11464971:4-101 pleckstrin [Mus musculus]
98
1
gnl|BL_ORD_ID|5
gi|11464971|ref|NP_062422.1| pleckstrin [Mus musculus]
5
350
1
205.682
522
2.24956e-69
1
98
4
101
0
0
98
98
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
43.5134
101
2.90061e-09
3
96
246
345
0
0
29
48
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTS--PCQDFGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAVHLRGCVVTSVESSHDVKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA
I++G L+K+G WK +L ED + +Y P G + L+G +TS D K + +I T + ++ QAA +ER W++ I+ A
2
gnl|BL_ORD_ID|6
gi|354480464|ref|XP_003502426.1| PREDICTED: pleckstrin-like [Cricetulus griseus]
6
350
1
205.297
521
3.2078e-69
1
98
4
101
0
0
98
98
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
43.8986
102
1.81272e-09
3
96
246
345
0
0
30
50
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDF-GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGEDPLGAIHLRGCVVTSVESNHDGKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA
I++G L+K+G WK +L ED + +Y P G I L+G +TS + GK+ + +I T + ++ QAA +ER W++ I+ A
3
gnl|BL_ORD_ID|7
gi|156616273|ref|NP_002655.2| pleckstrin [Homo sapiens]
7
350
1
204.142
518
1.081e-68
1
98
4
101
0
0
97
97
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
47.3654
111
1.50729e-10
3
96
246
345
0
0
31
48
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIRAIQMA
I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W+R I+ A
4
gnl|BL_ORD_ID|8
gi|297667453|ref|XP_002811995.1| PREDICTED: pleckstrin-like [Pongo abelii]
8
350
1
204.142
518
1.10449e-68
1
98
4
101
0
0
97
97
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
2
45.4394
106
6.1425e-10
3
96
246
345
0
0
30
48
6
100
IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMF----VLKITTTKQQDHFFQAAFLEERDAWVRDIKKA
IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGAEDPLGAIHLRGCVVTSVESNSNGRKSEEENLFEIITADEVHYFLQAATPKERTEWIKAIQVA
I++G L+K+G WK +L ED + +Y P G I L+G +TS + R + +I T + +F QAA +ER W++ I+ A
5
gnl|BL_ORD_ID|9
gi|350596020|ref|XP_003360649.2| PREDICTED: pleckstrin-like [Sus scrofa]
9
228
1
199.519
506
1.97058e-68
1
98
4
101
0
0
94
96
0
98
KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK
KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK
KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNSPKGMIPLKGSTLTSPCQDFGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK
2
32.3426
72
1.12281e-05
30
96
153
223
0
0
21
32
4
71
IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFV---LKITTTKQQDHFFQAAFLEERDAWVRDIKKA
LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA
+ +Y P G I L+G +TS GK F+ T + +F QAA +ER W++ I+ A
20
6406
37
345626
0.041
0.267
0.14
"""
        self.check_raw('Blast/xml_2226_blastp_004.xml', query_id, expected)
class BlastXmlIndexCases(CheckIndex):
    """Check indexing of BLAST XML files.

    Every test hands ``check_index`` one fixture path together with the
    format name stored in ``fmt``.
    """

    fmt = 'blast-xml'

    def test_blastxml_2212L_blastp_001(self):
        """Test blast-xml indexing, BLAST 2.2.12"""
        self.check_index('Blast/xml_2212L_blastp_001.xml', self.fmt)

    def test_blastxml_2218_blastp_001(self):
        """Test blast-xml indexing, BLAST 2.2.18+"""
        self.check_index('Blast/xml_2218_blastp_001.xml', self.fmt)

    def test_blastxml_2222_blastx_001(self):
        """Test blast-xml indexing, BLAST 2.2.22+"""
        self.check_index('Blast/xml_2222_blastx_001.xml', self.fmt)

    def test_blastxml_2226_tblastn_001(self):
        """Test blast-xml indexing, BLAST 2.2.26+, multiple queries"""
        self.check_index('Blast/xml_2226_tblastn_001.xml', self.fmt)

    def test_blastxml_2226_tblastn_002(self):
        """Test blast-xml indexing, BLAST 2.2.26+, single query, no hits"""
        self.check_index('Blast/xml_2226_tblastn_002.xml', self.fmt)

    def test_blastxml_2226_tblastn_004(self):
        """Test blast-xml indexing, BLAST 2.2.26+, single query, multiple hits"""
        self.check_index('Blast/xml_2226_tblastn_004.xml', self.fmt)
if __name__ == "__main__":
    # Run this module's tests through a text runner at verbosity level 2.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))