# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Tests for SearchIO HmmerIO hmmer3-tab parser."""

import os
import unittest

from Bio import BiopythonExperimentalWarning

import warnings
with warnings.catch_warnings():
    # SearchIO emits an experimental warning on import; silence it here so
    # the test output stays clean.
    warnings.simplefilter('ignore', BiopythonExperimentalWarning)
    from Bio.SearchIO import parse


# test case files are in the Hmmer directory
TEST_DIR = 'Hmmer'
FMT = 'hmmer3-tab'

# Attribute names checked on every hit and HSP, in the order the expected
# values are listed in the ``scores`` / ``hsp_scores`` tuples below.
_SCORE_ATTRS = ('evalue', 'bitscore', 'bias')

# Attribute names checked on every hit, in the order the expected values
# are listed in the ``domains`` tuples below.
_DOMAIN_ATTRS = (
    'domain_exp_num',
    'region_num',
    'cluster_num',
    'overlap_num',
    'env_num',
    'domain_obs_num',
    'domain_reported_num',
    'domain_included_num',
)


def get_file(filename):
    """Return the path of a test file inside TEST_DIR."""
    return os.path.join(TEST_DIR, filename)


class _TabCases(unittest.TestCase):
    """Shared assertion helpers for the hmmer3-tab test cases.

    This class defines no test methods itself; it only factors out the
    per-query and per-hit checks that every test repeats.
    """

    def _check_attrs(self, obj, names, expected):
        """Assert that each attribute in ``names`` equals its expected value.

        ``names`` and ``expected`` are parallel sequences; attributes are
        checked in the given order and the first mismatch fails the test
        with a message naming the offending attribute.
        """
        # Guard against a mistyped expectation tuple silently skipping
        # attributes (zip would otherwise truncate to the shorter one).
        self.assertEqual(len(names), len(expected))
        for name, value in zip(names, expected):
            actual = getattr(obj, name)
            self.assertEqual(
                value, actual,
                '%s.%s: expected %r, got %r'
                % (type(obj).__name__, name, value, actual))

    def _check_qresult(self, qresult, num_hits, qresult_id, accession):
        """Assert the hit count, ID and accession of a query result."""
        self.assertEqual(num_hits, len(qresult))
        self.assertEqual(qresult_id, qresult.id)
        self.assertEqual(accession, qresult.accession)

    def _check_hit(self, hit, hit_id, accession, description, scores,
                   domains, hsp_scores, hsp_index=0):
        """Assert all parsed values of a single-HSP hit.

        Arguments:
         - hit - the hit object to check; it must contain exactly one HSP.
         - hit_id, accession, description - expected hit identifiers.
         - scores - expected ``(evalue, bitscore, bias)`` of the hit.
         - domains - expected values of the attributes in _DOMAIN_ATTRS,
           in that order.
         - hsp_scores - expected ``(evalue, bitscore, bias)`` of the HSP.
         - hsp_index - index into ``hit.hsps`` of the HSP to check.

        """
        self.assertEqual(1, len(hit))
        self.assertEqual(hit_id, hit.id)
        self.assertEqual(accession, hit.accession)
        self._check_attrs(hit, _SCORE_ATTRS, scores)
        self._check_attrs(hit, _DOMAIN_ATTRS, domains)
        self.assertEqual(description, hit.description)
        self._check_attrs(hit.hsps[hsp_index], _SCORE_ATTRS, hsp_scores)


class HmmscanCases(_TabCases):
    """Tests for hmmer3-tab files produced by hmmscan."""

    def test_31b1_hmmscan_001(self):
        """Test parsing hmmer3-tab, hmmscan 3.1b1, multiple queries (tab_31b1_hmmscan_001)"""

        tab_file = get_file('tab_31b1_hmmscan_001.out')
        qresults = list(parse(tab_file, FMT))
        self.assertEqual(4, len(qresults))

        # first qresult, first hit, first hsp
        qresult = qresults[0]
        self._check_qresult(qresult, 1, 'gi|4885477|ref|NP_005359.1|', '-')
        self._check_hit(
            qresult[0], 'Globin', 'PF00042.17', 'Globin',
            scores=(1e-22, 80.5, 0.3),
            domains=(1.3, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(1.6e-22, 79.8, 0.3))

        # last qresult, last hit, last hsp
        qresult = qresults[-1]
        self._check_qresult(qresult, 5, 'gi|125490392|ref|NP_038661.2|', '-')
        self._check_hit(
            qresult[-1], 'DUF521', 'PF04412.8',
            'Protein of unknown function (DUF521)',
            scores=(0.15, 10.5, 0.1),
            domains=(1.4, 1, 0, 0, 1, 1, 1, 0),
            hsp_scores=(0.28, 9.6, 0.1))

    def test_30_hmmscan_001(self):
        "Test parsing hmmer3-tab, hmmscan 3.0, multiple queries (tab_30_hmmscan_001)"

        tab_file = get_file('tab_30_hmmscan_001.out')
        qresults = parse(tab_file, FMT)
        counter = 0

        # first qresult
        qresult = next(qresults)
        counter += 1
        self._check_qresult(qresult, 1, 'gi|4885477|ref|NP_005359.1|', '-')
        self._check_hit(
            qresult[0], 'Globin', 'PF00042.17', 'Globin',
            scores=(6e-21, 74.6, 0.3),
            domains=(1.3, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(9.2e-21, 74.0, 0.2))

        # second qresult
        qresult = next(qresults)
        counter += 1
        self._check_qresult(qresult, 2, 'gi|126362951:116-221', '-')
        self._check_hit(
            qresult[0], 'Ig_3', 'PF13927.1', 'Immunoglobulin domain',
            scores=(1.4e-09, 38.2, 0.4),
            domains=(1.3, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(2.1e-09, 37.6, 0.3))
        self._check_hit(
            qresult[1], 'Ig_2', 'PF13895.1', 'Immunoglobulin domain',
            scores=(3.5e-05, 23.7, 0.1),
            domains=(1.1, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(4.3e-05, 23.4, 0.1))

        # third qresult
        qresult = next(qresults)
        counter += 1
        self._check_qresult(qresult, 2, 'gi|22748937|ref|NP_065801.1|', '-')
        self._check_hit(
            qresult[0], 'Xpo1', 'PF08389.7', 'Exportin 1-like protein',
            scores=(7.8e-34, 116.6, 7.8),
            domains=(2.8, 2, 0, 0, 2, 2, 2, 1),
            hsp_scores=(1.1e-33, 116.1, 3.4))
        self._check_hit(
            qresult[1], 'IBN_N', 'PF03810.14',
            'Importin-beta N-terminal domain',
            scores=(0.0039, 16.9, 0.0),
            domains=(2.7, 2, 0, 0, 2, 2, 2, 1),
            hsp_scores=(0.033, 14.0, 0.0))

        # last qresult
        qresult = next(qresults)
        counter += 1
        self._check_qresult(qresult, 5, 'gi|125490392|ref|NP_038661.2|', '-')
        # first hit
        self._check_hit(
            qresult[0], 'Pou', 'PF00157.12',
            'Pou domain - N-terminal to homeobox domain',
            scores=(7e-37, 124.8, 0.5),
            domains=(1.5, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(1.4e-36, 123.9, 0.3))
        # second hit
        self._check_hit(
            qresult[1], 'Homeobox', 'PF00046.24', 'Homeobox domain',
            scores=(2.1e-18, 65.5, 1.1),
            domains=(1.5, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(4.1e-18, 64.6, 0.7))
        # third hit
        self._check_hit(
            qresult[2], 'HTH_31', 'PF13560.1', 'Helix-turn-helix domain',
            scores=(0.012, 15.6, 0.0),
            domains=(2.2, 2, 0, 0, 2, 2, 2, 0),
            hsp_scores=(0.16, 12.0, 0.0))
        # fourth hit
        self._check_hit(
            qresult[3], 'Homeobox_KN', 'PF05920.6', 'Homeobox KN domain',
            scores=(0.039, 13.5, 0.0),
            domains=(1.6, 1, 0, 0, 1, 1, 1, 0),
            hsp_scores=(0.095, 12.3, 0.0))
        # fifth hit
        self._check_hit(
            qresult[4], 'DUF521', 'PF04412.8',
            'Protein of unknown function (DUF521)',
            scores=(0.14, 10.5, 0.1),
            domains=(1.4, 1, 0, 0, 1, 1, 1, 0),
            hsp_scores=(0.26, 9.6, 0.1))

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(4, counter)

    def test_30_hmmscan_002(self):
        "Test parsing hmmer3-tab, hmmscan 3.0, single query, no hits (tab_30_hmmscan_002)"

        tab_file = get_file('tab_30_hmmscan_002.out')
        qresults = parse(tab_file, FMT)

        # a file without hits must yield no query results at all
        self.assertRaises(StopIteration, next, qresults)

    def test_30_hmmscan_003(self):
        "Test parsing hmmer3-tab, hmmscan 3.0, single query, single hit, single hsp (tab_30_hmmscan_003)"

        tab_file = get_file('tab_30_hmmscan_003.out')
        qresults = parse(tab_file, FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self._check_qresult(qresult, 1, 'gi|4885477|ref|NP_005359.1|', '-')
        self._check_hit(
            qresult[0], 'Globin', 'PF00042.17', 'Globin',
            scores=(6e-21, 74.6, 0.3),
            domains=(1.3, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(9.2e-21, 74.0, 0.2))

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)

    def test_30_hmmscan_004(self):
        "Test parsing hmmer3-tab, hmmscan 3.0, single query, multiple hits (tab_30_hmmscan_004)"

        tab_file = get_file('tab_30_hmmscan_004.out')
        qresults = parse(tab_file, FMT)
        counter = 0

        qresult = next(qresults)
        counter += 1
        self._check_qresult(qresult, 2, 'gi|126362951:116-221', '-')
        self._check_hit(
            qresult[0], 'Ig_3', 'PF13927.1', 'Immunoglobulin domain',
            scores=(1.4e-09, 38.2, 0.4),
            domains=(1.3, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(2.1e-09, 37.6, 0.3))
        self._check_hit(
            qresult[1], 'Ig_2', 'PF13895.1', 'Immunoglobulin domain',
            scores=(3.5e-05, 23.7, 0.1),
            domains=(1.1, 1, 0, 0, 1, 1, 1, 1),
            hsp_scores=(4.3e-05, 23.4, 0.1))

        # test if we've properly finished iteration
        self.assertRaises(StopIteration, next, qresults)
        self.assertEqual(1, counter)


class HmmsearchCases(_TabCases):
    """Tests for hmmer3-tab files produced by hmmsearch."""

    def test_31b1_hmmsearch_001(self):
        """Test parsing hmmer3-tab, hmmsearch 3.1b1, single query, multiple hits (tab_31b1_hmmsearch_001)"""

        tab_file = get_file('tab_31b1_hmmsearch_001.out')
        qresults = list(parse(tab_file, FMT))
        self.assertEqual(1, len(qresults))

        # first qresult
        qresult = qresults[0]
        self._check_qresult(qresult, 4, 'Pkinase', 'PF00069.17')

        # first hit, first hsp
        self._check_hit(
            qresult[0], 'sp|Q9WUT3|KS6A2_MOUSE', '-',
            'Ribosomal protein S6 kinase alpha-2 OS=Mus musculus GN=Rps6ka2 PE=1 SV=1',
            scores=(8.5e-147, 492.3, 0.0),
            domains=(2.1, 2, 0, 0, 2, 2, 2, 2),
            hsp_scores=(1.2e-72, 249.3, 0.0))

        # last hit, last hsp
        self._check_hit(
            qresult[-1], 'sp|P18652|KS6AA_CHICK', '-',
            'Ribosomal protein S6 kinase 2 alpha OS=Gallus gallus GN=RPS6KA PE=2 SV=1',
            scores=(2.6e-145, 487.5, 0.0),
            domains=(2.1, 2, 0, 0, 2, 2, 2, 2),
            hsp_scores=(7.6e-72, 246.7, 0.0),
            hsp_index=-1)


if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)