# Copyright 2012 by Wibowo Arindrarto. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Tests for SearchIO BlastIO plain text parsers.""" import os import unittest from Bio import BiopythonExperimentalWarning import warnings with warnings.catch_warnings(): warnings.simplefilter('ignore', BiopythonExperimentalWarning) from Bio.SearchIO import parse # test case files are in the Blast directory TEST_DIR = 'Blast' FMT = 'blast-text' def get_file(filename): """Returns the path of a test file.""" return os.path.join(TEST_DIR, filename) class BaseBlastCases(unittest.TestCase): def check_common_attrs(self, qresults): # check common attributes for qresult in qresults: for hit in qresult: self.assertEqual(qresult.id, hit.query_id) for hsp in hit: self.assertEqual(hit.id, hsp.hit_id) self.assertEqual(qresult.id, hsp.query_id) class BlastnCases(BaseBlastCases): def test_text_2226_blastn_001(self): """Test parsing blastn output (text_2226_blastn_001.txt)""" blast_file = get_file('text_2226_blastn_001.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(128, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('blastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) def test_text_2226_blastn_002(self): """Test parsing blastn output (text_2226_blastn_002.txt)""" blast_file = get_file('text_2226_blastn_002.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|356995852:1-490', qresult.id) self.assertEqual('Mus musculus POU domain, class 5, transcriptionfactor 1 (Pou5f1), transcript variant 1, mRNA', qresult.description) self.assertEqual(490, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('blastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(8, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|NM_013633.3|', hit.id) self.assertEqual('Mus musculus POU domain, class 5, transcription factor 1 (Pou5f1), transcript variant 1, mRNA', hit.description) self.assertEqual(1353, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(490, hsp.aln_span) self.assertEqual(0.0, hsp.evalue) self.assertEqual(905.0, hsp.bitscore) self.assertEqual(490.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(490, hsp.ident_num) self.assertEqual(490, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(490, hsp.query_end) self.assertEqual(490, hsp.hit_end) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.hit.seq)[:40]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|XR_141831.1|', hit.id) self.assertEqual('PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA ref|XR_105837.2| PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA ref|XR_141464.1| PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA ref|XR_141446.1| PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA', hit.description) self.assertEqual(570, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(490, hsp.aln_span) self.assertEqual(0.0, hsp.evalue) self.assertEqual(900.0, hsp.bitscore) self.assertEqual(487.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(489, hsp.ident_num) self.assertEqual(490, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(80, hsp.hit_start) self.assertEqual(490, hsp.query_end) self.assertEqual(570, hsp.hit_end) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.hit.seq)[:40]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.hit.seq)[-40:]) def test_text_2226_blastn_003(self): """Test parsing blastn output (text_2226_blastn_003.txt)""" blast_file = get_file('text_2226_blastn_003.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('hg19_dna', qresult.id) self.assertEqual('range=chr1:1207307-1207372 5\'pad=0 3\'pad=0 strand=+repeatMasking=none', qresult.description) self.assertEqual(66, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('blastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|XM_003267724.1|', hit.id) self.assertEqual('PREDICTED: Nomascus leucogenys ATG14 autophagy related 14 homolog (S. cerevisiae) (ATG14), mRNA', hit.description) self.assertEqual(4771, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(62, hsp.aln_span) self.assertEqual(3e-24, hsp.evalue) self.assertEqual(115.0, hsp.bitscore) self.assertEqual(62.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(62, hsp.ident_num) self.assertEqual(62, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(4, hsp.query_start) self.assertEqual(2864, hsp.hit_start) self.assertEqual(66, hsp.query_end) self.assertEqual(2926, hsp.hit_end) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.hit.seq)[:40]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCaaaaaaaaaaaaaaaaaa', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCAAAAAAAAAAAAAAAAAA', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|NM_001040441.1|', hit.id) self.assertEqual('Homo sapiens zinc finger and BTB domain containing 8A (ZBTB8A), mRNA', hit.description) self.assertEqual(7333, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(62, hsp.aln_span) self.assertEqual(3e-24, hsp.evalue) self.assertEqual(115.0, hsp.bitscore) self.assertEqual(62.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(62, hsp.ident_num) self.assertEqual(62, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(4, hsp.query_start) self.assertEqual(3676, hsp.hit_start) self.assertEqual(66, hsp.query_end) self.assertEqual(3738, hsp.hit_end) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.hit.seq)[:40]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCaaaaaaaaaaaaaaaaaa', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCAAAAAAAAAAAAAAAAAA', str(hsp.hit.seq)[-40:]) # first qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(53, hsp.aln_span) self.assertEqual(3e-19, hsp.evalue) self.assertEqual(99.0, hsp.bitscore) self.assertEqual(53.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(53, hsp.ident_num) self.assertEqual(53, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(5, hsp.query_start) self.assertEqual(2823, hsp.hit_start) self.assertEqual(58, hsp.query_end) self.assertEqual(2876, hsp.hit_end) self.assertEqual('CCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGT', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('CCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGT', str(hsp.hit.seq)[:40]) self.assertEqual('GCCTGGGCAACAAGAGCGAAACTCCGTCTCaaaaaaaaaa', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GCCTGGGCAACAAGAGCGAAACTCCGTCTCAAAAAAAAAA', str(hsp.hit.seq)[-40:]) def test_text_2226_blastn_004(self): """Test parsing blastn output (text_2226_blastn_004.txt)""" blast_file = get_file('text_2226_blastn_004.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(3, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(128, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('blastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) # test second qresult qresult = qresults[1] self.assertEqual('gi|356995852:1-490', qresult.id) self.assertEqual('Mus musculus POU domain, class 5, transcriptionfactor 1 (Pou5f1), transcript variant 1, mRNA', qresult.description) self.assertEqual(490, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('blastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # second qresult, first hit hit = qresult[0] self.assertEqual('gi|356995852|ref|NM_013633.3|', hit.id) self.assertEqual('Mus musculus POU domain, class 5, transcription factor 1 (Pou5f1), transcript variant 1, mRNA', hit.description) self.assertEqual(1353, hit.seq_len) self.assertEqual(1, len(hit)) # second qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(490, hsp.aln_span) self.assertEqual(0.0, hsp.evalue) self.assertEqual(905.0, hsp.bitscore) self.assertEqual(490.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(490, hsp.ident_num) self.assertEqual(490, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(490, hsp.query_end) self.assertEqual(490, hsp.hit_end) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.hit.seq)[:40]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.hit.seq)[-40:]) # second qresult, second hit hit = qresult[1] self.assertEqual('gi|377833530|ref|XR_141831.1|', hit.id) self.assertEqual('PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA', hit.description) self.assertEqual(570, hit.seq_len) self.assertEqual(1, len(hit)) # second qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(490, hsp.aln_span) self.assertEqual(0.0, hsp.evalue) self.assertEqual(900.0, hsp.bitscore) self.assertEqual(487.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(489, hsp.ident_num) self.assertEqual(490, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(80, hsp.hit_start) self.assertEqual(490, hsp.query_end) self.assertEqual(570, hsp.hit_end) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GAGGTGAAACCGTCCCTAGGTGAGCCGTCTTTCCACCAGG', str(hsp.hit.seq)[:40]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AGTCCCAGGACATGAAAGCCCTGCAGAAGGAGCTAGAACA', str(hsp.hit.seq)[-40:]) # test third qresult qresult = qresults[2] self.assertEqual('hg19_dna', qresult.id) self.assertEqual('range=chr1:1207307-1207372 5\'pad=0 3\'pad=0 strand=+repeatMasking=none', qresult.description) self.assertEqual(66, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('blastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # third qresult, first hit hit = qresult[0] self.assertEqual('gi|94721341|ref|NM_001040441.1|', hit.id) self.assertEqual('Homo sapiens zinc finger and BTB domain containing 8A (ZBTB8A), mRNA', hit.description) self.assertEqual(7333, hit.seq_len) self.assertEqual(2, len(hit)) # third qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(62, hsp.aln_span) self.assertEqual(6e-29, hsp.evalue) self.assertEqual(115.0, hsp.bitscore) self.assertEqual(62.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(62, hsp.ident_num) self.assertEqual(62, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(4, hsp.query_start) self.assertEqual(3676, hsp.hit_start) self.assertEqual(66, hsp.query_end) self.assertEqual(3738, hsp.hit_end) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.hit.seq)[:40]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCaaaaaaaaaaaaaaaaaa', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCAAAAAAAAAAAAAAAAAA', str(hsp.hit.seq)[-40:]) # third qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(53, hsp.aln_span) self.assertEqual(6e-24, hsp.evalue) self.assertEqual(99.0, hsp.bitscore) self.assertEqual(53.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(53, hsp.ident_num) self.assertEqual(53, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(5, hsp.query_start) self.assertEqual(2823, hsp.hit_start) self.assertEqual(58, hsp.query_end) self.assertEqual(2876, hsp.hit_end) self.assertEqual('CCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGT', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('CCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCGT', str(hsp.hit.seq)[:40]) self.assertEqual('GCCTGGGCAACAAGAGCGAAACTCCGTCTCaaaaaaaaaa', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GCCTGGGCAACAAGAGCGAAACTCCGTCTCAAAAAAAAAA', str(hsp.hit.seq)[-40:]) # third qresult, second hit hit = qresult[1] self.assertEqual('gi|332237160|ref|XM_003267724.1|', hit.id) self.assertEqual('PREDICTED: Nomascus leucogenys ATG14 autophagy related 14 homolog (S. cerevisiae) (ATG14), mRNA', hit.description) self.assertEqual(4771, hit.seq_len) self.assertEqual(1, len(hit)) # third qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(62, hsp.aln_span) self.assertEqual(6e-29, hsp.evalue) self.assertEqual(115.0, hsp.bitscore) self.assertEqual(62.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(62, hsp.ident_num) self.assertEqual(62, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(4, hsp.query_start) self.assertEqual(2864, hsp.hit_start) self.assertEqual(66, hsp.query_end) self.assertEqual(2926, hsp.hit_end) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.query.seq)[:40]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GCCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCCG', str(hsp.hit.seq)[:40]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCaaaaaaaaaaaaaaaaaa', str(hsp.query.seq)[-40:]) self.assertEqual('||||||||||||||||||||||||||||||||||||||||', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('AACAAGAGCGAAACTCCGTCTCAAAAAAAAAAAAAAAAAA', str(hsp.hit.seq)[-40:]) class BlastpCases(BaseBlastCases): def test_text_2226_blastp_001(self): """Test parsing blastp output (text_2226_blastp_001.txt)""" blast_file = get_file('text_2226_blastp_001.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(32, qresult.seq_len) self.assertEqual('NCBI Protein Reference Sequences', qresult.target) self.assertEqual('blastp', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) def test_text_2226_blastp_002(self): """Test parsing blastp output (text_2226_blastp_002.txt)""" blast_file = get_file('text_2226_blastp_002.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id) self.assertEqual('membrane bound lipoprotein [Bacillussubtilis subsp. subtilis str. 168]', qresult.description) self.assertEqual(102, qresult.seq_len) self.assertEqual('NCBI Protein Reference Sequences', qresult.target) self.assertEqual('blastp', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|NP_391444.1|', hit.id) self.assertEqual('membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] ref|ZP_03593363.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168] ref|ZP_03597648.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. NCIB 3610] ref|ZP_03602051.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. JH642] ref|ZP_03606337.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. SMY] ref|YP_004205398.1| unnamed protein product [Bacillus subtilis BSn5]', hit.description) self.assertEqual(102, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(102, hsp.aln_span) self.assertEqual(1e-66, hsp.evalue) self.assertEqual(205.0, hsp.bitscore) self.assertEqual(521.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(102, hsp.ident_num) self.assertEqual(102, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(102, hsp.query_end) self.assertEqual(102, hsp.hit_end) self.assertEqual('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEG', str(hsp.query.seq)[:40]) self.assertEqual('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEG', hsp.aln_annotation['similarity'][:40]) self.assertEqual('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEG', str(hsp.hit.seq)[:40]) self.assertEqual('DITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN', str(hsp.query.seq)[-40:]) self.assertEqual('DITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('DITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|YP_003922001.1|', hit.id) self.assertEqual('membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]', hit.description) self.assertEqual(100, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(102, hsp.aln_span) self.assertEqual(1e-40, hsp.evalue) self.assertEqual(139.0, hsp.bitscore) self.assertEqual(350.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(2, hsp.gap_num) self.assertEqual(69, hsp.ident_num) self.assertEqual(81, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(102, hsp.query_end) self.assertEqual(100, hsp.hit_end) self.assertEqual('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEG', str(hsp.query.seq)[:40]) self.assertEqual('MKK LFFILLL+GCGV ++KSQGED + TKEG', hsp.aln_annotation['similarity'][:40]) self.assertEqual('MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEG', str(hsp.hit.seq)[:40]) self.assertEqual('DITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN', str(hsp.query.seq)[-40:]) self.assertEqual('DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('DITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN', str(hsp.hit.seq)[-40:]) def test_text_2226_blastp_003(self): """Test parsing blastp output (text_2226_blastp_003.txt)""" blast_file = get_file('text_2226_blastp_003.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|11464971:4-101', qresult.id) self.assertEqual('pleckstrin [Mus musculus]', qresult.description) self.assertEqual(98, qresult.seq_len) self.assertEqual('NCBI Protein Reference Sequences', qresult.target) self.assertEqual('blastp', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|NP_062422.1|', hit.id) self.assertEqual('pleckstrin [Mus musculus]', hit.description) self.assertEqual(350, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(1e-63, hsp.evalue) self.assertEqual(205.0, hsp.bitscore) self.assertEqual(522.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(98, hsp.ident_num) self.assertEqual(98, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(3, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(101, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # first qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(100, hsp.aln_span) self.assertEqual(0.002, hsp.evalue) self.assertEqual(43.5, hsp.bitscore) self.assertEqual(101.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(6, hsp.gap_num) self.assertEqual(29, hsp.ident_num) self.assertEqual(48, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(2, hsp.query_start) self.assertEqual(245, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(345, hsp.hit_end) self.assertEqual('IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('I++G L+K+G WK +L ED + +Y ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGED', str(hsp.hit.seq)[:40]) self.assertEqual('FGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual(' K + +I T + ++ QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|XP_003502426.1|', hit.id) self.assertEqual('PREDICTED: pleckstrin-like [Cricetulus griseus]', hit.description) self.assertEqual(350, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(2e-63, hsp.evalue) self.assertEqual(205.0, hsp.bitscore) self.assertEqual(521.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(98, hsp.ident_num) self.assertEqual(98, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(3, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(101, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # first qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(100, hsp.aln_span) self.assertEqual(0.001, hsp.evalue) self.assertEqual(43.9, hsp.bitscore) self.assertEqual(102.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(6, hsp.gap_num) self.assertEqual(30, hsp.ident_num) self.assertEqual(50, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(2, hsp.query_start) self.assertEqual(245, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(345, hsp.hit_end) self.assertEqual('IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('I++G L+K+G WK +L ED + +Y ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGED', str(hsp.hit.seq)[:40]) self.assertEqual('GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual('GK+ + +I T + ++ QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) def test_text_2226_blastp_004(self): """Test parsing blastp output (text_2226_blastp_004.txt)""" blast_file = get_file('text_2226_blastp_004.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(3, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(32, qresult.seq_len) self.assertEqual('minirefseq_prot', qresult.target) self.assertEqual('blastp', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) # test second qresult qresult = qresults[1] self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id) self.assertEqual('membrane bound lipoprotein [Bacillussubtilis subsp. subtilis str. 168]', qresult.description) self.assertEqual(102, qresult.seq_len) self.assertEqual('minirefseq_prot', qresult.target) self.assertEqual('blastp', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # second qresult, first hit hit = qresult[0] self.assertEqual('gi|308175296|ref|YP_003922001.1|', hit.id) self.assertEqual('membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]', hit.description) self.assertEqual(100, hit.seq_len) self.assertEqual(1, len(hit)) # second qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(102, hsp.aln_span) self.assertEqual(2e-46, hsp.evalue) self.assertEqual(139.0, hsp.bitscore) self.assertEqual(350.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(2, hsp.gap_num) self.assertEqual(69, hsp.ident_num) self.assertEqual(81, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(102, hsp.query_end) self.assertEqual(100, hsp.hit_end) self.assertEqual('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEG', str(hsp.query.seq)[:40]) self.assertEqual('MKK LFFILLL+GCGV ++KSQGED + TKEG', hsp.aln_annotation['similarity'][:40]) self.assertEqual('MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEG', str(hsp.hit.seq)[:40]) self.assertEqual('DITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN', str(hsp.query.seq)[-40:]) self.assertEqual('DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('DITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN', str(hsp.hit.seq)[-40:]) # second qresult, second hit hit = qresult[1] self.assertEqual('gi|375363999|ref|YP_005132038.1|', hit.id) self.assertEqual('lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946]', hit.description) self.assertEqual(105, hit.seq_len) self.assertEqual(1, len(hit)) # second qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(105, hsp.aln_span) self.assertEqual(7e-27, hsp.evalue) self.assertEqual(89.0, hsp.bitscore) self.assertEqual(219.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(5, hsp.gap_num) self.assertEqual(48, hsp.ident_num) self.assertEqual(69, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(101, hsp.query_end) self.assertEqual(104, hsp.hit_end) self.assertEqual('MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIE', str(hsp.query.seq)[:40]) self.assertEqual('MKK IA F ILL L+ CG Q +G S ++ +', hsp.aln_annotation['similarity'][:40]) self.assertEqual('MKKTIAASFLILLFSVVLAACGTAEQSKKGSG-SSENQAQ', str(hsp.hit.seq)[:40]) self.assertEqual('LDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA', str(hsp.query.seq)[-40:]) self.assertEqual(' + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA', str(hsp.hit.seq)[-40:]) # test third qresult qresult = qresults[2] self.assertEqual('gi|11464971:4-101', qresult.id) self.assertEqual('pleckstrin [Mus musculus]', qresult.description) self.assertEqual(98, qresult.seq_len) self.assertEqual('minirefseq_prot', qresult.target) self.assertEqual('blastp', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # third qresult, first hit hit = qresult[0] self.assertEqual('gi|11464971|ref|NP_062422.1|', hit.id) self.assertEqual('pleckstrin [Mus musculus]', hit.description) self.assertEqual(350, hit.seq_len) self.assertEqual(2, len(hit)) # third qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(2e-69, hsp.evalue) self.assertEqual(205.0, hsp.bitscore) self.assertEqual(522.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(98, hsp.ident_num) self.assertEqual(98, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(3, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(101, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # third qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(100, hsp.aln_span) self.assertEqual(3e-09, hsp.evalue) self.assertEqual(43.5, hsp.bitscore) self.assertEqual(101.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(6, hsp.gap_num) self.assertEqual(29, hsp.ident_num) self.assertEqual(48, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(2, hsp.query_start) self.assertEqual(245, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(345, hsp.hit_end) self.assertEqual('IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('I++G L+K+G WK +L ED + +Y ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGED', str(hsp.hit.seq)[:40]) self.assertEqual('FGK--RMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual(' K + +I T + ++ QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VKKSDEENLFEIITADEVHYYLQAATSKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) # third qresult, second hit hit = qresult[1] self.assertEqual('gi|354480464|ref|XP_003502426.1|', hit.id) self.assertEqual('PREDICTED: pleckstrin-like [Cricetulus griseus]', hit.description) self.assertEqual(350, hit.seq_len) self.assertEqual(2, len(hit)) # third qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(3e-69, hsp.evalue) self.assertEqual(205.0, hsp.bitscore) self.assertEqual(521.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(98, hsp.ident_num) self.assertEqual(98, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(3, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(101, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # third qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(100, hsp.aln_span) self.assertEqual(2e-09, hsp.evalue) self.assertEqual(43.9, hsp.bitscore) self.assertEqual(102.0, hsp.bitscore_raw) self.assertEqual(0, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(6, hsp.gap_num) self.assertEqual(30, hsp.ident_num) self.assertEqual(50, hsp.pos_num) self.assertEqual(0, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(2, hsp.query_start) self.assertEqual(245, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(345, hsp.hit_end) self.assertEqual('IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('I++G L+K+G WK +L ED + +Y ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGED', str(hsp.hit.seq)[:40]) self.assertEqual('GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual('GK+ + +I T + ++ QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) class BlastxCases(BaseBlastCases): def test_text_2226_blastx_001(self): """Test parsing blastx output (text_2226_blastx_001.txt)""" blast_file = get_file('text_2226_blastx_001.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(128, qresult.seq_len) self.assertEqual('NCBI Protein Reference Sequences', qresult.target) self.assertEqual('blastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) def test_text_2226_blastx_002(self): """Test parsing blastx output (text_2226_blastx_002.txt)""" blast_file = get_file('text_2226_blastx_002.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|356995852:1-490', qresult.id) self.assertEqual('Mus musculus POU domain, class 5, transcriptionfactor 1 (Pou5f1), transcript variant 1, mRNA', qresult.description) self.assertEqual(490, qresult.seq_len) self.assertEqual('NCBI Protein Reference Sequences', qresult.target) self.assertEqual('blastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|NP_038661.2|', hit.id) self.assertEqual('POU domain, class 5, transcription factor 1 isoform 1 [Mus musculus]', hit.description) self.assertEqual(352, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(140, hsp.aln_span) self.assertEqual(4e-57, hsp.evalue) self.assertEqual(192.0, hsp.bitscore) self.assertEqual(487.0, hsp.bitscore_raw) self.assertEqual(3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(140, hsp.ident_num) self.assertEqual(140, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(68, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(488, hsp.query_end) self.assertEqual(140, hsp.hit_end) self.assertEqual('MAGHLAsdfafspppgggdgsagLEPGWVDPRTWLSFQgp', str(hsp.query.seq)[:40]) self.assertEqual('MAGHLASDFAFSPPPGGGDGSAGLEPGWVDPRTWLSFQGP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('MAGHLASDFAFSPPPGGGDGSAGLEPGWVDPRTWLSFQGP', str(hsp.hit.seq)[:40]) self.assertEqual('NSEGTSSEPCADRPNAVKLEKVEPTPEESQDMKALQKELE', str(hsp.query.seq)[-40:]) self.assertEqual('NSEGTSSEPCADRPNAVKLEKVEPTPEESQDMKALQKELE', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('NSEGTSSEPCADRPNAVKLEKVEPTPEESQDMKALQKELE', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|NP_001009178.1|', hit.id) self.assertEqual('POU class 5 homeobox 1 [Rattus norvegicus]', hit.description) self.assertEqual(352, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(140, hsp.aln_span) self.assertEqual(3e-52, hsp.evalue) self.assertEqual(179.0, hsp.bitscore) self.assertEqual(454.0, hsp.bitscore_raw) self.assertEqual(3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(133, hsp.ident_num) self.assertEqual(135, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(68, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(488, hsp.query_end) self.assertEqual(140, hsp.hit_end) self.assertEqual('MAGHLAsdfafspppgggdgsagLEPGWVDPRTWLSFQgp', str(hsp.query.seq)[:40]) self.assertEqual('MAGHLASDFAFSPPPGGGDGSAGLEPGWVDPRTWLSFQGP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('MAGHLASDFAFSPPPGGGDGSAGLEPGWVDPRTWLSFQGP', str(hsp.hit.seq)[:40]) self.assertEqual('NSEGTSSEPCADRPNAVKLEKVEPTPEESQDMKALQKELE', str(hsp.query.seq)[-40:]) self.assertEqual('NSEG SS PC RP+AVKLEKVEP+PEESQDMKALQKELE', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('NSEGASSGPCTARPSAVKLEKVEPSPEESQDMKALQKELE', str(hsp.hit.seq)[-40:]) def test_text_2226_blastx_003(self): """Test parsing blastx output (text_2226_blastx_003.txt)""" blast_file = get_file('text_2226_blastx_003.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('hg19_dna', qresult.id) self.assertEqual('range=chr1:1207057-1207541 5\'pad=0 3\'pad=0 strand=+repeatMasking=none', qresult.description) self.assertEqual(485, qresult.seq_len) self.assertEqual('NCBI Protein Reference Sequences', qresult.target) self.assertEqual('blastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|XP_003278367.1|', hit.id) self.assertEqual('PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]', hit.description) self.assertEqual(132, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(95, hsp.aln_span) self.assertEqual(2e-32, hsp.evalue) self.assertEqual(121.0, hsp.bitscore) self.assertEqual(304.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(69, hsp.ident_num) self.assertEqual(74, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(15, hsp.query_start) self.assertEqual(24, hsp.hit_start) self.assertEqual(300, hsp.query_end) self.assertEqual(119, hsp.hit_end) self.assertEqual('LRRSFALVAQAGVQWLDLGppqpppPGFK*FSCLSHPSSW', str(hsp.query.seq)[:40]) self.assertEqual('LRRSFALVAQ VQW +LG PQPPPPGFK FSCLS SSW', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LRRSFALVAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSW', str(hsp.hit.seq)[:40]) self.assertEqual('VETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP', str(hsp.query.seq)[-40:]) self.assertEqual('VE GF HVGQAGLE SG+ P SQS GI GVSH AQP', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VEMGFLHVGQAGLELVTSGDPPTLTSQSAGIIGVSHCAQP', str(hsp.hit.seq)[-40:]) # first qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(72, hsp.aln_span) self.assertEqual(2e-06, hsp.evalue) self.assertEqual(51.6, hsp.bitscore) self.assertEqual(122.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(5, hsp.gap_num) self.assertEqual(34, hsp.ident_num) self.assertEqual(41, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(243, hsp.query_start) self.assertEqual(31, hsp.hit_start) self.assertEqual(459, hsp.query_end) self.assertEqual(98, hsp.hit_end) self.assertEqual('VGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCPPP', str(hsp.query.seq)[:40]) self.assertEqual('V RVQ ++L S QPP P FK FS LSL SSW+ R PP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('VAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPP', str(hsp.hit.seq)[:40]) self.assertEqual('WDCRCPPPHPANffffffffFLRRSFALVAQAGVQWLDLG', str(hsp.query.seq)[-40:]) self.assertEqual('W+ R PPH AN F F + F V QAG++ + G', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('WEYRHVPPHLAN-----FLFLVEMGFLHVGQAGLELVTSG', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|NP_001243358.1|', hit.id) self.assertEqual('PDZ and LIM domain protein 5 isoform i [Homo sapiens]', hit.description) self.assertEqual(136, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(88, hsp.aln_span) self.assertEqual(1e-29, hsp.evalue) self.assertEqual(114.0, hsp.bitscore) self.assertEqual(286.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(63, hsp.ident_num) self.assertEqual(69, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(15, hsp.query_start) self.assertEqual(29, hsp.hit_start) self.assertEqual(279, hsp.query_end) self.assertEqual(117, hsp.hit_end) self.assertEqual('VAQAGVQWLDLGppqpppPGFK*FSCLSHPSSWDYRHMPP', str(hsp.query.seq)[:40]) self.assertEqual('++ AGVQW +LG PQPP P FK FSCLS PSSWDYRH+PP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('ISSAGVQWRNLGSPQPPSPEFKRFSCLSLPSSWDYRHVPP', str(hsp.hit.seq)[:40]) self.assertEqual('VETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP', str(hsp.query.seq)[-40:]) self.assertEqual('VET F +VGQAGLE P SG+LP ASQS ITGVSH A P', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VETKFPYVGQAGLELPTSGDLPTSASQSAKITGVSHRAWP', str(hsp.hit.seq)[-40:]) # first qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(69, hsp.aln_span) self.assertEqual(1e-06, hsp.evalue) self.assertEqual(52.4, hsp.bitscore) self.assertEqual(124.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(5, hsp.gap_num) self.assertEqual(33, hsp.ident_num) self.assertEqual(41, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(258, hsp.query_start) self.assertEqual(27, hsp.hit_start) self.assertEqual(465, hsp.query_end) self.assertEqual(91, hsp.hit_end) self.assertEqual('VSVGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCP', str(hsp.query.seq)[:40]) self.assertEqual('+++ A VQ +L S QPP+PEFK FS LSL SSWD R ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LTISSAGVQWRNLGSPQPPSPEFKRFSCLSLPSSWDYRHV', str(hsp.hit.seq)[:40]) self.assertEqual('SLQSSWDCRCPPPHPANffffffffFLRRSFALVAQAGVQ', str(hsp.query.seq)[-40:]) self.assertEqual('SL SSWD R PP AN F F + F V QAG++', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('SLPSSWDYRHVPPRLAN-----FVFLVETKFPYVGQAGLE', str(hsp.hit.seq)[-40:]) def test_text_2226_blastx_004(self): """Test parsing blastx output (text_2226_blastx_004.txt)""" blast_file = get_file('text_2226_blastx_004.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(2, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(128, qresult.seq_len) self.assertEqual('minirefseq_prot', qresult.target) self.assertEqual('blastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) # test second qresult qresult = qresults[1] self.assertEqual('hg19_dna', qresult.id) self.assertEqual('range=chr1:1207057-1207541 5\'pad=0 3\'pad=0 strand=+repeatMasking=none', qresult.description) self.assertEqual(485, qresult.seq_len) self.assertEqual('minirefseq_prot', qresult.target) self.assertEqual('blastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # second qresult, first hit hit = qresult[0] self.assertEqual('gi|332258565|ref|XP_003278367.1|', hit.id) self.assertEqual('PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]', hit.description) self.assertEqual(132, hit.seq_len) self.assertEqual(2, len(hit)) # second qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(95, hsp.aln_span) self.assertEqual(3e-38, hsp.evalue) self.assertEqual(121.0, hsp.bitscore) self.assertEqual(304.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(69, hsp.ident_num) self.assertEqual(74, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(15, hsp.query_start) self.assertEqual(24, hsp.hit_start) self.assertEqual(300, hsp.query_end) self.assertEqual(119, hsp.hit_end) self.assertEqual('LRRSFALVAQAGVQWLDLGppqpppPGFK*FSCLSHPSSW', str(hsp.query.seq)[:40]) self.assertEqual('LRRSFALVAQ VQW +LG PQPPPPGFK FSCLS SSW', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LRRSFALVAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSW', str(hsp.hit.seq)[:40]) self.assertEqual('VETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP', str(hsp.query.seq)[-40:]) self.assertEqual('VE GF HVGQAGLE SG+ P SQS GI GVSH AQP', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VEMGFLHVGQAGLELVTSGDPPTLTSQSAGIIGVSHCAQP', str(hsp.hit.seq)[-40:]) # second qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(72, hsp.aln_span) self.assertEqual(3e-12, hsp.evalue) self.assertEqual(51.6, hsp.bitscore) self.assertEqual(122.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(5, hsp.gap_num) self.assertEqual(34, hsp.ident_num) self.assertEqual(41, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(243, hsp.query_start) self.assertEqual(31, hsp.hit_start) self.assertEqual(459, hsp.query_end) self.assertEqual(98, hsp.hit_end) self.assertEqual('VGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCPPP', str(hsp.query.seq)[:40]) self.assertEqual('V RVQ ++L S QPP P FK FS LSL SSW+ R PP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('VAQTRVQWYNLGSPQPPPPGFKRFSCLSLLSSWEYRHVPP', str(hsp.hit.seq)[:40]) self.assertEqual('WDCRCPPPHPANffffffffFLRRSFALVAQAGVQWLDLG', str(hsp.query.seq)[-40:]) self.assertEqual('W+ R PPH AN F F + F V QAG++ + G', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('WEYRHVPPHLAN-----FLFLVEMGFLHVGQAGLELVTSG', str(hsp.hit.seq)[-40:]) # second qresult, second hit hit = qresult[1] self.assertEqual('gi|374093214|ref|NP_001243358.1|', hit.id) self.assertEqual('PDZ and LIM domain protein 5 isoform i [Homo sapiens]', hit.description) self.assertEqual(136, hit.seq_len) self.assertEqual(2, len(hit)) # second qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(88, hsp.aln_span) self.assertEqual(2e-35, hsp.evalue) self.assertEqual(114.0, hsp.bitscore) self.assertEqual(286.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(63, hsp.ident_num) self.assertEqual(69, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(15, hsp.query_start) self.assertEqual(29, hsp.hit_start) self.assertEqual(279, hsp.query_end) self.assertEqual(117, hsp.hit_end) self.assertEqual('VAQAGVQWLDLGppqpppPGFK*FSCLSHPSSWDYRHMPP', str(hsp.query.seq)[:40]) self.assertEqual('++ AGVQW +LG PQPP P FK FSCLS PSSWDYRH+PP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('ISSAGVQWRNLGSPQPPSPEFKRFSCLSLPSSWDYRHVPP', str(hsp.hit.seq)[:40]) self.assertEqual('VETGFYHVGQAGLEPPISGNLPAWASQSVGITGVSHHAQP', str(hsp.query.seq)[-40:]) self.assertEqual('VET F +VGQAGLE P SG+LP ASQS ITGVSH A P', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VETKFPYVGQAGLELPTSGDLPTSASQSAKITGVSHRAWP', str(hsp.hit.seq)[-40:]) # second qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(69, hsp.aln_span) self.assertEqual(2e-12, hsp.evalue) self.assertEqual(52.4, hsp.bitscore) self.assertEqual(124.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(5, hsp.gap_num) self.assertEqual(33, hsp.ident_num) self.assertEqual(41, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(258, hsp.query_start) self.assertEqual(27, hsp.hit_start) self.assertEqual(465, hsp.query_end) self.assertEqual(91, hsp.hit_end) self.assertEqual('VSVGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCP', str(hsp.query.seq)[:40]) self.assertEqual('+++ A VQ +L S QPP+PEFK FS LSL SSWD R ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LTISSAGVQWRNLGSPQPPSPEFKRFSCLSLPSSWDYRHV', str(hsp.hit.seq)[:40]) self.assertEqual('SLQSSWDCRCPPPHPANffffffffFLRRSFALVAQAGVQ', str(hsp.query.seq)[-40:]) self.assertEqual('SL SSWD R PP AN F F + F V QAG++', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('SLPSSWDYRHVPPRLAN-----FVFLVETKFPYVGQAGLE', str(hsp.hit.seq)[-40:]) class TblastnCases(BaseBlastCases): def test_text_2226_tblastn_001(self): """Test parsing tblastn output (text_2226_tblastn_001.txt)""" blast_file = get_file('text_2226_tblastn_001.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(32, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('tblastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) def test_text_2226_tblastn_002(self): """Test parsing tblastn output (text_2226_tblastn_002.txt)""" blast_file = get_file('text_2226_tblastn_002.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id) self.assertEqual('membrane bound lipoprotein [Bacillussubtilis subsp. subtilis str. 168]', qresult.description) self.assertEqual(102, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('tblastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(4, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|XM_001425911.1|', hit.id) self.assertEqual('Paramecium tetraurelia hypothetical protein (GSPATT00004923001) partial mRNA', hit.description) self.assertEqual(4632, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(43, hsp.aln_span) self.assertEqual(0.74, hsp.evalue) self.assertEqual(34.7, hsp.bitscore) self.assertEqual(78.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(15, hsp.ident_num) self.assertEqual(26, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(30, hsp.query_start) self.assertEqual(1743, hsp.hit_start) self.assertEqual(73, hsp.query_end) self.assertEqual(1872, hsp.hit_end) self.assertEqual('PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTS', str(hsp.query.seq)[:40]) self.assertEqual('P + TK+GT +GL HTI + + +SL++ E++ ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*K', str(hsp.hit.seq)[:40]) self.assertEqual('NIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD', str(hsp.query.seq)[-40:]) self.assertEqual(' TK+GT +GL HTI + + +SL++ E++ D+D', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('ATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|XM_003382561.1|', hit.id) self.assertEqual('PREDICTED: Amphimedon queenslandica CWF19-like protein 1-like (LOC100635130), mRNA', hit.description) self.assertEqual(1811, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(74, hsp.aln_span) self.assertEqual(6.4, hsp.evalue) self.assertEqual(32.0, hsp.bitscore) self.assertEqual(71.0, hsp.bitscore_raw) self.assertEqual(2, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(11, hsp.gap_num) self.assertEqual(19, hsp.ident_num) self.assertEqual(36, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(28, hsp.query_start) self.assertEqual(1105, hsp.hit_start) self.assertEqual(94, hsp.query_end) self.assertEqual(1318, hsp.hit_end) self.assertEqual('VSPDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEES', str(hsp.query.seq)[:40]) self.assertEqual('+ DS + +G GL D H + + + + P S+D +E ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IGNDSYLALSKG---GLVDEHVLILPIGHYPSSIDAPQEV', str(hsp.hit.seq)[:40]) self.assertEqual('DITEESTSDLDK--------FNSGDKVTITYEKNDEGQLL', str(hsp.query.seq)[-40:]) self.assertEqual('D +E ++DK F+S ++ + +E+N Q L', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('DAPQEVIEEIDKFKVALRKYFSSKNQTCVMFERNFRSQHL', str(hsp.hit.seq)[-40:]) def test_text_2226_tblastn_003(self): """Test parsing tblastn output (text_2226_tblastn_003.txt)""" blast_file = get_file('text_2226_tblastn_003.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|11464971:4-101', qresult.id) self.assertEqual('pleckstrin [Mus musculus]', qresult.description) self.assertEqual(98, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('tblastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|XM_003502378.1|', hit.id) self.assertEqual('PREDICTED: Cricetulus griseus pleckstrin-like (LOC100773128), mRNA', hit.description) self.assertEqual(1119, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(1e-63, hsp.evalue) self.assertEqual(205.0, hsp.bitscore) self.assertEqual(521.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(98, hsp.ident_num) self.assertEqual(98, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(75, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(369, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # first qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(100, hsp.aln_span) self.assertEqual(0.0005, hsp.evalue) self.assertEqual(43.9, hsp.bitscore) self.assertEqual(102.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(6, hsp.gap_num) self.assertEqual(30, hsp.ident_num) self.assertEqual(50, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(2, hsp.query_start) self.assertEqual(801, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(1101, hsp.hit_end) self.assertEqual('IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('I++G L+K+G WK +L ED + +Y ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGED', str(hsp.hit.seq)[:40]) self.assertEqual('GKRM---FVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual('GK+ + +I T + ++ QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GKKSDDENLFEIITADEVHYYLQAAAPKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|XM_003360601.2|', hit.id) self.assertEqual('PREDICTED: Sus scrofa pleckstrin-like (LOC100626968), mRNA', hit.description) self.assertEqual(772, hit.seq_len) self.assertEqual(2, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(1e-62, hsp.evalue) self.assertEqual(199.0, hsp.bitscore) self.assertEqual(506.0, hsp.bitscore_raw) self.assertEqual(2, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(94, hsp.ident_num) self.assertEqual(96, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(94, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(388, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # first qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(71, hsp.aln_span) self.assertEqual(2.8, hsp.evalue) self.assertEqual(32.7, hsp.bitscore) self.assertEqual(73.0, hsp.bitscore_raw) self.assertEqual(2, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(4, hsp.gap_num) self.assertEqual(21, hsp.ident_num) self.assertEqual(33, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(29, hsp.query_start) self.assertEqual(541, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(754, hsp.hit_end) self.assertEqual('IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK-', str(hsp.query.seq)[:40]) self.assertEqual('+ +Y P G I L+G +TS GK F+ + ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWER', str(hsp.hit.seq)[:40]) self.assertEqual('GKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual('GK F+ + T + +F QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) def test_text_2226_tblastn_004(self): """Test parsing tblastn output (text_2226_tblastn_004.txt)""" blast_file = get_file('text_2226_tblastn_004.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(3, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(32, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('tblastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) # test second qresult qresult = qresults[1] self.assertEqual('gi|16080617|ref|NP_391444.1|', qresult.id) self.assertEqual('membrane bound lipoprotein [Bacillussubtilis subsp. subtilis str. 168]', qresult.description) self.assertEqual(102, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('tblastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(3, len(qresult)) # second qresult, first hit hit = qresult[0] self.assertEqual('gi|145479850|ref|XM_001425911.1|', hit.id) self.assertEqual('Paramecium tetraurelia hypothetical protein (GSPATT00004923001) partial mRNA', hit.description) self.assertEqual(4632, hit.seq_len) self.assertEqual(1, len(hit)) # second qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(43, hsp.aln_span) self.assertEqual(1e-05, hsp.evalue) self.assertEqual(34.7, hsp.bitscore) self.assertEqual(78.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(15, hsp.ident_num) self.assertEqual(26, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(30, hsp.query_start) self.assertEqual(1743, hsp.hit_start) self.assertEqual(73, hsp.query_end) self.assertEqual(1872, hsp.hit_end) self.assertEqual('PDSNIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTS', str(hsp.query.seq)[:40]) self.assertEqual('P + TK+GT +GL HTI + + +SL++ E++ ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('PKTATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*K', str(hsp.hit.seq)[:40]) self.assertEqual('NIETKEGTYVGLADTHTIEVTVDNEPVSLDITEESTSDLD', str(hsp.query.seq)[-40:]) self.assertEqual(' TK+GT +GL HTI + + +SL++ E++ D+D', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('ATGTKKGTIIGLLSIHTILFILTSHALSLEVKEQT*KDID', str(hsp.hit.seq)[-40:]) # second qresult, second hit hit = qresult[1] self.assertEqual('gi|72012412|ref|XM_777959.1|', hit.id) self.assertEqual('PREDICTED: Strongylocentrotus purpuratus hypothetical LOC577746 (LOC577746), mRNA', hit.description) self.assertEqual(1593, hit.seq_len) self.assertEqual(1, len(hit)) # second qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(59, hsp.aln_span) self.assertEqual(0.0001, hsp.evalue) self.assertEqual(31.6, hsp.bitscore) self.assertEqual(70.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(8, hsp.gap_num) self.assertEqual(20, hsp.ident_num) self.assertEqual(29, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(43, hsp.query_start) self.assertEqual(1056, hsp.hit_start) self.assertEqual(94, hsp.query_end) self.assertEqual(1233, hsp.hit_end) self.assertEqual('GLADTHTIEVTVDNEPVSLDITEESTSDLDKFNSG-----', str(hsp.query.seq)[:40]) self.assertEqual('GL HT+ + V + LD+TEE ++LD+F S ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('GLVPDHTLILPVGHYQSMLDLTEEVQTELDQFKSALRKYY', str(hsp.hit.seq)[:40]) self.assertEqual('DITEESTSDLDKFNSG--------DKVTITYEKNDEGQLL', str(hsp.query.seq)[-40:]) self.assertEqual('D+TEE ++LD+F S K + YE+N Q L', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('DLTEEVQTELDQFKSALRKYYLSKGKTCVIYERNFRTQHL', str(hsp.hit.seq)[-40:]) # test third qresult qresult = qresults[2] self.assertEqual('gi|11464971:4-101', qresult.id) self.assertEqual('pleckstrin [Mus musculus]', qresult.description) self.assertEqual(98, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('tblastn', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # third qresult, first hit hit = qresult[0] self.assertEqual('gi|350596019|ref|XM_003360601.2|', hit.id) self.assertEqual('PREDICTED: Sus scrofa pleckstrin-like (LOC100626968), mRNA', hit.description) self.assertEqual(772, hit.seq_len) self.assertEqual(2, len(hit)) # third qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(2e-67, hsp.evalue) self.assertEqual(199.0, hsp.bitscore) self.assertEqual(506.0, hsp.bitscore_raw) self.assertEqual(2, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(94, hsp.ident_num) self.assertEqual(96, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(94, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(388, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVKKGS+FNTWKPMWV+LLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKKGSMFNTWKPMWVILLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFV KITTTKQQDHFFQAAFLEERD WVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVFKITTTKQQDHFFQAAFLEERDGWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # third qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(71, hsp.aln_span) self.assertEqual(4e-05, hsp.evalue) self.assertEqual(32.7, hsp.bitscore) self.assertEqual(73.0, hsp.bitscore_raw) self.assertEqual(2, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(4, hsp.gap_num) self.assertEqual(21, hsp.ident_num) self.assertEqual(33, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(29, hsp.query_start) self.assertEqual(541, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(754, hsp.hit_end) self.assertEqual('IEFYKKKSDNSPKGMIPLKGSTLTS-PCQDFGKRMFVLK-', str(hsp.query.seq)[:40]) self.assertEqual('+ +Y P G I L+G +TS GK F+ + ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LHYYDPAGGEDPLGAIHLRGCVVTSVESNTDGKNGFLWER', str(hsp.hit.seq)[:40]) self.assertEqual('GKRMFVLK---ITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual('GK F+ + T + +F QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('GKNGFLWERAXXITADEVHYFLQAANPKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) # third qresult, second hit hit = qresult[1] self.assertEqual('gi|301779869|ref|XM_002925302.1|', hit.id) self.assertEqual('PREDICTED: Ailuropoda melanoleuca pleckstrin-like (LOC100466932), mRNA', hit.description) self.assertEqual(1144, hit.seq_len) self.assertEqual(2, len(hit)) # third qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(98, hsp.aln_span) self.assertEqual(2e-67, hsp.evalue) self.assertEqual(202.0, hsp.bitscore) self.assertEqual(515.0, hsp.bitscore_raw) self.assertEqual(3, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(96, hsp.ident_num) self.assertEqual(97, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(77, hsp.hit_start) self.assertEqual(98, hsp.query_end) self.assertEqual(371, hsp.hit_end) self.assertEqual('KRIREGYLVKKGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('KRIREGYLVK+GSVFNTWKPMWVVLLEDGIEFYKKKSDNS', hsp.aln_annotation['similarity'][:40]) self.assertEqual('KRIREGYLVKRGSVFNTWKPMWVVLLEDGIEFYKKKSDNS', str(hsp.hit.seq)[:40]) self.assertEqual('FGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.query.seq)[-40:]) self.assertEqual('FGKRMFV KITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('FGKRMFVFKITTTKQQDHFFQAAFLEERDAWVRDIKKAIK', str(hsp.hit.seq)[-40:]) # third qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(100, hsp.aln_span) self.assertEqual(3e-09, hsp.evalue) self.assertEqual(45.1, hsp.bitscore) self.assertEqual(105.0, hsp.bitscore_raw) self.assertEqual(3, hsp.query_frame) self.assertEqual(0, hsp.hit_frame) self.assertEqual(6, hsp.gap_num) self.assertEqual(30, hsp.ident_num) self.assertEqual(48, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(0, hsp.hit_strand) self.assertEqual(2, hsp.query_start) self.assertEqual(803, hsp.hit_start) self.assertEqual(96, hsp.query_end) self.assertEqual(1103, hsp.hit_end) self.assertEqual('IREGYLVKKGSVFNTWKPMWVVLLEDG--IEFYKKKSDNS', str(hsp.query.seq)[:40]) self.assertEqual('I++G L+K+G WK +L ED + +Y ', hsp.aln_annotation['similarity'][:40]) self.assertEqual('IKQGCLLKQGHRRKNWKVRKFILREDPAYLHYYDPAGGED', str(hsp.hit.seq)[:40]) self.assertEqual('QDFGKRMFVLKITTTKQQDHFFQAAFLEERDAWVRDIKKA', str(hsp.query.seq)[-40:]) self.assertEqual(' + + +I T + +F QAA +ER W++ I+ A', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('VRKSEEENLFEIITADEVHYFLQAATPKERTEWIKAIQVA', str(hsp.hit.seq)[-40:]) class TblastxCases(BaseBlastCases): def test_text_2226_tblastx_001(self): """Test parsing tblastx output (text_2226_tblastx_001.txt)""" blast_file = get_file('text_2226_tblastx_001.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(128, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('tblastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) def test_text_2226_tblastx_002(self): """Test parsing tblastx output (text_2226_tblastx_002.txt)""" blast_file = get_file('text_2226_tblastx_002.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('gi|356995852:1-490', qresult.id) self.assertEqual('Mus musculus POU domain, class 5, transcriptionfactor 1 (Pou5f1), transcript variant 1, mRNA', qresult.description) self.assertEqual(490, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('tblastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|NM_013633.3|', hit.id) self.assertEqual('Mus musculus POU domain, class 5, transcription factor 1 (Pou5f1), transcript variant 1, mRNA', hit.description) self.assertEqual(1353, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(163, hsp.aln_span) self.assertEqual(2e-115, hsp.evalue) self.assertEqual(418.0, hsp.bitscore) self.assertEqual(908.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(163, hsp.ident_num) self.assertEqual(163, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(0, hsp.hit_start) self.assertEqual(489, hsp.query_end) self.assertEqual(489, hsp.hit_end) self.assertEqual('EVKPSLGEPSFHQAPGSGCPPSPWLDTWLQTSPSHPHQVG', str(hsp.query.seq)[:40]) self.assertEqual('EVKPSLGEPSFHQAPGSGCPPSPWLDTWLQTSPSHPHQVG', hsp.aln_annotation['similarity'][:40]) self.assertEqual('EVKPSLGEPSFHQAPGSGCPPSPWLDTWLQTSPSHPHQVG', str(hsp.hit.seq)[:40]) self.assertEqual('TQREPPLSPVPTAPMP*SWRRWNQLPRSPRT*KPCRRS*N', str(hsp.query.seq)[-40:]) self.assertEqual('TQREPPLSPVPTAPMP*SWRRWNQLPRSPRT*KPCRRS*N', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('TQREPPLSPVPTAPMP*SWRRWNQLPRSPRT*KPCRRS*N', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|XR_141831.1|', hit.id) self.assertEqual('PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA ref|XR_105837.2| PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA ref|XR_141464.1| PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA ref|XR_141446.1| PREDICTED: Mus musculus predicted gene, 19553 (Gm19553), miscRNA', hit.description) self.assertEqual(570, hit.seq_len) self.assertEqual(1, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(163, hsp.aln_span) self.assertEqual(3e-114, hsp.evalue) self.assertEqual(415.0, hsp.bitscore) self.assertEqual(900.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(-1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(162, hsp.ident_num) self.assertEqual(162, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(-1, hsp.hit_strand) self.assertEqual(0, hsp.query_start) self.assertEqual(81, hsp.hit_start) self.assertEqual(489, hsp.query_end) self.assertEqual(570, hsp.hit_end) self.assertEqual('EVKPSLGEPSFHQAPGSGCPPSPWLDTWLQTSPSHPHQVG', str(hsp.query.seq)[:40]) self.assertEqual('EVKPSLGEPSFHQAPGSGCPPSPWLDTWLQTSPSHPHQVG', hsp.aln_annotation['similarity'][:40]) self.assertEqual('EVKPSLGEPSFHQAPGSGCPPSPWLDTWLQTSPSHPHQVG', str(hsp.hit.seq)[:40]) self.assertEqual('TQREPPLSPVPTAPMP*SWRRWNQLPRSPRT*KPCRRS*N', str(hsp.query.seq)[-40:]) self.assertEqual('TQREPPLSPVPTAPMP*SWRRWNQL RSPRT*KPCRRS*N', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('TQREPPLSPVPTAPMP*SWRRWNQLQRSPRT*KPCRRS*N', str(hsp.hit.seq)[-40:]) def test_text_2226_tblastx_003(self): """Test parsing tblastx output (text_2226_tblastx_003.txt)""" blast_file = get_file('text_2226_tblastx_003.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(1, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('hg19_dna', qresult.id) self.assertEqual('range=chr1:1207057-1207541 5\'pad=0 3\'pad=0 strand=+repeatMasking=none', qresult.description) self.assertEqual(485, qresult.seq_len) self.assertEqual('NCBI Transcript Reference Sequences', qresult.target) self.assertEqual('tblastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(10, len(qresult)) # first qresult, first hit hit = qresult[0] self.assertEqual('ref|NM_002985.2|', hit.id) self.assertEqual('Homo sapiens chemokine (C-C motif) ligand 5 (CCL5), mRNA', hit.description) self.assertEqual(1237, hit.seq_len) self.assertEqual(3, len(hit)) # first qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(107, hsp.aln_span) self.assertEqual(4e-49, hsp.evalue) self.assertEqual(118.0, hsp.bitscore) self.assertEqual(252.0, hsp.bitscore_raw) self.assertEqual(-3, hsp.query_frame) self.assertEqual(-1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(68, hsp.ident_num) self.assertEqual(72, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(-1, hsp.hit_strand) self.assertEqual(138, hsp.query_start) self.assertEqual(622, hsp.hit_start) self.assertEqual(459, hsp.query_end) self.assertEqual(943, hsp.hit_end) self.assertEqual('VGPARVQ*HDLSSLQPPAPEFK*FSHLSLQSSWDCRCPPP', str(hsp.query.seq)[:40]) self.assertEqual('V A V+ H+LSSLQPP P FK FS LSL SSWD R PP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('VTQAGVKWHNLSSLQPPPPGFKQFSCLSLPSSWDYRRGPP', str(hsp.hit.seq)[:40]) self.assertEqual('WLDLGppqpppPGFK*FSCLSHPSSWDYRHMPPCLINFVF', str(hsp.query.seq)[-40:]) self.assertEqual('W DLG Q PPPGF FSCLS PSSWDYR P NF++', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('WRDLGSLQAPPPGFTPFSCLSLPSSWDYRRPLPRPANFLY', str(hsp.hit.seq)[-40:]) # first qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(44, hsp.aln_span) self.assertEqual(4e-49, hsp.evalue) self.assertEqual(100.0, hsp.bitscore) self.assertEqual(214.0, hsp.bitscore_raw) self.assertEqual(-2, hsp.query_frame) self.assertEqual(-2, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(37, hsp.ident_num) self.assertEqual(38, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(-1, hsp.hit_strand) self.assertEqual(16, hsp.query_start) self.assertEqual(498, hsp.hit_start) self.assertEqual(148, hsp.query_end) self.assertEqual(630, hsp.hit_end) self.assertEqual('FCIFSRDGVLPCWSGWSRTPDLR*SACLGLPKCWDYRCEP', str(hsp.query.seq)[:40]) self.assertEqual('FCIFSRDGV CW GWSRTPDL+*S LGLPKCWDYR EP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('FCIFSRDGVSSCWPGWSRTPDLK*STHLGLPKCWDYRREP', str(hsp.hit.seq)[:40]) self.assertEqual('SRDGVLPCWSGWSRTPDLR*SACLGLPKCWDYRCEPPRPA', str(hsp.query.seq)[-40:]) self.assertEqual('SRDGV CW GWSRTPDL+*S LGLPKCWDYR EPPRPA', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('SRDGVSSCWPGWSRTPDLK*STHLGLPKCWDYRREPPRPA', str(hsp.hit.seq)[-40:]) # first qresult, second hit hit = qresult[1] self.assertEqual('ref|XM_003255417.1|', hit.id) self.assertEqual('PREDICTED: Nomascus leucogenys 5\'-nucleotidase, cytosolic II, transcript variant 2 (NT5C2), mRNA', hit.description) self.assertEqual(3285, hit.seq_len) self.assertEqual(3, len(hit)) # first qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(94, hsp.aln_span) self.assertEqual(9e-49, hsp.evalue) self.assertEqual(197.0, hsp.bitscore) self.assertEqual(425.0, hsp.bitscore_raw) self.assertEqual(-2, hsp.query_frame) self.assertEqual(-2, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(78, hsp.ident_num) self.assertEqual(79, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(-1, hsp.hit_strand) self.assertEqual(16, hsp.query_start) self.assertEqual(2744, hsp.hit_start) self.assertEqual(298, hsp.query_end) self.assertEqual(3026, hsp.hit_end) self.assertEqual('ETEFRSCCPGWSAMA*SWPTTASTSWIQVILLPQSPE*LG', str(hsp.query.seq)[:40]) self.assertEqual('E EFRSCCPGWSAMA SW S SW+QVIL PQ PE*LG', hsp.aln_annotation['similarity'][:40]) self.assertEqual('EMEFRSCCPGWSAMAQSWLIATSVSWVQVILWPQPPE*LG', str(hsp.hit.seq)[:40]) self.assertEqual('SRDGVLPCWSGWSRTPDLR*SACLGLPKCWDYRCEPPRPA', str(hsp.query.seq)[-40:]) self.assertEqual('SRDGV PCWSGWSRTPDLR*SACLGLPKCWDYR EPP PA', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('SRDGVSPCWSGWSRTPDLR*SACLGLPKCWDYRREPPCPA', str(hsp.hit.seq)[-40:]) # first qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(94, hsp.aln_span) self.assertEqual(4e-43, hsp.evalue) self.assertEqual(178.0, hsp.bitscore) self.assertEqual(384.0, hsp.bitscore_raw) self.assertEqual(3, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(77, hsp.ident_num) self.assertEqual(83, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(17, hsp.query_start) self.assertEqual(2745, hsp.hit_start) self.assertEqual(299, hsp.query_end) self.assertEqual(3027, hsp.hit_end) self.assertEqual('AGRGGSHL*SQHFGRPRQADYLRSGVRDQPDQHGKTPSLL', str(hsp.query.seq)[:40]) self.assertEqual('AG GGS L*SQHFGRPRQAD+LRSGVRDQPDQHG+TPSLL', hsp.aln_annotation['similarity'][:40]) self.assertEqual('AGHGGSRL*SQHFGRPRQADHLRSGVRDQPDQHGETPSLL', str(hsp.hit.seq)[:40]) self.assertEqual('PSYSGD*GRRIT*IQEVEAVVGQDQAIALQPGQQERNSVS', str(hsp.query.seq)[-40:]) self.assertEqual('PSYSG *G+RIT* QE E + QD AIALQPGQQERNS+S', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('PSYSGG*GQRIT*TQETEVAMSQDCAIALQPGQQERNSIS', str(hsp.hit.seq)[-40:]) def test_text_2226_tblastx_004(self): """Test parsing tblastx output (text_2226_tblastx_004.txt)""" blast_file = get_file('text_2226_tblastx_004.txt') qresults = list(parse(blast_file, FMT)) self.assertEqual(2, len(qresults)) self.check_common_attrs(qresults) # test first qresult qresult = qresults[0] self.assertEqual('random_s00', qresult.id) self.assertEqual('', qresult.description) self.assertEqual(128, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('tblastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(0, len(qresult)) # test second qresult qresult = qresults[1] self.assertEqual('gi|296147483:1-350', qresult.id) self.assertEqual('Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,complete cds', qresult.description) self.assertEqual(350, qresult.seq_len) self.assertEqual('minirefseq_mrna', qresult.target) self.assertEqual('tblastx', qresult.program) self.assertEqual('2.2.26+', qresult.version) self.assertEqual(5, len(qresult)) # second qresult, first hit hit = qresult[0] self.assertEqual('gi|296147483|ref|NM_001183135.1|', hit.id) self.assertEqual('Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds', hit.description) self.assertEqual(4911, hit.seq_len) self.assertEqual(8, len(hit)) # second qresult, first hit, first hsp hsp = qresult[0].hsps[0] self.assertEqual(116, hsp.aln_span) self.assertEqual(2e-81, hsp.evalue) self.assertEqual(289.0, hsp.bitscore) self.assertEqual(626.0, hsp.bitscore_raw) self.assertEqual(2, hsp.query_frame) self.assertEqual(2, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(116, hsp.ident_num) self.assertEqual(116, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(1, hsp.query_start) self.assertEqual(1, hsp.hit_start) self.assertEqual(349, hsp.query_end) self.assertEqual(349, hsp.hit_end) self.assertEqual('WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINP', str(hsp.query.seq)[:40]) self.assertEqual('WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('WP*TLEGLTPCKGNLKQNCVLYLPNRKEEIQPFAMLVINP', str(hsp.hit.seq)[:40]) self.assertEqual('WQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K', str(hsp.query.seq)[-40:]) self.assertEqual('WQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('WQCNAYRDCQPFHLFLEAGCLKFWMPSLRLLISRWRFN*K', str(hsp.hit.seq)[-40:]) # second qresult, first hit, second hsp hsp = qresult[0].hsps[1] self.assertEqual(116, hsp.aln_span) self.assertEqual(5e-78, hsp.evalue) self.assertEqual(278.0, hsp.bitscore) self.assertEqual(602.0, hsp.bitscore_raw) self.assertEqual(-2, hsp.query_frame) self.assertEqual(-3, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(116, hsp.ident_num) self.assertEqual(116, hsp.pos_num) self.assertEqual(-1, hsp.query_strand) self.assertEqual(-1, hsp.hit_strand) self.assertEqual(1, hsp.query_start) self.assertEqual(1, hsp.hit_start) self.assertEqual(349, hsp.query_end) self.assertEqual(349, hsp.hit_end) self.assertEqual('LLIESPSRDE*PQ*RHPKFQTAGFEE*MERLTVPVGIALP', str(hsp.query.seq)[:40]) self.assertEqual('LLIESPSRDE*PQ*RHPKFQTAGFEE*MERLTVPVGIALP', hsp.aln_annotation['similarity'][:40]) self.assertEqual('LLIESPSRDE*PQ*RHPKFQTAGFEE*MERLTVPVGIALP', str(hsp.hit.seq)[:40]) self.assertEqual('WIYH*HGEWLNFFFSIRKIKNAILLQVAFAWSQTLQCSWP', str(hsp.query.seq)[-40:]) self.assertEqual('WIYH*HGEWLNFFFSIRKIKNAILLQVAFAWSQTLQCSWP', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('WIYH*HGEWLNFFFSIRKIKNAILLQVAFAWSQTLQCSWP', str(hsp.hit.seq)[-40:]) # second qresult, second hit hit = qresult[1] self.assertEqual('gi|365982352|ref|XM_003667962.1|', hit.id) self.assertEqual('Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA', hit.description) self.assertEqual(4932, hit.seq_len) self.assertEqual(10, len(hit)) # second qresult, second hit, first hsp hsp = qresult[1].hsps[0] self.assertEqual(85, hsp.aln_span) self.assertEqual(5e-42, hsp.evalue) self.assertEqual(152.0, hsp.bitscore) self.assertEqual(327.0, hsp.bitscore_raw) self.assertEqual(1, hsp.query_frame) self.assertEqual(1, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(62, hsp.ident_num) self.assertEqual(73, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(93, hsp.query_start) self.assertEqual(87, hsp.hit_start) self.assertEqual(348, hsp.query_end) self.assertEqual(342, hsp.hit_end) self.assertEqual('TIRHASDKSIEILKRVHSFEELERHPDFALPFVLACQSRN', str(hsp.query.seq)[:40]) self.assertEqual('TI+HASDKSI+ILK + + EEL RHPDF P VLAC SRN', hsp.aln_annotation['similarity'][:40]) self.assertEqual('TIKHASDKSIDILKTIQNIEELVRHPDFVTPLVLACSSRN', str(hsp.hit.seq)[:40]) self.assertEqual('LAMQCLQGLSTVPSIPRSRLSEILDAFIEATHLAMEIQLK', str(hsp.query.seq)[-40:]) self.assertEqual('+AMQCLQGL++VPSIP SR+ E+LD FIEAT LAMEIQLK', hsp.aln_annotation['similarity'][-40:]) self.assertEqual('IAMQCLQGLASVPSIPESRIPEVLDGFIEATQLAMEIQLK', str(hsp.hit.seq)[-40:]) # second qresult, second hit, second hsp hsp = qresult[1].hsps[1] self.assertEqual(14, hsp.aln_span) self.assertEqual(5e-42, hsp.evalue) self.assertEqual(26.3, hsp.bitscore) self.assertEqual(51.0, hsp.bitscore_raw) self.assertEqual(3, hsp.query_frame) self.assertEqual(3, hsp.hit_frame) self.assertEqual(0, hsp.gap_num) self.assertEqual(11, hsp.ident_num) self.assertEqual(11, hsp.pos_num) self.assertEqual(1, hsp.query_strand) self.assertEqual(1, hsp.hit_strand) self.assertEqual(68, hsp.query_start) self.assertEqual(62, hsp.hit_start) self.assertEqual(110, hsp.query_end) self.assertEqual(104, hsp.hit_end) self.assertEqual('FRIEKKKFNHSPC*', str(hsp.query.seq)) self.assertEqual('FRI KKKFNH C*', hsp.aln_annotation['similarity']) self.assertEqual('FRI*KKKFNH*TC*', str(hsp.hit.seq)) if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity = 2) unittest.main(testRunner=runner)