# Copyright 2012 by Wibowo Arindrarto. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Bio.SearchIO parser for BLAST+ tab output format, with or without comments.""" import re from Bio._py3k import _as_bytes, _bytes_to_string from Bio._py3k import basestring from Bio.SearchIO._index import SearchIndexer from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment __all__ = ['BlastTabIndexer', 'BlastTabParser', 'BlastTabWriter'] # longname-shortname map # maps the column names shown in a commented output to its short name # (the one used in the command line) _LONG_SHORT_MAP = { 'query id': 'qseqid', 'query acc.': 'qacc', 'query acc.ver': 'qaccver', 'query length': 'qlen', 'subject id': 'sseqid', 'subject acc.': 'sacc', 'subject acc.ver': 'saccver', 'subject length': 'slen', 'alignment length': 'length', 'bit score': 'bitscore', 'score': 'score', 'evalue': 'evalue', 'identical': 'nident', '% identity': 'pident', 'positives': 'positive', '% positives': 'ppos', 'mismatches': 'mismatch', 'gaps': 'gaps', 'q. start': 'qstart', 'q. end': 'qend', 's. start': 'sstart', 's. end': 'send', 'query frame': 'qframe', 'sbjct frame': 'sframe', 'query/sbjct frames': 'frames', 'query seq': 'qseq', 'subject seq': 'sseq', 'gap opens': 'gapopen', 'query gi': 'qgi', 'subject ids': 'sallseqid', 'subject gi': 'sgi', 'subject gis': 'sallgi', 'BTOP': 'btop', 'subject accs.': 'sallacc', 'subject tax ids': 'staxids', 'subject sci names': 'sscinames', 'subject com names': 'scomnames', 'subject blast names': 'sblastnames', 'subject super kingdoms': 'sskingdoms', 'subject title': 'stitle', 'subject titles': 'salltitles', 'subject strand': 'sstrand', '% subject coverage': 'qcovs', '% hsp coverage': 'qcovhsp', } # function to create a list from semicolon-delimited string # used in BlastTabParser._parse_result_row _list_semicol = lambda x: x.split(';') _list_diamond = lambda x: x.split('<>') # column to class attribute map _COLUMN_QRESULT = { 'qseqid': ('id', str), 'qacc': ('accession', str), 'qaccver': ('accession_version', str), 'qlen': ('seq_len', int), 'qgi': ('gi', str), } _COLUMN_HIT = { 'sseqid': ('id', str), 'sallseqid': ('id_all', _list_semicol), 'sacc': ('accession', str), 'saccver': ('accession_version', str), 'sallacc': ('accession_all', _list_semicol), 'sgi': ('gi', str), 'sallgi': ('gi_all', str), 'slen': ('seq_len', int), 'staxids': ('tax_ids', _list_semicol), 'sscinames': ('sci_names', _list_semicol), 'scomnames': ('com_names', _list_semicol), 'sblastnames': ('blast_names', _list_semicol), 'sskingdoms': ('super_kingdoms', _list_semicol), 'stitle': ('title', str), 'salltitles': ('title_all', _list_diamond), # set strand as HSP property? 'sstrand': ('strand', str), 'qcovs': ('query_coverage', float), } _COLUMN_HSP = { 'bitscore': ('bitscore', float), 'score': ('bitscore_raw', int), 'evalue': ('evalue', float), 'nident': ('ident_num', int), 'pident': ('ident_pct', float), 'positive': ('pos_num', int), 'ppos': ('pos_pct', float), 'mismatch': ('mismatch_num', int), 'gaps': ('gap_num', int), 'gapopen': ('gapopen_num', int), 'btop': ('btop', str), 'qcovhsp': ('query_coverage', float), } _COLUMN_FRAG = { 'length': ('aln_span', int), 'qstart': ('query_start', int), 'qend': ('query_end', int), 'sstart': ('hit_start', int), 'send': ('hit_end', int), 'qframe': ('query_frame', int), 'sframe': ('hit_frame', int), 'frames': ('frames', str), 'qseq': ('query', str), 'sseq': ('hit', str), } _SUPPORTED_FIELDS = set(list(_COLUMN_QRESULT) + list(_COLUMN_HIT) + list(_COLUMN_HSP) + list(_COLUMN_FRAG)) # column order in the non-commented tabular output variant # values must be keys inside the column-attribute maps above _DEFAULT_FIELDS = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore'] # one field from each of the following sets must exist in order for the # parser to work _MIN_QUERY_FIELDS = set(['qseqid', 'qacc', 'qaccver']) _MIN_HIT_FIELDS = set(['sseqid', 'sacc', 'saccver']) # simple function to create BLAST HSP attributes that may be computed if # other certain attributes are present # This was previously implemented in the HSP objects in the old model _RE_GAPOPEN = re.compile(r'\w-') def _compute_gapopen_num(hsp): """Returns the number of gap openings in the given HSP.""" gapopen = 0 for seq_type in ('query', 'hit'): seq = str(getattr(hsp, seq_type).seq) gapopen += len(re.findall(_RE_GAPOPEN, seq)) return gapopen def _augment_blast_hsp(hsp, attr): """Calculates the given HSP attribute, for writing.""" if attr == 'aln_span': # aln_span is number of identical matches + mismatches + gaps func = lambda hsp: hsp.ident_num + hsp.mismatch_num + hsp.gap_num # ident and gap will require the num values to be computed first elif attr.startswith('ident'): func = lambda hsp: hsp.aln_span - hsp.mismatch_num - hsp.gap_num elif attr.startswith('gap'): func = lambda hsp: hsp.aln_span - hsp.ident_num - hsp.mismatch_num elif attr == 'mismatch_num': func = lambda hsp: hsp.aln_span - hsp.ident_num - hsp.gap_num elif attr == 'gapopen_num': if not hasattr(hsp, 'query') or not hasattr(hsp, 'hit'): # mock function so that the except clause below is triggered # as both the query and hit are required to compute gapopen def mock(hsp): raise AttributeError func = mock else: func = _compute_gapopen_num # set the num values # requires the endswith check, since we only want to set 'num' or 'span' # attributes here if not hasattr(hsp, attr) and not attr.endswith('_pct'): value = func(hsp) setattr(hsp, attr, value) # if the attr is a percent value, calculate it if attr == 'ident_pct': func2 = lambda hsp: hsp.ident_num / float(hsp.aln_span) * 100 elif attr == 'pos_pct': func = lambda hsp: hsp.pos_num / float(hsp.aln_span) * 100 elif attr == 'gap_pct': func2 = lambda hsp: hsp.gap_num / float(hsp.aln_span) * 100 else: func2 = None # set the pct values if func2 is not None: value = func2(hsp) setattr(hsp, attr, value) class BlastTabParser(object): """Parser for the BLAST tabular format.""" def __init__(self, handle, comments=False, fields=_DEFAULT_FIELDS): self.handle = handle self.has_comments = comments self.fields = self._prep_fields(fields) self.line = self.handle.readline().strip() def __iter__(self): # stop iteration if file has no lines if not self.line: raise StopIteration # determine which iterator to use elif self.has_comments: iterfunc = self._parse_commented_qresult else: iterfunc = self._parse_qresult for qresult in iterfunc(): yield qresult def _prep_fields(self, fields): """Validates and formats the given fields for use by the parser.""" # cast into list if fields is a space-separated string if isinstance(fields, basestring): fields = fields.strip().split(' ') # blast allows 'std' as a proxy for the standard default lists # we want to transform 'std' to its proper column names if 'std' in fields: idx = fields.index('std') fields = fields[:idx] + _DEFAULT_FIELDS + fields[idx+1:] # if set(fields) has a null intersection with minimum required # fields for hit and query, raise an exception if not set(fields).intersection(_MIN_QUERY_FIELDS) or \ not set(fields).intersection(_MIN_HIT_FIELDS): raise ValueError("Required query and/or hit ID field not found.") return fields def _parse_commented_qresult(self): """Iterator returning `QueryResult` objects from a commented file.""" while True: comments = self._parse_comments() if comments: try: self.fields = comments['fields'] # iterator for the query results qres_iter = self._parse_qresult() except KeyError: # no fields means the query has no results assert 'fields' not in comments # create an iterator returning one empty qresult # if the query has no results qres_iter = iter([QueryResult()]) for qresult in qres_iter: for key, value in comments.items(): setattr(qresult, key, value) yield qresult else: break def _parse_comments(self): """Returns a dictionary containing tab file comments.""" comments = {} while True: # parse program and version # example: # BLASTX 2.2.26+ if 'BLAST' in self.line and 'processed' not in self.line: program_line = self.line[len(' #'):].split(' ') comments['program'] = program_line[0].lower() comments['version'] = program_line[1] # parse query id and description (if available) # example: # Query: gi|356995852 Mus musculus POU domain elif 'Query' in self.line: query_line = self.line[len('# Query: '):].split(' ', 1) comments['id'] = query_line[0] if len(query_line) == 2: comments['description'] = query_line[1] # parse target database # example: # Database: db/minirefseq_protein elif 'Database' in self.line: comments['target'] = self.line[len('# Database: '):] # parse RID (from remote searches) elif 'RID' in self.line: comments['rid'] = self.line[len('# RID: '):] # parse column order, required for parsing the result lines # example: # Fields: query id, query gi, query acc., query length elif 'Fields' in self.line: comments['fields'] = self._parse_fields_line() # if the line has these strings, it's either the end of a comment # or the end of a file, so we return all the comments we've parsed elif ' hits found' in self.line or 'processed' in self.line: self.line = self.handle.readline().strip() return comments self.line = self.handle.readline() if not self.line: return comments else: self.line = self.line.strip() def _parse_fields_line(self): """Returns a list of column short names from the 'Fields' comment line.""" raw_field_str = self.line[len('# Fields: '):] long_fields = raw_field_str.split(', ') fields = [_LONG_SHORT_MAP[long_name] for long_name in long_fields] return self._prep_fields(fields) def _parse_result_row(self): """Returns a dictionary of parsed row values.""" fields = self.fields columns = self.line.strip().split('\t') assert len(fields) == len(columns), "Expected %i columns, found: " \ "%i" % (len(fields), len(columns)) qresult, hit, hsp, frag = {}, {}, {}, {} for idx, value in enumerate(columns): sname = fields[idx] # flag to check if any of the _COLUMNs contain sname in_mapping = False # iterate over each dict, mapping pair to determine # attribute name and value of each column for parsed_dict, mapping in ( (qresult, _COLUMN_QRESULT), (hit, _COLUMN_HIT), (hsp, _COLUMN_HSP), (frag, _COLUMN_FRAG)): # process parsed value according to mapping if sname in mapping: attr_name, caster = mapping[sname] if caster is not str: value = caster(value) parsed_dict[attr_name] = value in_mapping = True # make sure that any unhandled field is not supported if not in_mapping: assert sname not in _SUPPORTED_FIELDS return {'qresult': qresult, 'hit': hit, 'hsp': hsp, 'frag': frag} def _get_id(self, parsed): """Returns the value used for a QueryResult or Hit ID from a parsed row.""" # use 'id', with 'accession' and 'accession_version' fallbacks # one of these must have a value since we've checked whether # they exist or not when parsing the comments id_cache = parsed.get('id') if id_cache is None: id_cache = parsed.get('accession') if id_cache is None: id_cache = parsed.get('accession_version') return id_cache def _parse_qresult(self): """Generator function that returns QueryResult objects.""" # state values, used to determine what to do with each line state_EOF = 0 state_QRES_NEW = 1 state_QRES_SAME = 3 state_HIT_NEW = 2 state_HIT_SAME = 4 # dummies for initial states qres_state = None hit_state = None file_state = None # dummies for initial id caches prev_qid = None prev_hid = None # dummies for initial parsed value containers cur, prev = None, None hit_list, hsp_list = [], [] while True: # store previous line's parsed values if we've past the first line if cur is not None: prev = cur prev_qid = cur_qid prev_hid = cur_hid # only parse the line if it's not EOF or not a comment line if self.line and not self.line.startswith('#'): cur = self._parse_result_row() cur_qid = self._get_id(cur['qresult']) cur_hid = self._get_id(cur['hit']) else: file_state = state_EOF # mock values for cur_qid and cur_hid since the line is empty cur_qid, cur_hid = None, None # get the state of hit and qresult if prev_qid != cur_qid: qres_state = state_QRES_NEW else: qres_state = state_QRES_SAME # new hits are hits with different id or hits in a new qresult if prev_hid != cur_hid or qres_state == state_QRES_NEW: hit_state = state_HIT_NEW else: hit_state = state_HIT_SAME # we're creating objects for the previously parsed line(s), # so nothing is done in the first parsed line (prev == None) if prev is not None: # every line is essentially an HSP with one fragment, so we # create both of these for every line frag = HSPFragment(prev_hid, prev_qid) for attr, value in prev['frag'].items(): # adjust coordinates to Python range # NOTE: this requires both start and end coords to be # present, otherwise a KeyError will be raised. # Without this limitation, we might misleadingly set the # start / end coords for seq_type in ('query', 'hit'): if attr == seq_type + '_start': value = min(value, prev['frag'][seq_type + '_end']) - 1 elif attr == seq_type + '_end': value = max(value, prev['frag'][seq_type + '_start']) setattr(frag, attr, value) # strand and frame setattr require the full parsed values # to be set first for seq_type in ('hit', 'query'): # try to set hit and query frame frame = self._get_frag_frame(frag, seq_type, prev['frag']) setattr(frag, '%s_frame' % seq_type, frame) # try to set hit and query strand strand = self._get_frag_strand(frag, seq_type, prev['frag']) setattr(frag, '%s_strand' % seq_type, strand) hsp = HSP([frag]) for attr, value in prev['hsp'].items(): setattr(hsp, attr, value) hsp_list.append(hsp) # create hit and append to temp hit container if hit_state # says we're not at the same hit or at a new query if hit_state == state_HIT_NEW: hit = Hit(hsp_list) for attr, value in prev['hit'].items(): setattr(hit, attr, value) hit_list.append(hit) hsp_list = [] # create qresult and yield if we're at a new qresult or EOF if qres_state == state_QRES_NEW or file_state == state_EOF: qresult = QueryResult(hit_list, prev_qid) for attr, value in prev['qresult'].items(): setattr(qresult, attr, value) yield qresult # if current line is EOF, break if file_state == state_EOF: break hit_list = [] self.line = self.handle.readline().strip() def _get_frag_frame(self, frag, seq_type, parsedict): """Returns `HSPFragment` frame given the object, its sequence type, and its parsed dictionary values.""" assert seq_type in ('query', 'hit') frame = getattr(frag, '%s_frame' % seq_type, None) if frame is not None: return frame else: if 'frames' in parsedict: # frames is 'x1/x2' string, x1 is query frame, x2 is hit frame idx = 0 if seq_type == 'query' else 1 return int(parsedict['frames'].split('/')[idx]) # else implicit None return def _get_frag_strand(self, frag, seq_type, parsedict): """Returns `HSPFragment` strand given the object, its sequence type, and its parsed dictionary values.""" # NOTE: this will never set the strands as 0 for protein # queries / hits, since we can't detect the blast flavors # from the columns alone. assert seq_type in ('query', 'hit') strand = getattr(frag, '%s_strand' % seq_type, None) if strand is not None: return strand else: # using parsedict instead of the fragment object since # we need the unadjusted coordinated values start = parsedict.get('%s_start' % seq_type) end = parsedict.get('%s_end' % seq_type) if start is not None and end is not None: return 1 if start <= end else -1 # else implicit None return class BlastTabIndexer(SearchIndexer): """Indexer class for BLAST+ tab output.""" _parser = BlastTabParser def __init__(self, filename, comments=False, fields=_DEFAULT_FIELDS): SearchIndexer.__init__(self, filename, comments=comments, fields=fields) # if the file doesn't have comments, # get index of column used as the key (qseqid / qacc / qaccver) if not self._kwargs['comments']: if 'qseqid' in fields: self._key_idx = fields.index('qseqid') elif 'qacc' in fields: self._key_idx = fields.index('qacc') elif 'qaccver' in fields: self._key_idx = fields.index('qaccver') else: raise ValueError("Custom fields is missing an ID column. " "One of these must be present: 'qseqid', 'qacc', or 'qaccver'.") def __iter__(self): """Iterates over the file handle; yields key, start offset, and length.""" handle = self._handle handle.seek(0) if not self._kwargs['comments']: iterfunc = self._qresult_index else: iterfunc = self._qresult_index_commented for key, offset, length in iterfunc(): yield _bytes_to_string(key), offset, length def _qresult_index_commented(self): """Indexer for commented BLAST tabular files.""" handle = self._handle handle.seek(0) start_offset = 0 # mark of a new query query_mark = None # mark of the query's ID qid_mark = _as_bytes('# Query: ') # mark of the last line end_mark = _as_bytes('# BLAST processed') while True: end_offset = handle.tell() line = handle.readline() if query_mark is None: query_mark = line start_offset = end_offset elif line.startswith(qid_mark): qresult_key = line[len(qid_mark):].split()[0] elif line == query_mark or line.startswith(end_mark): yield qresult_key, start_offset, end_offset - start_offset start_offset = end_offset elif not line: break def _qresult_index(self): """Indexer for noncommented BLAST tabular files.""" handle = self._handle handle.seek(0) start_offset = 0 qresult_key = None key_idx = self._key_idx tab_char = _as_bytes('\t') while True: # get end offset here since we only know a qresult ends after # encountering the next one end_offset = handle.tell() #line = handle.readline() line = handle.readline() if qresult_key is None: qresult_key = line.split(tab_char)[key_idx] else: try: curr_key = line.split(tab_char)[key_idx] except IndexError: curr_key = _as_bytes('') if curr_key != qresult_key: yield qresult_key, start_offset, end_offset - start_offset qresult_key = curr_key start_offset = end_offset # break if we've reached EOF if not line: break def get_raw(self, offset): """Returns the raw string of a QueryResult object from the given offset.""" if self._kwargs['comments']: getfunc = self._get_raw_qresult_commented else: getfunc = self._get_raw_qresult return getfunc(offset) def _get_raw_qresult(self, offset): """Returns the raw string of a single QueryResult from a noncommented file.""" handle = self._handle handle.seek(offset) qresult_raw = _as_bytes('') tab_char = _as_bytes('\t') key_idx = self._key_idx qresult_key = None while True: line = handle.readline() # get the key if the first line (qresult key) if qresult_key is None: qresult_key = line.split(tab_char)[key_idx] else: try: curr_key = line.split(tab_char)[key_idx] except IndexError: curr_key = _as_bytes('') # only break when qresult is finished (key is different) if curr_key != qresult_key: break # append to the raw string as long as qresult is the same qresult_raw += line return qresult_raw def _get_raw_qresult_commented(self, offset): """Returns the raw string of a single QueryResult from a commented file.""" handle = self._handle handle.seek(offset) qresult_raw = _as_bytes('') end_mark = _as_bytes('# BLAST processed') # query mark is the line marking a new query # something like '# TBLASTN 2.2.25+' query_mark = None line = handle.readline() while line: # since query_mark depends on the BLAST search, we need to obtain it # first if query_mark is None: query_mark = line # break when we've reached the next qresult or the search ends elif line == query_mark or line.startswith(end_mark): break qresult_raw += line line = handle.readline() return qresult_raw class BlastTabWriter(object): """Writer for blast-tab output format.""" def __init__(self, handle, comments=False, fields=_DEFAULT_FIELDS): self.handle = handle self.has_comments = comments self.fields = fields def write_file(self, qresults): """Writes to the handle, returns how many QueryResult objects are written.""" handle = self.handle qresult_counter, hit_counter, hsp_counter, frag_counter = 0, 0, 0, 0 for qresult in qresults: if self.has_comments: handle.write(self._build_comments(qresult)) if qresult: handle.write(self._build_rows(qresult)) if not self.has_comments: qresult_counter += 1 hit_counter += len(qresult) hsp_counter += sum(len(hit) for hit in qresult) frag_counter += sum(len(hit.fragments) for hit in qresult) # if it's commented and there are no hits in the qresult, we still # increment the counter if self.has_comments: qresult_counter += 1 # commented files have a line saying how many queries were processed if self.has_comments: handle.write('# BLAST processed %i queries' % qresult_counter) return qresult_counter, hit_counter, hsp_counter, frag_counter def _build_rows(self, qresult): """Returns a string containing tabular rows of the QueryResult object.""" coordinates = set(['qstart', 'qend', 'sstart', 'send']) qresult_lines = '' for hit in qresult: for hsp in hit: line = [] for field in self.fields: # get the column value ~ could either be an attribute # of qresult, hit, or hsp if field in _COLUMN_QRESULT: value = getattr(qresult, _COLUMN_QRESULT[field][0]) elif field in _COLUMN_HIT: value = getattr(hit, _COLUMN_HIT[field][0]) # special case, since 'frames' can be determined from # query frame and hit frame elif field == 'frames': value = '%i/%i' % (hsp.query_frame, hsp.hit_frame) elif field in _COLUMN_HSP: try: value = getattr(hsp, _COLUMN_HSP[field][0]) except AttributeError: attr = _COLUMN_HSP[field][0] _augment_blast_hsp(hsp, attr) value = getattr(hsp, attr) elif field in _COLUMN_FRAG: value = getattr(hsp, _COLUMN_FRAG[field][0]) else: assert field not in _SUPPORTED_FIELDS continue # adjust from and to according to strand, if from and to # is included in the output field if field in coordinates: value = self._adjust_coords(field, value, hsp) # adjust output formatting value = self._adjust_output(field, value) line.append(value) hsp_line = '\t'.join(line) qresult_lines += hsp_line + '\n' return qresult_lines def _adjust_coords(self, field, value, hsp): """Adjusts start and end coordinates according to strand.""" assert field in ('qstart', 'qend', 'sstart', 'send') # determine sequence type to operate on based on field's first letter seq_type = 'query' if field.startswith('q') else 'hit' strand = getattr(hsp, '%s_strand' % seq_type, None) if strand is None: raise ValueError("Required attribute %r not found." % ('%s_strand' % (seq_type))) # switch start <--> end coordinates if strand is -1 if strand < 0: if field.endswith('start'): value = getattr(hsp, '%s_end' % seq_type) elif field.endswith('end'): value = getattr(hsp, '%s_start' % seq_type) + 1 elif field.endswith('start'): # adjust start coordinate for positive strand value += 1 return value def _adjust_output(self, field, value): """Adjusts formatting of the given field and value to mimic native tab output.""" # qseq and sseq are stored as SeqRecord, but here we only need the str if field in ('qseq', 'sseq'): value = str(value.seq) # evalue formatting, adapted from BLAST+ source: # src/objtools/align_format/align_format_util.cpp#L668 elif field == 'evalue': if value < 1.0e-180: value = '0.0' elif value < 1.0e-99: value = '%2.0e' % value elif value < 0.0009: value = '%3.0e' % value elif value < 0.1: value = '%4.3f' % value elif value < 1.0: value = '%3.2f' % value elif value < 10.0: value = '%2.1f' % value else: value = '%5.0f' % value # pident and ppos formatting elif field in ('pident', 'ppos'): value = '%.2f' % value # evalue formatting, adapted from BLAST+ source: # src/objtools/align_format/align_format_util.cpp#L723 elif field == 'bitscore': if value > 9999: value = '%4.3e' % value elif value > 99.9: value = '%4.0d' % value else: value = '%4.1f' % value # coverages have no comma (using floats still ~ a more proper # representation) elif field in ('qcovhsp', 'qcovs'): value = '%.0f' % value # list into '<>'-delimited string elif field == 'salltitles': value = '<>'.join(value) # list into ';'-delimited string elif field in ('sallseqid', 'sallacc', 'staxids', 'sscinames', 'scomnames', 'sblastnames', 'sskingdoms'): value = ';'.join(value) # everything else else: value = str(value) return value def _build_comments(self, qres): """Returns a string of a QueryResult tabular comment.""" comments = [] # inverse mapping of the long-short name map, required # for writing comments inv_field_map = dict((v, k) for k, v in _LONG_SHORT_MAP.items()) # try to anticipate qress without version if not hasattr(qres, 'version'): program_line = '# %s' % qres.program.upper() else: program_line = '# %s %s' % (qres.program.upper(), qres.version) comments.append(program_line) # description may or may not be None if qres.description is None: comments.append('# Query: %s' % qres.id) else: comments.append('# Query: %s %s' % (qres.id, qres.description)) # try appending RID line, if present try: comments.append('# RID: %s' % qres.rid) except AttributeError: pass comments.append('# Database: %s' % qres.target) # qresults without hits don't show the Fields comment if qres: comments.append('# Fields: %s' % ', '.join(inv_field_map[field] for field in self.fields)) comments.append('# %i hits found' % len(qres)) return '\n'.join(comments) + '\n' # if not used as a module, run the doctest if __name__ == "__main__": from Bio._utils import run_doctest run_doctest()