#!/usr/bin/env python3
"""
blblastout.py - Translate NCBI blast output into other formats, and open
    those files using the appropriate application, or write output to a file.
    We call blast_formatter to perform the conversion.

Synopsis:
    blblastout.py --archive filename [--delete] --outfmt format --destination dest [--outfile filename]
    blblastout.py --rid filename [--delete] --outfmt format --destination dest [--outfile filename]

    --archive filename - filename is output from BLAST in ASN.1 format (-outfmt 11)

    --rid filename - file from a blast search done at NCBI. The RID is read
        from the first line of that file containing 'RID:'.

        --archive and --rid are mutually exclusive. If both are given,
        --rid takes precedence.

    --delete - Delete infile when finished. This is mainly intended for running
        from BioLegato, where infile is a temporary file.

    --outfmt format - format is any output format supported by blast_formatter.
        For the tabular formats (6, 7, 10) the field list is given
        comma-separated, eg. 7,sacc,slen,evalue

    --destination dest - dest is one of the following:

        For BLAST viewable Report:
            textedit - open output files in text editor specified by the
                       $BL_TextEditor environment variable
                       (launched through chooseviewer.py)
            browser  - open in web browser specified by $BL_browser
                       (launched through chooseviewer.py)
            textfile - write to a text file, using the basename specified
                       by --outfile ('.txt' is appended)
            htmlfile - write to HTML file ('.html' is appended to --outfile)

        For BLAST tsv Report:
            blnfetch - Open in blnfetch, BioLegato interface for retrieving
                       DNA/RNA entries using ACCESSION numbers
            blpfetch - Open in blpfetch, BioLegato interface for retrieving
                       protein entries using ACCESSION numbers
            tsvfile  - write to a tsvfile ('.tsv' is appended to --outfile)

        For XML output:
            xmlfile     - write to an xmlfile ('.xml' is appended to --outfile)
            blastviewer - open XML output in blastviewer

    --outfile - basename for saving an output file.

@modified: January 14, 2021
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca
"""

import os
import shlex
import shutil
import subprocess
import sys

# optparse is deprecated in favor of argparse as of Python 2.7. However,
# since 2.7 is not always present on many systems, at this writing,
# it is safer to stick with optparse for now. It should be easy
# to change later, since the syntax is very similar between argparse and optparse.
from optparse import OptionParser

# birchlib lives in the directory named by $BIRCHPYLIB; only extend the
# search path when the variable is actually set.
blib = os.environ.get("BIRCHPYLIB")
if blib is not None:
    sys.path.append(blib)

from birchlib import Birchmod

PROGRAM = "blblastout.py : "
USAGE = "\n\tUSAGE: blblastout.py --archive filename [--delete] --outfmt format --destination dest [--outfile filename]"
USAGE = USAGE + "\n\tUSAGE: blblastout.py --rid rid [--delete] --outfmt format --destination dest [--outfile filename]"

BM = Birchmod(PROGRAM, USAGE)

DEBUG = True


class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes (all filled in by read_args()):
        IFN         - value of --archive (BLAST ASN.1 archive file)
        RFN         - value of --rid (file from which the RID is parsed)
        DELETEFILE  - True if --delete was given
        OUTFMT      - value of --outfmt
        DESTINATION - value of --destination
        OFN         - value of --outfile
        PID         - process id of this script, as a string; used to build
                      temporary file names
    """

    def __init__(self):
        """
        Initialize every parameter to its empty default, then call
        read_args() to fill in the values from the command line.
        """
        self.IFN = ""
        self.RFN = ""
        self.DELETEFILE = False
        self.OUTFMT = ""
        self.DESTINATION = ""
        self.OFN = ""
        self.PID = str(os.getpid())
        self.read_args()

    def read_args(self):
        """
        Read command line arguments into this Parameters object.
        When DEBUG is set, the parsed values are echoed to stdout.
        """
        parser = OptionParser()
        parser.add_option("--archive", dest="archive", action="store", type="string", default="",
                          help="BLAST output file in ASN.1 format")
        parser.add_option("--rid", dest="rid", action="store", type="string", default="",
                          help="RFN number from BLAST run remotely at NCBI")
        parser.add_option("--delete", dest="delete", action="store_true", default=False,
                          help="Delete archive when done")
        parser.add_option("--outfmt", dest="outfmt", action="store", type="string", default="",
                          help="output format specified by BLAST -outfmt option")
        parser.add_option("--destination", dest="destination", action="store", type="string", default="",
                          help="destination for output")
        parser.add_option("--outfile", dest="outfile", action="store", type="string", default="",
                          help="output filename")

        (options, args) = parser.parse_args()
        self.IFN = options.archive
        self.RFN = options.rid
        self.DELETEFILE = options.delete
        self.OUTFMT = options.outfmt
        self.DESTINATION = options.destination
        self.OFN = options.outfile

        if DEBUG:
            print('------------ Parameters from command line ------' + '\n')
            print(' IFN: ' + self.IFN)
            print(' RFN: ' + self.RFN)
            print(' DELETEFILE: ' + str(self.DELETEFILE))
            print(' OUTFMT: ' + self.OUTFMT)
            print(' DESTINATION: ' + self.DESTINATION)
            print(' OFN: ' + self.OFN)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def ParseRID(RFN):
    """
    Parse the RID number from the first line containing 'RID:'.

    RFN - name of the file to search.

    Returns the text between the first and second ':' on that line, with
    surrounding whitespace stripped, or "" if no line contains 'RID:'.
    The value found is also printed to stdout.
    """
    RIDNUM = ""
    with open(RFN, "r") as infile:
        for line in infile:
            if "RID:" in line:
                RIDNUM = line.split(':')[1].strip()
                # Keep scanning only if this line gave an empty value,
                # so that a later 'RID:' line can still supply one.
                if RIDNUM != "":
                    break
    print('RID: ' + RIDNUM)
    return RIDNUM


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBlast_Formatter(SOURCE, SOURCENAME, OUTFMT, FILETYPE, OFN):
    """
    Run blast_formatter to convert an NCBI ASN.1 archive (outfmt=11) into
    other supported formats. If blast results came from NCBI, rather than a
    local database, blast_formatter will use the RID to send a request to
    NCBI for output file in the specified format.

    SOURCE     - "-archive" or "-rid"
    SOURCENAME - filename or RID string
    OUTFMT     - -outfmt field used by BLAST or blast_formatter
    FILETYPE   - htmlfile|tsvfile|textfile|xmlfile
    OFN        - name of the file blast_formatter writes (-out)

    Waits for blast_formatter to finish and returns its exit status.
    A non-zero status is reported on stderr but is not treated as fatal.
    """
    # For print formats giving tab-separated values, blblastout.py takes a
    # comma-separated list, eg. 7,sacc,slen,evalue
    # We have to convert it to the string blast_formatter recognizes,
    # eg. 7 sacc slen evalue
    # For other formats, we just use the OUTFMT used in the command line.
    # Because the command is passed to subprocess as an argument list (no
    # shell), the format string must NOT be enclosed in quotes.
    tokens = OUTFMT.split(",")
    if tokens[0] in ("6", "7", "10"):
        PRFMT = " ".join(tokens)
    else:
        PRFMT = OUTFMT

    command = ['blast_formatter', SOURCE, SOURCENAME, '-outfmt', PRFMT]
    if FILETYPE == 'htmlfile':
        command.append('-html')
    command.extend(['-out', OFN])

    status = subprocess.call(command)
    if status != 0:
        sys.stderr.write(PROGRAM + 'blast_formatter exited with status ' + str(status) + '\n')
    return status


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def ParseHeaders(IFN, OFN):
    """
    For tabular BLAST output, parse the Fields line so that the names of
    fields appear as column headings.

    IFN - tabular output file to read
    OFN - file to write

    The leading run of '#' comment lines is copied to OFN except for the
    '# Fields:' line, which is rewritten with its commas turned into tabs
    and written last, directly above the data. All remaining lines are
    copied unchanged. In the case where no hits were found, there will be
    no Fields line in the input file, and none is written.
    """
    with open(IFN, "r") as infile:
        lines = infile.readlines()

    field_line = ""
    with open(OFN, "w") as outfile:
        # First, we process the comment lines, writing all comments but the
        # Fields line.
        i = 0
        while i < len(lines) and lines[i].startswith("#"):
            if lines[i].startswith("# Fields:"):
                # Drop the 10-character '# Fields: ' prefix.
                # NOTE(review): only the comma is replaced, so any space
                # following a comma stays attached to the next field name -
                # confirm whether consumers depend on that.
                field_line = "# " + lines[i][10:]
                field_line = field_line.replace(",", "\t")
            else:
                outfile.write(lines[i])
            i += 1

        # Then, we write the modified field line, if it was present.
        if field_line:
            outfile.write(field_line)

        # Now write the rest of the file
        outfile.writelines(lines[i:])
    return


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def _run_chooseviewer(OFN, DELETEFILE):
    """
    Run chooseviewer.py on OFN, passing --delete when DELETEFILE is true,
    and wait for it to finish.
    """
    command = ['chooseviewer.py', OFN]
    if DELETEFILE:
        command.append('--delete')
    subprocess.call(command)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNTEXTEDIT(OFN, DELETEFILE):
    """
    Open OFN through chooseviewer.py and wait for that command to return.
    If DELETEFILE is true, chooseviewer.py is asked (--delete) to remove
    the file.
    """
    _run_chooseviewer(OFN, DELETEFILE)
    return


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNBROWSER(OFN, DELETEFILE):
    """
    Open OFN through chooseviewer.py and wait for that command to return.
    If DELETEFILE is true, chooseviewer.py is asked (--delete) to remove
    the file.
    """
    _run_chooseviewer(OFN, DELETEFILE)
    return


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBioLegato(DESTINATION, FN):
    """
    Run blnfetch or blpfetch in the background and remove the temporary
    file when done.

    DESTINATION - name of the program to run
    FN          - file passed to the program, removed after it exits
    """
    # The command goes through the shell, so the filename is quoted.
    # shlex.quote leaves plain names such as '12345.tsv' unchanged.
    quoted = shlex.quote(FN)
    COMMAND = '(nohup ' + DESTINATION + ' ' + quoted + '; rm -f ' + quoted + ' > /dev/null)&'
    os.system(COMMAND)
    return


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBlastViewer(DESTINATION, FN):
    """
    Run the blastviewer in the background and remove the temporary file
    when done.

    DESTINATION - name of the program to run
    FN          - XML file passed with -in, removed after the program exits
    """
    # The command goes through the shell, so the filename is quoted.
    # shlex.quote leaves plain names such as '12345.tmp.xml' unchanged.
    quoted = shlex.quote(FN)
    COMMAND = '(nohup ' + DESTINATION + ' -in ' + quoted + ' > /dev/null; rm ' + quoted + ' )&'
    os.system(COMMAND)
    return


# ======================== MAIN PROCEDURE ==========================
def main():
    """
    Called when not in documentation mode.

    Reads the command line, runs blast_formatter on the archive or RID, and
    delivers the result to the destination chosen with --destination.
    Temporary files are created in the current directory and are named
    after this process's PID.
    """
    P = Parameters()

    if P.RFN != "":
        SOURCE = "-rid"
        SOURCENAME = ParseRID(P.RFN)
    elif P.IFN != "":
        SOURCE = "-archive"
        SOURCENAME = P.IFN
    else:
        # Without an input there is nothing for blast_formatter to convert.
        sys.exit(PROGRAM + "one of --archive or --rid is required" + USAGE)

    # Write the output to a file, or send it to a window, as specified
    # in --destination
    if P.DESTINATION == 'textedit':
        TEMPOFN = P.PID + '.' + 'txt'
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'textfile', TEMPOFN)
        # NOTE(review): the temporary text file is not deleted here, unlike
        # the browser case below - confirm that this is intended.
        RUNTEXTEDIT(TEMPOFN, False)

    elif P.DESTINATION == 'browser':
        TEMPOFN = P.PID + '.' + 'html'
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'htmlfile', TEMPOFN)
        RUNBROWSER(TEMPOFN, True)

    elif P.DESTINATION == 'textfile':
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'textfile', P.OFN + '.txt')

    elif P.DESTINATION == 'xmlfile':
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'xmlfile', P.OFN + '.xml')

    elif P.DESTINATION == 'htmlfile':
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'htmlfile', P.OFN + ".html")

    elif P.DESTINATION in ('blnfetch', 'blpfetch', 'tsvfile'):
        TEMPOFN = P.PID + '.' + 'tmp'
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'tsvfile', TEMPOFN)
        PARSEDOFN = P.PID + '.' + 'tsv'
        ParseHeaders(TEMPOFN, PARSEDOFN)
        if P.DESTINATION in ('blnfetch', 'blpfetch'):
            RunBioLegato(P.DESTINATION, PARSEDOFN)
        else:
            # shutil.move also works when the output file is on a different
            # filesystem from the working directory, where os.rename fails.
            shutil.move(PARSEDOFN, P.OFN + '.tsv')
        os.remove(TEMPOFN)

    elif P.DESTINATION == 'blastviewer':
        TEMPOFN = P.PID + '.' + 'tmp.xml'
        RunBlast_Formatter(SOURCE, SOURCENAME, P.OUTFMT, 'xmlfile', TEMPOFN)
        RunBlastViewer(P.DESTINATION, TEMPOFN)

    # If --delete, delete the input file when done
    if P.DELETEFILE:
        if SOURCE == "-archive":
            os.remove(P.IFN)
        else:
            os.remove(P.RFN)

    BM.exit_success()


if (BM.documentor() or "-test" in sys.argv):
    pass
else:
    main()