#!/usr/bin/env python3
"""
blfastaout.py - Translate FASTA table output into other formats, and open those
files using the appropriate application, or write output to a file.

Synopsis: blfastaout.py filename [--delete] --outfmt format --destination dest [--outfile filename]

    --delete - Delete infile when finished. This is mainly intended for running
        from BioLegato, where infile is a temporary file.

    --outfmt format - format is any output format supported by blast_formatter

    --destination dest - dest is one of the following:

        For FASTA viewable Report:
            textedit - open output files in text editor specified by the
                       $BL_TextEditor environment variable
            browser  - open in web browser specified by $BL_browser
            textfile - write to the text file named by --outfile
            htmlfile - write to HTML file named by --outfile

        For FASTA tsv Report:
            blnfetch - Open in blnfetch, BioLegato interface for retrieving
                       DNA/RNA entries using ACCESSION numbers
            blpfetch - Open in blpfetch, BioLegato interface for retrieving
                       protein entries using ACCESSION numbers
            tsvfile  - write to a tsvfile named by --outfile.

    --outfile - filename for saving an output file.

@modified: January 22, 2020
@author: Brian Fristensky
@contact: brian.fristensky@umanitoba.ca
"""

import argparse
import os
import shutil
import subprocess
import sys

#blib = os.environ.get("BIRCHPYLIB")
#sys.path.append(blib)
#from birchlib import Birchmod

#PROGRAM = "blfastaout.py : "
#USAGE = "\n\tUSAGE: blfastaout.py filename [--delete] --outfmt format --destination dest [--outfile filename]"
#BM = Birchmod(PROGRAM, USAGE)

DEBUG = True


class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes (all filled in from the command line by read_args()):
        IFN         - input filename (positional argument)
        DELETEFILE  - True when --delete was given
        OUTFMT      - value of --outfmt (default "fasta")
        DESTINATION - value of --destination (default "blnfetch")
        OFN         - value of --outfile (default "")
        PID         - process id of this script, as a string
    """

    # Every destination the main procedure knows how to handle.
    DESTINATIONS = ("textedit", "browser", "textfile", "htmlfile",
                    "blnfetch", "blpfetch", "tsvfile")
    # Destinations that write a file and therefore need --outfile.
    FILE_DESTINATIONS = ("textfile", "htmlfile", "tsvfile")

    def __init__(self, argv=None):
        """
        Set every attribute to its empty default, then call read_args()
        to fill in the values from the command line.

        argv - optional list of arguments to parse instead of sys.argv[1:]
        """
        self.IFN = ""
        self.DELETEFILE = False
        self.OUTFMT = ""
        self.DESTINATION = ""
        self.OFN = ""
        self.PID = str(os.getpid())
        self.read_args(argv)

    def read_args(self, argv=None):
        """
        Read command line arguments into this Parameters object.

        argv - optional list of arguments; None means sys.argv[1:]

        Exits with a usage message (argparse behaviour, status 2) when the
        destination is not recognized, or when a file destination is chosen
        without --outfile.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("infile", type=str, help="Input filename")
        parser.add_argument("--delete", action="store_true", default=False,
                            help="Delete input file when done")
        parser.add_argument("--outfmt", type=str, default="fasta",
                            help="Output format specified by BLAST -outfmt option")
        # Rejecting unknown destinations here prevents a silent no-op run
        # that would still delete the input file when --delete is given.
        parser.add_argument("--destination", type=str, default="blnfetch",
                            choices=self.DESTINATIONS,
                            help="Name of program to launch or type of file to save")
        parser.add_argument("--outfile", type=str, default="",
                            help="Filename used when action is to save a file")

        args = parser.parse_args(argv)

        # Saving to a file without a filename can only fail later on,
        # so report it up front as a usage error.
        if args.destination in self.FILE_DESTINATIONS and not args.outfile:
            parser.error("--outfile is required when --destination is "
                         + args.destination)

        self.IFN = args.infile
        self.DELETEFILE = args.delete
        self.OUTFMT = args.outfmt
        self.DESTINATION = args.destination
        self.OFN = args.outfile

        if DEBUG:
            print('------------ Parameters from command line ------' + '\n')
            print(' IFN: ' + self.IFN)
            print(' DELETEFILE: ' + str(self.DELETEFILE))
            print(' OUTFMT: ' + self.OUTFMT)
            print(' DESTINATION: ' + self.DESTINATION)
            print(' OFN: ' + self.OFN)

# - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - -
def _blast_outfmt_arg(OUTFMT):
    """
    Convert the comma-separated --outfmt value into the string
    blast_formatter recognizes.

    For the tabular formats (6, 7, 10) blfastaout.py takes a comma-separated
    list, eg. 7,sacc,slen,evalue, which becomes '7 sacc slen evalue'.
    Any other format is passed through unchanged.
    """
    tokens = OUTFMT.split(",")
    if tokens[0] in ("6", "7", "10"):
        return " ".join(tokens)
    return OUTFMT


def RunBlast_Formatter(IFN, OUTFMT, FILETYPE, OFN):
    """
    Run blast_formatter to convert an NCBI ASN.1 archive (outfmt=11)
    into other supported formats.

    IFN      - filename or RID string
    OUTFMT   - -outfmt field used by BLAST or blast_formatter
    FILETYPE - htmlfile|tsvfile|textfile ; only htmlfile adds -html
    OFN      - name of the output file

    A non-zero exit status from blast_formatter is reported on stderr.
    """
    # The argument list is handed to subprocess without a shell, so the
    # format string must NOT be wrapped in quotes.
    # NOTE(review): IFN is passed as a bare positional argument, exactly as
    # before - confirm blast_formatter accepts it without -archive/-rid.
    command = ['blast_formatter', IFN, '-outfmt', _blast_outfmt_arg(OUTFMT)]
    if FILETYPE == 'htmlfile':
        command.append('-html')
    command.extend(['-out', OFN])

    status = subprocess.call(command)
    if status != 0:
        sys.stderr.write('blfastaout.py: blast_formatter exited with status '
                         + str(status) + '\n')
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def ParseFastaTable(IFN, OFN):
    """
    For tabular FASTA output, parse the Fields line so that the names
    of fields appear as column headings.

    IFN - name of the tabular FASTA report to read
    OFN - name of the tab-separated file to write

    Leading comment lines are copied to OFN, except the "# Fields:" line,
    which is rewritten as a tab-separated heading line without the query id
    column. Comment lines further down the file are dropped. Each data line
    loses its query column and has its subject id reduced to the accession.
    """

    def fixline(l):
        """
        Return one output row (newline-terminated) for data line l,
        or "" when the line has no subject column (eg. a blank line).
        """
        # First, get rid of the leftmost field, which has just the query seq. name
        fields = l.rstrip("\n").split("\t")
        if len(fields) < 2:
            return ""
        # Next, eliminate everything except the accession number from field 2.
        # In gi|number|db|accession|locus identifiers the accession is the
        # fourth part; any other identifier is kept as it is.
        id_parts = fields[1].split("|")
        accession = id_parts[3] if len(id_parts) > 3 else fields[1]
        # The newline is re-added explicitly so that two-column rows and an
        # unterminated last line still end their row.
        return "\t".join([accession] + fields[2:]) + "\n"

    with open(IFN, "r") as infile:
        lines = infile.readlines()

    with open(OFN, "w") as outfile:
        # First, we process the comment lines, printing all comments but the
        # Fields line, which is held back for rewriting.
        field_line = None
        i = 0
        while i < len(lines) and lines[i].startswith("#"):
            if lines[i].startswith("# Fields:"):
                field_line = lines[i]
            else:
                outfile.write(lines[i])
            i += 1

        # We need to get rid of the column heading for the query id
        # before writing out the heading line. Reports without a Fields
        # line simply get no heading line.
        if field_line is not None:
            headings = field_line.rstrip("\n").split(",")[1:]
            if headings:
                outfile.write("#" + "\t".join(headings) + "\n")

        # Now write the rest of the file
        for line in lines[i:]:
            if line.startswith("#"):
                continue
            outline = fixline(line)
            if outline:
                # Each row is also echoed to stdout, as before.
                print(outline)
                outfile.write(outline)
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNTEXTEDIT(OFN, delete=True):
    """
    Run the texteditor in the background and, when delete is true,
    remove the file once the editor has exited.

    OFN    - file to open
    delete - remove OFN after editing (default True, the historical behaviour)
    """
    import shlex

    # Quote the name so that spaces or shell metacharacters in it cannot
    # break, or inject into, the shell command.
    target = shlex.quote(OFN)

    # It's surprising how many issues there are with launching multiple
    # files in a text editor. choose_edit_wrapper.sh takes care of
    # these issues.
    COMMAND = '(nohup `choose_edit_wrapper.sh` ' + target
    if delete:
        # Fall back to rm -f when $RM_CMD is unset; otherwise the shell
        # would try to execute the file itself.
        COMMAND = COMMAND + '; ${RM_CMD:-rm -f} ' + target + ' > /dev/null'
    COMMAND = COMMAND + ')&'
    os.system(COMMAND)
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RUNBROWSER(OFN, delete=True):
    """
    Open OFN with chooseviewer.py and wait for that script to return.

    OFN    - file to open
    delete - pass --delete to chooseviewer.py (default True, the
             historical behaviour)
    """
    # NOTE(review): assumes --delete is an optional flag of chooseviewer.py
    # that makes it remove the file when done - confirm against that script.
    command = ['chooseviewer.py', OFN]
    if delete:
        command.append('--delete')
    subprocess.call(command)
    return

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunBioLegato(DESTINATION, FN):
    """
    Run blnfetch or blpfetch in the background and remove the
    temporary file when done.

    DESTINATION - name of the program to launch
    FN          - temporary file to open, removed afterwards
    """
    import shlex

    # Both values are quoted before being placed in the shell command.
    program = shlex.quote(DESTINATION)
    target = shlex.quote(FN)
    COMMAND = '(nohup ' + program + ' ' + target + '; rm -f ' + target + ' > /dev/null)&'
    os.system(COMMAND)
    return

#======================== MAIN PROCEDURE ==========================
def main():
    """
    Called when not in documentation mode.

    Reads the command line, then writes the output to a file or sends it
    to a window, as specified in --destination. Exits with status 1, without
    touching the input file, when the destination is unknown or a file
    destination has no --outfile.
    """
    P = Parameters()

    if P.DESTINATION in ('textfile', 'htmlfile', 'tsvfile') and not P.OFN:
        sys.stderr.write('blfastaout.py: --outfile is required for destination '
                         + P.DESTINATION + '\n')
        sys.exit(1)

    # Set to False when a handler takes over removal of the input file.
    remove_input = P.DELETEFILE

    if P.DESTINATION == 'textedit':
        # The background shell removes the file after the editor exits;
        # removing it here as well would race with the editor start-up.
        RUNTEXTEDIT(P.IFN, delete=P.DELETEFILE)
        remove_input = False
    elif P.DESTINATION == 'browser':
        RUNBROWSER(P.IFN, delete=P.DELETEFILE)
    elif P.DESTINATION in ('textfile', 'htmlfile'):
        shutil.copy2(P.IFN, P.OFN)
    elif P.DESTINATION in ('blnfetch', 'blpfetch', 'tsvfile'):
        PARSEDOFN = P.PID + '.' + 'tsv'
        ParseFastaTable(P.IFN, PARSEDOFN)
        if P.DESTINATION in ('blnfetch', 'blpfetch'):
            RunBioLegato(P.DESTINATION, PARSEDOFN)
        else:
            # shutil.move also works when --outfile is on another filesystem.
            shutil.move(PARSEDOFN, P.OFN)
    else:
        # Nothing was produced, so the input must not be deleted.
        sys.stderr.write("blfastaout.py: unknown destination '"
                         + P.DESTINATION + "'\n")
        sys.exit(1)

    # If --delete, delete the input file when done
    if remove_input:
        try:
            os.remove(P.IFN)
        except FileNotFoundError:
            # Already removed, eg. by the viewer that was given --delete.
            pass

    #BM.exit_success()


# Run only as a script, so that importing the module (eg. to generate
# documentation) does not parse the importer's command line.
#if (BM.documentor() or "-test" in sys.argv):
if __name__ == "__main__" and "-test" not in sys.argv:
    main()