#!/usr/bin/env python3
"""
optparse is deprecated in favor of argparse as of Python 2.7. However,
since 2.7 is not always present on many systems, at this writing,
it is safer to stick with optparse for now. It should be easy
to change later, since the syntax is very similar between argparse and optparse.
from optparse import OptionParser
"""

from optparse import OptionParser

import datetime
import getpass
import os
import re
import shutil
import stat
import subprocess
import sys
import time

'''
bl_soap-trans.py - Run SOAPdenovo-Trans

Synopsis: bl_soap-trans.py --tsv tsvfile [options]

@modified: April 6, 2019
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca
'''

#blib = os.environ.get("BIRCHPYLIB")
#sys.path.append(blib)
#from birchlib import Birchmod

PROGRAM = "bl_soap-trans.py : "
USAGE = "\n\tUSAGE: bl_soap-trans.py --tsv tsvfile [options]"

DEBUG = True
if DEBUG:
    print('bl_soap-trans: Debugging mode on')
#BM = Birchmod(PROGRAM, USAGE)

# File extensions recognized as FASTQ; anything else is treated as FASTA.
_FASTQ_EXTENSIONS = ('.fq', '.fastq')
# Compression suffixes that are looked through when classifying a read file.
_COMPRESSED_EXTENSIONS = ('.gz',)


# - - - - - - - - - - - - - Utility classes - - - - - - - - - - - - - - - - -
def chmod_ar(filename):
    """
    Make a file world-readable.

    Does nothing if filename does not exist.
    """
    if os.path.exists(filename):
        st = os.stat(filename)
        os.chmod(filename,
                 st.st_mode | stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH)


def chmod_arx(filename):
    """
    Make a file or directory world-readable and world-executable/searchable.

    Does nothing if filename does not exist.
    """
    if os.path.exists(filename):
        st = os.stat(filename)
        os.chmod(filename,
                 st.st_mode
                 | stat.S_IEXEC | stat.S_IREAD
                 | stat.S_IXGRP | stat.S_IRGRP
                 | stat.S_IXOTH | stat.S_IROTH)


def LocalHostname():
    """
    Return the name of the local machine.

    Tries a number of methods to get a name other than
    'localhost' or a null result: the HOSTNAME environment variable,
    platform.uname(), and finally the socket module.
    """
    import platform
    import socket

    def CheckName(name):
        # A usable name is non-empty and is not some form of 'localhost'.
        return bool(name) and not name.startswith("localhost")

    name = os.getenv('HOSTNAME')
    if not CheckName(name):
        name = platform.uname()[1]
    if not CheckName(name):
        name = socket.gethostname()
        if '.' not in name:
            # Not fully qualified, so ask the resolver for a better name.
            try:
                name = socket.gethostbyaddr(name)[0]
            except OSError:
                # The lookup can fail on hosts without working DNS; the
                # short name is still good enough for a sender address.
                pass
    return name


def SendEmail(From, To, Subject, Text):
    """
    Very simple email method adapted from:
    http://stackoverflow.com/questions/882712/sending-html-email-using-python

    There are more elaborate examples on this site for sending
    HTML messages and attachments.

    From    - sender address
    To      - recipient address, or a list of recipient addresses
    Subject - subject line
    Text    - message body as plain text

    The message is sent through the SMTP server on localhost, as a
    multipart/alternative message with a plain-text part and an HTML part.
    Failure to send is reported on stdout and is not raised to the caller.
    """
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    from html import escape

    Host = 'localhost'
    Recipients = [To] if isinstance(To, str) else list(To)

    msg = MIMEMultipart('alternative')
    msg['Subject'] = Subject
    msg['From'] = From
    msg['To'] = ', '.join(Recipients)

    # The HTML alternative shows the same text; it is escaped and wrapped in
    # <pre> so that line breaks and any '<' or '&' in the log survive rendering.
    Html = """\
<html>
<head></head>
<body>
<pre>%s</pre>
</body>
</html>
""" % (escape(Text))

    part1 = MIMEText(Text, 'plain')
    part2 = MIMEText(Html, 'html')
    msg.attach(part1)
    msg.attach(part2)

    try:
        server = smtplib.SMTP(Host)
        try:
            server.sendmail(From, Recipients, msg.as_string())
        finally:
            server.quit()
        print("Successfully sent email")
    except (smtplib.SMTPException, OSError) as err:
        # Notification is best-effort: report the problem but don't abort.
        print("Error: unable to send email (" + str(err) + ")")


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
    Wrapper class for command line parameters
    """

    def __init__(self, argv=None):
        """
        Initializes arguments:
            TSVFILE = ""
            PROGRAM = "SOAPdenovo-Trans-31mer"
            THREADS = 2
            MERGELEVEL = 1
            MINCONTIG = 100
            MAXREADLEN = 100
            ASMFLAGS = 3
            OUTDIR = "."
            PREFIX = ""
            EMAIL = ""
        Then calls read_args() to fill in their values from command line

        argv - optional list of argument strings to parse instead of
               sys.argv[1:]
        """
        self.TSVFILE = ""
        self.PROGRAM = "SOAPdenovo-Trans-31mer"
        self.THREADS = 2
        self.MERGELEVEL = 1
        self.MINCONTIG = 100
        self.MAXREADLEN = 100
        self.ASMFLAGS = 3
        self.OUTDIR = "."
        self.PREFIX = ""
        self.EMAIL = ""
        self.read_args(argv)

        if DEBUG:
            print('------------ Parameters from command line ------')
            print('    TSVFILE: ' + self.TSVFILE)
            print('    PROGRAM: ' + self.PROGRAM)
            print('    MERGELEVEL: ' + str(self.MERGELEVEL))
            print('    MINCONTIG: ' + str(self.MINCONTIG))
            print('    MAXREADLEN: ' + str(self.MAXREADLEN))
            print('    ASMFLAGS: ' + str(self.ASMFLAGS))
            print('    THREADS: ' + str(self.THREADS))
            print('    OUTDIR: ' + self.OUTDIR)
            print('    PREFIX: ' + self.PREFIX)
            print('    EMAIL: ' + self.EMAIL)
            print()

    def read_args(self, argv=None):
        """
        Read command line arguments into a Parameter object

        argv - optional list of argument strings; when None, optparse
               reads sys.argv[1:]

        Numeric options are declared with type="int" so that they have the
        same type whether they come from the command line or from a default,
        and so that optparse rejects non-numeric values with a usage error.
        """
        parser = OptionParser()
        parser.add_option("--tsvfile", dest="tsvfile", action="store",
                          default="",
                          help="list of paired/unpaired read files")
        parser.add_option("--program", dest="program", action="store",
                          default="SOAPdenovo-Trans-31mer",
                          help="SOAPdenovo-Trans-31mer or SOAPdenovo-Trans-127mer")
        parser.add_option("--mergelevel", dest="mergelevel", action="store",
                          type="int", default=1,
                          help="strength of merging similar sequences")
        parser.add_option("--mincontig", dest="mincontig", action="store",
                          type="int", default=100,
                          help="shortest contig for scaffolding")
        parser.add_option("--maxreadlen", dest="maxreadlen", action="store",
                          type="int", default=100,
                          help="max. read length")
        parser.add_option("--asmflags", dest="asmflags", action="store",
                          type="int", default=3,
                          help="when to use reads: contigs|scaffolds|both")
        parser.add_option("--threads", dest="threads", action="store",
                          type="int", default=2,
                          help="number of threads to use")
        #parser.add_option("--buckets", dest="buckets", action="store_true", default=False,
        #                  help="Write number of sequences per bucket to output")
        parser.add_option("--outdir", dest="outdir", action="store",
                          default=".",
                          help="output directory")
        parser.add_option("--prefix", dest="prefix", action="store",
                          default="",
                          help="prefix for output files")
        parser.add_option("--email", dest="email", action="store",
                          default="",
                          help="send email upon completion")

        (options, args) = parser.parse_args(argv)
        self.TSVFILE = options.tsvfile
        self.PROGRAM = options.program
        self.MERGELEVEL = options.mergelevel
        self.MINCONTIG = options.mincontig
        self.MAXREADLEN = options.maxreadlen
        self.ASMFLAGS = options.asmflags
        self.THREADS = options.threads
        self.OUTDIR = options.outdir
        self.PREFIX = options.prefix
        self.EMAIL = options.email


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TSVFiles:
    """
    Methods for reading lists of paired read TSV files, and for writing lists to output.
    """

    def __init__(self):
        """
        Initializes arguments:
            READPAIRS = []
        """
        self.READPAIRS = []

    def ReadTSVfile(self, FN):
        """
        TSV file containing names of paired-end and/or single end read files.

        Paired-end files are on lines such as

        leftreadfile.fq.gz<TAB>rightreadfile.fq.gz

        Single-end files have a each file on a separate line

        reads1.fq.gz
        reads2.fq.gz
        reads3.fq.gz

        Blank lines and lines beginning with '#' are skipped. Each remaining
        line appends a list of one or two file names to self.READPAIRS.
        """
        TAB = '\t'
        with open(FN, "r") as F:
            for line in F:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                # get rid of double quotes that enclose fields when some programs write
                # output, and then split by TABs.
                tokens = line.replace('"', '').split(TAB)
                # ignore blank fields. Add either single or pair of filenames
                # to list. Only process names from first two fields on a line
                # and ignore other fields.
                fnames = [tok.strip() for tok in tokens[:2] if tok.strip()]
                if fnames:
                    self.READPAIRS.append(fnames)
        if DEBUG:
            print(str(self.READPAIRS))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def _sequence_extension(filename):
    """
    Return the lower-cased extension that identifies the sequence format,
    looking through a trailing compression suffix (reads.fq.gz -> '.fq').
    """
    root, ext = os.path.splitext(filename.lower())
    if ext in _COMPRESSED_EXTENSIONS:
        ext = os.path.splitext(root)[1]
    return ext


def _read_file_type(PR):
    """
    Return a code indicating the type of file(s), based on file extension(s):
    'qn' paired fastq, 'fn' paired fasta, 'q' single fastq, 'f' single fasta.
    """
    # There are too many file extensions that have been used for fasta files,
    # so we assume that if it's not a fastq file, it must be a fasta file.
    if len(PR) == 2:
        both_fastq = (_sequence_extension(PR[0]) in _FASTQ_EXTENSIONS
                      and _sequence_extension(PR[1]) in _FASTQ_EXTENSIONS)
        return 'qn' if both_fastq else 'fn'
    return 'q' if _sequence_extension(PR[0]) in _FASTQ_EXTENSIONS else 'f'


# Create config file for running SOAPdenovo-Trans
def BuildConfig(P, TF, CFN):
    """
    Write the SOAPdenovo-Trans config file.

    P   - Parameters object; MAXREADLEN and ASMFLAGS are written to the file
    TF  - list of read-file entries, each a list of one (single-end) or
          two (paired-end) file names
    CFN - name of the config file to create (overwritten if it exists)

    File types are decided by extension. Compressed files such as
    reads.fq.gz are classified by the extension under the '.gz'.
    NOTE(review): this assumes SOAPdenovo-Trans reads .gz input directly,
    as the TSV examples in TSVFiles.ReadTSVfile suggest - confirm.
    """
    NL = '\n'
    with open(CFN, 'w') as CFILE:
        CFILE.write('#===== SOAPdenovo-Trans Config file) generated by bl_soap-trans.py =====' + NL)
        CFILE.write(NL)
        CFILE.write('# - - - - - PARAMETERS - - - - -' + NL)
        # str() because MAXREADLEN is numeric.
        CFILE.write('max_rd_len=' + str(P.MAXREADLEN) + NL)

        # Multiple libraries are not supported, but we still need this line
        # or SOAP crashes.
        CFILE.write('[LIB]' + NL)
        CFILE.write('asm_flags=' + str(P.ASMFLAGS) + NL)
        CFILE.write(NL)
        CFILE.write('# - - - - - READ FILES - - - - -' + NL)
        for PR in TF:
            FT = _read_file_type(PR)
            if FT == 'qn':    # paired fastq files
                CFILE.write('q1=' + PR[0] + NL)
                CFILE.write('q2=' + PR[1] + NL)
            elif FT == 'fn':  # paired fasta files
                CFILE.write('f1=' + PR[0] + NL)
                CFILE.write('f2=' + PR[1] + NL)
            elif FT == 'q':   # single fastq file
                CFILE.write('q=' + PR[0] + NL)
            elif FT == 'f':   # single fasta file
                CFILE.write('f=' + PR[0] + NL)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunSOAPdenovoTrans(P, PR, CFN, LOGFILE):
    """
    Run SOAPdenovo-Trans and record the run in the log.

    P       - Parameters object (PROGRAM, THREADS, MERGELEVEL, MINCONTIG, PREFIX)
    PR      - not used; kept so that existing calls still work
    CFN     - name of the config file written by BuildConfig
    LOGFILE - open, writable file; receives the command line, start and
              finish times, and the program's stdout and stderr

    Returns the exit status of the program, or None if it could not be started.
    """
    # Every element of the argument list must be a string, so the numeric
    # parameters are converted here.
    COMMAND = [P.PROGRAM, "all", "-s", CFN,
               "-p", str(P.THREADS),
               "-M", str(P.MERGELEVEL),
               "-L", str(P.MINCONTIG),
               "-o", P.PREFIX]

    # Run the command - - - - - - - - - - - - - - - - -
    LOGFILE.write('======== SOAPdenovo-Trans ==========' + '\n')
    LOGFILE.write(str(COMMAND) + '\n')
    StartTime = datetime.datetime.now()
    LOGFILE.write('Start time: ' + str(StartTime) + '\n')
    LOGFILE.write('\n')
    # Flush our own text first, so that it precedes the program's output.
    LOGFILE.flush()

    try:
        ReturnCode = subprocess.call(COMMAND, stdout=LOGFILE, stderr=LOGFILE)
    except OSError as err:
        # e.g. the program is not installed or is not on the PATH
        ReturnCode = None
        LOGFILE.write('bl_soap-trans.py: >>> Unable to run ' + str(P.PROGRAM)
                      + ': ' + str(err) + ' <<<' + '\n')

    FinishTime = datetime.datetime.now()
    LOGFILE.write('\n')
    LOGFILE.write('Finish time: ' + str(FinishTime) + '\n')
    ElapsedTime = FinishTime - StartTime
    LOGFILE.write('Elapsed time: ' + str(ElapsedTime) + '\n')
    return ReturnCode


#======================== MAIN PROCEDURE ==========================
def main():
    """
    Called when not in documentation mode.
    """
    # Read parameters from command line
    P = Parameters()

    TF = TSVFiles()
    if P.TSVFILE != "":
        TF.ReadTSVfile(P.TSVFILE)
        # We'll be working in the output directory, so we need
        # absolute file paths to the input files.
        for PR in TF.READPAIRS:
            PR[:] = [os.path.abspath(FN) for FN in PR]
    # There is nothing to assemble unless at least one read file was listed.
    OKAY = len(TF.READPAIRS) > 0

    # From now on, everything happens in the output directory.
    os.makedirs(P.OUTDIR, exist_ok=True)
    os.chdir(P.OUTDIR)

    LOGFN = 'bl_soap-trans.log'
    # First create the file, then re-open it so that we can append to it.
    # We first create with 'w' just to make sure that we're creating a fresh copy,
    # rather than appending to an existing copy from a previous run.
    with open(LOGFN, 'w'):
        pass
    with open(LOGFN, 'a') as LOGFILE:
        LOGFILE.write('\n')

        if not OKAY:
            LOGFILE.write('bl_soap-trans.py: >>> Need to specify a list of sequencing read files <<<' + '\n')

        # Create config file for running SOAPdenovo-Trans
        CFN = P.PREFIX + '.config'
        BuildConfig(P, TF.READPAIRS, CFN)

        # Run SOAPdenovo-Trans
        if OKAY:
            RunSOAPdenovoTrans(P, TF.READPAIRS, CFN, LOGFILE)

    # Notify user when job is done, if email address was
    # supplied using --email
    if P.EMAIL != "":
        Sender = getpass.getuser() + '@' + LocalHostname()
        Subject = 'bl_soap-trans.py completed'
        with open(LOGFN, 'r') as LOGFILE:
            LogLines = [line + '\n' for line in LOGFILE]
        Message = 'bl_soap-trans.py: Completed\n' + ''.join(LogLines)
        SendEmail(Sender, [P.EMAIL], Subject, Message)


if __name__ == "__main__":
    main()
#else:
    #used to generate documentation
#    import doctest
#    doctest.testmod()

#if (BM.documentor() or "-test" in sys.argv):
#    pass
#else:
#    main()