#!/usr/bin/env python3
'''
bl_rcorrector.py - Given a series of paired-end read files, run rcorrector
for each pair of files, and generate output files for each pair.

Synopsis: bl_rcorrector.py tsvfile threads outdir [rcorrector options]

    A tsvfile line such as

        control1_R1.fastq.gz<TAB>control1_R2.fastq.gz

    causes run_rcorrector.pl to be run once on that pair of files, with
    outdir given as its output directory (-od).

    tsvfile - a tab-separated value file with each pair of filenames on
              separate lines. MUST be the first argument.
    threads - number of threads, passed to rcorrector as -t
    outdir  - directory for writing corrected files
    [rcorrector options] - options to be passed to rcorrector. All
              rcorrector arguments follow the three positional arguments.

@modified: March 2, 2019
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca
'''

# optparse is deprecated in favor of argparse as of Python 2.7. However,
# since 2.7 is not always present on many systems, at this writing,
# it is safer to stick with optparse for now. It should be easy
# to change later, since the syntax is very similar between argparse
# and optparse.
from optparse import OptionParser

import os
import subprocess
import sys

PROGRAM = "bl_rcorrector.py : "
USAGE = "\n\tUSAGE: bl_rcorrector.py tsvfile threads outdir [rcorrector options] "

DEBUG = True
if DEBUG :
    print('bl_rcorrector: Debugging mode on')


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        TSVFILE        - name of the TSV file listing the read files
        THREADS        - number of threads, kept as a string
        OUTDIR         - output directory handed to rcorrector
        rcorrectorargs - list of extra arguments passed through unchanged
    """

    def __init__(self):
        """
        Set the defaults
              TSVFILE = ""
              THREADS = "2"
              OUTDIR = "reads.corrected"
              rcorrectorargs = []
        then call read_args() to fill in the values from the command line.
        """
        self.TSVFILE = ""
        self.THREADS = "2"
        self.OUTDIR = "reads.corrected"
        self.rcorrectorargs = []
        self.read_args()

        if DEBUG :
            print('------------ Parameters from command line ------')
            print('    TSVFILE: ' + self.TSVFILE)
            print('    THREADS: ' + str(self.THREADS))
            print('    OUTDIR: ' + self.OUTDIR)
            print('    rcorrectorargs: ' + str(self.rcorrectorargs))
            print()

    def read_args(self):
        """
        Read command line arguments into this Parameters object.

        sys.argv[1:4] are tsvfile, threads and outdir; anything after that
        is collected, untouched, into rcorrectorargs.

        Raises:
            SystemExit: (status 1) after printing USAGE to stderr when
                fewer than three positional arguments were supplied.
        """
        # Without this check a short command line dies with a bare
        # IndexError and the user never sees the usage message.
        if len(sys.argv) < 4 :
            sys.stderr.write(PROGRAM + 'expected at least 3 arguments, got '
                             + str(len(sys.argv) - 1) + '\n')
            sys.stderr.write(USAGE + '\n')
            sys.exit(1)

        self.TSVFILE, self.THREADS, self.OUTDIR = sys.argv[1:4]
        self.rcorrectorargs = sys.argv[4:]


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TSVFiles :
    """
    Methods for reading lists of paired read TSV files, and for writing
    lists to output.
    """

    def __init__(self):
        """
        Initializes READPAIRS = [], a list with one entry per TSV line.
        Each entry is a list of one (single-end) or two (paired-end)
        file names.
        """
        self.READPAIRS = []

    def ReadTSVfile(self, FN) :
        """
        Read a TSV file containing names of paired-end and/or single-end
        read files, appending one entry to self.READPAIRS per usable line.

        Paired-end files are on lines such as

            leftreadfile.fq.gz<TAB>rightreadfile.fq.gz

        Single-end files have each file on a separate line

            reads1.fq.gz
            reads2.fq.gz
            reads3.fq.gz

        Blank lines and lines beginning with '#' are skipped. Only the
        first two TAB-separated fields of a line are used, and blank
        fields are ignored.

        Parameters:
            FN - name of the TSV file to read
        """
        TAB = '\t'
        # 'with' guarantees the file is closed even if parsing raises.
        with open(FN, "r") as F :
            for line in F :
                line = line.strip()
                if not line or line.startswith('#') :
                    continue
                # Get rid of double quotes that enclose fields when some
                # programs write output, and then split by TABs.
                tokens = line.replace('"', '').split(TAB)
                # Keep the non-blank names among the first two fields.
                fnames = [name for name in (tok.strip() for tok in tokens[:2])
                          if name]
                if fnames :
                    self.READPAIRS.append(fnames)
        if DEBUG :
            print(str(self.READPAIRS))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def BuildCommand(PR, THREADS, OUTDIR, rcorrectorargs) :
    """
    Build the argument list for one run of run_rcorrector.pl.

    Parameters:
        PR             - list of read file names: one name is passed
                         with -s, two or more use the first two as -1/-2,
                         an empty list adds no read-file options
        THREADS        - thread count, passed as -t (converted with str())
        OUTDIR         - output directory, passed as -od
        rcorrectorargs - extra arguments appended unchanged

    Returns:
        The command as a list of strings, suitable for subprocess.
    """
    command = ["run_rcorrector.pl"]
    if len(PR) == 1 :
        # Only the option and the file name: every list element reaches
        # the child as its own argv entry, so no padding may be added.
        command.extend(['-s', PR[0]])
    elif len(PR) > 1 :
        command.extend(['-1', PR[0], '-2', PR[1]])
    command.extend(['-t', str(THREADS)])
    command.extend(['-od', OUTDIR])
    command.extend(rcorrectorargs)
    return command


def Runrcorrector(PR, THREADS, OUTDIR, rcorrectorargs, LOGFILE) :
    """
    Run run_rcorrector.pl on one single-end file or one pair of files.

    The command line, and the stdout and stderr of the child process, are
    written to LOGFILE.

    Parameters:
        PR             - list of one or two read file names
        THREADS        - thread count, passed as -t
        OUTDIR         - output directory; created if it does not exist
        rcorrectorargs - extra arguments appended to the command
        LOGFILE        - open, writable file object backed by a real file
                         descriptor (it is handed to the child process)

    Returns:
        The exit status of run_rcorrector.pl.
    """
    # Make sure output directory exists.
    # We allow overwriting results from a previous run of Rcorrector.
    # makedirs also creates missing parent directories.
    os.makedirs(OUTDIR, exist_ok=True)

    # Construct the command  - - - - - - - - - - - - - -
    COMSTR = BuildCommand(PR, THREADS, OUTDIR, rcorrectorargs)
    if DEBUG :
        print('COMSTR: ' + str(COMSTR))

    # Run rcorrector - - - - - - - - - - - - - - - - -
    LOGFILE.write('======== rcorrector ==========' + '\n')
    LOGFILE.write(str(COMSTR) + '\n')
    LOGFILE.write('\n')
    # The child writes straight to the file descriptor, so our buffered
    # header has to be flushed first to keep the log in order.
    LOGFILE.flush()
    p = subprocess.Popen(COMSTR, stdout=LOGFILE, stderr=LOGFILE)
    status = p.wait()
    if status != 0 :
        LOGFILE.write(PROGRAM + 'run_rcorrector.pl exited with status '
                      + str(status) + '\n')
    LOGFILE.write('\n')
    return status


#======================== MAIN PROCEDURE ==========================
def main():
    """
    Read the command line, read the TSV file, and run rcorrector once for
    each entry, logging to bl_rcorrector.log in the current directory.
    """
    # Read parameters from command line
    P = Parameters()

    TF = TSVFiles()
    if P.TSVFILE != "" :
        TF.ReadTSVfile(P.TSVFILE)

    LOGFN = 'bl_rcorrector.log'
    # 'with' closes the log even if one of the runs raises.
    with open(LOGFN, 'w') as LOGFILE :
        LOGFILE.write('\n')

        # Run rcorrector
        for PR in TF.READPAIRS :
            Runrcorrector(PR, P.THREADS, P.OUTDIR, P.rcorrectorargs, LOGFILE)


if __name__ == "__main__":
    main()