#!/usr/bin/env python3 """ bl_stringtie.py - Given a series of Bam files, run stringtie and create GTF files that include FPKM values for genes annotated in the specified GTF file for the genome Synopsis: bl_stringtie.py tsvfile threads outdir [stringtie options] This script implements the stringtie steps in Pertea M et al. (2016) Transcript-level expression analysis of RNA-seq experiments with HISAT, StringTie and Ballgown. Nature Protocols 11:1650-1667. doi:10.1038/nprot.2016.095. For each Bam file in tsvfile, create a name for the GTF output file. The name of the output file is the longest leading string in common between the two filenames, followed by the .gtf extension. Thus, control1_R.bam control2_R.bam control3_R.bam would output to control1_R.gtf control2_R.gtf control3_R.gtf tsvfile - a tab-separated value file with filenames on separate lines. The first column is assumed to be file names. All other columns are ignored. MUST be the first argument. All stringtie arguments follow. threads - number of CPUs to use outdir - output directory [stringtie options] - options to be passed to stringtie @modified: June 2, 2018 @author: Brian Fristensky @contact: Brian.Fristensky@umanitoba.ca """ import os import subprocess import sys PROGRAM = "bl_stringtie.py : " USAGE = "\n\tUSAGE: bl_stringtie.py tsvfile threads outdir [stringtie options] " DEBUG = True if DEBUG : print('bl_stringtie: Debugging mode on') # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class Parameters: """ Wrapper class for command line parameters """ def __init__(self): """ Initializes arguments: TSVFILE = "" THREADS = "2" OUTDIR = "" stringtieargs = [] Then calls read_args() to fill in their values from command line """ self.TSVFILE = "" self.THREADS = "2" self.OUTDIR = "" self.stringtieargs = [] self.read_args() if DEBUG : print('------------ Parameters from command line ------') print(' TSVFILE: ' + self.TSVFILE) print(' THREADS: ' + str(self.THREADS)) print(' OUTDIR: ' + self.OUTDIR) print(' stringtieargs: ' + str(self.stringtieargs)) print() def read_args(self): """ Read command line arguments into a Parameter object """ self.TSVFILE = sys.argv[1] self.THREADS = sys.argv[2] self.OUTDIR = sys.argv[3] self.stringtieargs = sys.argv[4:] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - class TSVFile : """ Methods for reading lists of Bam files, and for writing lists to output. """ def __init__(self): """ Initializes arguments: BAMFILES = [] """ self.BAMFILES = [] def ReadTSVfile(self,FN) : """ TSV file containing names of bam files. reads1.bam reads2.bam reads3.bam """ TAB = '\t' F = open(FN,"r") for line in F.readlines() : line = line.strip() if len(line) > 0 and not line.startswith('#') : # get rid of double quotes that enclose fields when some programs write # output, and then split by TABs. tokens = line.replace('"','').split(TAB) # ignore blank fields. Only process names from first field on a line # and ignore other fields. if len(tokens) > 0 : fname = tokens[0].strip() if len(fname) > 0 : self.BAMFILES.append(fname) if DEBUG : print(str(self.BAMFILES)) F.close() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - def RunStringtie(BF,THREADS,OUTDIR,STRINGTIEARGS,LOGFILE) : # Return the filename minus the extension def RmExt(FN) : NAME = os.path.splitext(FN)[0] if DEBUG : print("Basename: ",NAME) return NAME # Construct the command string - - - - - - - - - - - - - - # Dummy command for testing #COMSTR=["stringtie","--help"] COMSTR=["stringtie", BF ] COMSTR.extend(['-p', THREADS]) if not os.path.exists(OUTDIR) : os.mkdir(OUTDIR) # BName is the basename of the .bam file (ie. without the .bam extension) Bname = RmExt(BF) BALLGOWN = '-B' in STRINGTIEARGS if BALLGOWN : # Step 6 in Pertea et al. # Generate output for ballgown ie. stringtie -B # Within OUTDIR, each dataset will have a sub-directory called Bname, and # all output files arising from the .bam file will begin with Bname, but have # different extensions eg. .gtf. GtfName = os.path.abspath(OUTDIR) + os.sep + Bname + os.sep + Bname + '.gtf' else: # Step 3 in Pertea et al. # Generate .gtf files for each .bam file GtfName = os.path.abspath(OUTDIR) + os.sep + Bname + '.gtf' COMSTR.extend(['-o',GtfName]) # Append the stringtie options to the command COMSTR.extend(STRINGTIEARGS) if DEBUG : print('COMSTR: ' + str(COMSTR)) # Run Stringtie - - - - - - - - - - - - - - - - - LOGFILE.write('======== stringtie ==========' + '\n') LOGFILE.write(str(COMSTR) + '\n') LOGFILE.write('\n') LOGFILE.flush() p = subprocess.Popen(COMSTR,stdout=LOGFILE,stderr=LOGFILE) p.wait() LOGFILE.write('\n') #======================== MAIN PROCEDURE ========================== def main(): """ Called when not in documentation mode. """ # Read parameters from command line P = Parameters() TF = TSVFile() if not P.TSVFILE == "" : TF.ReadTSVfile(P.TSVFILE) # Make sure that the output directory exists if not os.path.isdir(P.OUTDIR) : os.mkdir(P.OUTDIR) LOGFN = os.path.join(P.OUTDIR,'bl_stringtie.log') LOGFILE = open(LOGFN,'w') LOGFILE.write('\n') # Run stringtie for BF in TF.BAMFILES : RunStringtie(BF,P.THREADS,P.OUTDIR,P.stringtieargs,LOGFILE) LOGFILE.close() if __name__ == "__main__": main() #else: #used to generate documentation # import doctest # doctest.testmod() #if (BM.documentor() or "-test" in sys.argv): # pass #else: # main()