#!/usr/bin/env python3
"""
bl_gffread.py - Given a series of gff/gff3 files, run gffread and create GTF files

Synopsis: bl_gffread.py tsvfile [gffread options]

    tsvfile - a tab-separated value file with filenames on separate lines.
              The first column is assumed to be file names. All other
              columns are ignored. MUST be the first argument. All gffread
              arguments follow.

    [gffread options] - options to be passed to gffread

@modified: June 6, 2018
@author: Brian Fristensky
@contact: Brian.Fristensky@umanitoba.ca
"""

import os
import subprocess
import sys

PROGRAM = "bl_gffread.py : "
USAGE = "\n\tUSAGE: bl_gffread.py tsvfile [gffread options] "

DEBUG = True
if DEBUG:
    print('bl_gffread: Debugging mode on')


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        TSVFILE:     name of the TSV file listing the gff files ("" if the
                     command line did not supply one)
        gffreadargs: list of arguments passed through unchanged to gffread
    """

    def __init__(self):
        """
        Initialize TSVFILE and gffreadargs to empty values, then call
        read_args() to fill them in from the command line.
        """
        self.TSVFILE = ""
        self.gffreadargs = []
        self.read_args()

        if DEBUG:
            print('------------ Parameters from command line ------')
            print('    TSVFILE: ' + self.TSVFILE)
            print('    gffreadargs: ' + str(self.gffreadargs))
            print()

    def read_args(self):
        """
        Read command line arguments from sys.argv into this object.

        The first argument is the TSV file; everything after it belongs to
        gffread. If no argument was given, the defaults ("" and []) are left
        untouched instead of raising IndexError, so the caller can report a
        usage error.
        """
        if len(sys.argv) > 1:
            self.TSVFILE = sys.argv[1]
            self.gffreadargs = sys.argv[2:]


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TSVFile:
    """
    Methods for reading lists of gff files.

    Attributes:
        GFFFILES: file names collected by ReadTSVfile(), in file order
    """

    def __init__(self):
        """Start with an empty list of gff file names."""
        self.GFFFILES = []

    def ReadTSVfile(self, FN):
        """
        Read a TSV file containing names of gff files, one per line, eg.

            reads1.gff
            reads2.gff
            reads3.gff

        Blank lines and lines beginning with '#' are skipped. Only the
        first tab-separated field of each line is used; it is appended to
        self.GFFFILES.

        Args:
            FN: path of the TSV file to read.

        Raises:
            OSError: if FN cannot be opened or read.
        """
        TAB = '\t'
        # The with-block guarantees the file is closed even if reading fails.
        with open(FN, "r") as F:
            for line in F:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                # Get rid of double quotes that enclose fields when some
                # programs write output, and then split by TABs.
                tokens = line.replace('"', '').split(TAB)
                # Only process the name in the first field on a line and
                # ignore other fields. Blank names are ignored.
                fname = tokens[0].strip()
                if fname:
                    self.GFFFILES.append(fname)
        if DEBUG:
            print(str(self.GFFFILES))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def RunGffread(GF, gffreadargs, LOGFILE):
    """
    Run gffread on one gff file, writing <basename>.gtf next to it.

    Args:
        GF:          path of the input .gff/.gff3 file.
        gffreadargs: list of extra command line options for gffread.
        LOGFILE:     open, writable file object backed by a real file; the
                     command and gffread's stdout/stderr are written to it.

    Returns:
        The exit status of gffread (0 on success).

    Raises:
        OSError: if the gffread executable cannot be started.
    """

    # Return the filename minus the extension
    def RmExt(FN):
        NAME = os.path.splitext(FN)[0]
        if DEBUG:
            print("Basename: ", NAME)
        return NAME

    # Construct the command - - - - - - - - - - - - - -
    COMSTR = ["gffread", GF]

    # Bname is the basename of the .gff file (ie. without the .gff extension)
    Bname = RmExt(GF)

    # Generate a .gtf file for each .gff file
    GtfName = Bname + '.gtf'
    COMSTR.extend(['-o', GtfName])

    # Append the gffread options to the command
    COMSTR.extend(gffreadargs)
    if DEBUG:
        print('COMSTR: ' + str(COMSTR))

    # Run gffread - - - - - - - - - - - - - - - - -
    LOGFILE.write('======== gffread ==========' + '\n')
    LOGFILE.write(str(COMSTR) + '\n')
    LOGFILE.write('\n')
    # Flush before launching the child so that our header precedes the
    # output gffread writes directly to the same file descriptor.
    LOGFILE.flush()
    status = subprocess.call(COMSTR, stdout=LOGFILE, stderr=LOGFILE)
    if status != 0:
        # Make failures visible in the log rather than silently ignoring them.
        LOGFILE.write(PROGRAM + 'gffread exited with status ' + str(status)
                      + ' for ' + GF + '\n')
    LOGFILE.write('\n')
    return status


# ======================== MAIN PROCEDURE ==========================
def main():
    """
    Read the command line, then run gffread on every .gff/.gff3 file listed
    in the TSV file, logging to bl_gffread.log in the current directory.

    Exits with status 1 and a usage message if no TSV file was specified.
    """
    # Read parameters from command line
    P = Parameters()
    if P.TSVFILE == "":
        sys.stderr.write(PROGRAM + "no tsvfile specified" + USAGE + "\n")
        sys.exit(1)

    TF = TSVFile()
    TF.ReadTSVfile(P.TSVFILE)

    LOGFN = 'bl_gffread.log'
    # The with-block closes the log even if gffread cannot be started.
    with open(LOGFN, 'w') as LOGFILE:
        LOGFILE.write('\n')

        # Run gffread; names without a gff/gff3 extension are skipped.
        for GF in TF.GFFFILES:
            if GF.lower().endswith(('.gff', '.gff3')):
                RunGffread(GF, P.gffreadargs, LOGFILE)


if __name__ == "__main__":
    main()