#!/usr/bin/env python
"""
rebasecnv.py - Create subsets of REBASE based on types of cutting sites.

Synopsis: rebasecnv.py --infile infile --outfile outfile [--outfmt name|number]
              [--prototypes] [--commercial] [--ends 5|3|b] [--symmetry a|s]

@modified: January 4, 2017
@author: Brian Fristensky
@contact: frist@cc.umanitoba.ca
"""

import os
import subprocess
import sys

# optparse is deprecated in favor of argparse as of Python 2.7. However,
# since 2.7 is not always present on many systems, at this writing,
# it is safer to stick with optparse for now. It should be easy
# to change later, since the syntax is very similar between argparse and optparse.
from optparse import OptionParser

blib = os.environ.get("BIRCHPYLIB")
# os.environ.get() returns None when BIRCHPYLIB is unset; do not put None on sys.path.
if blib:
    sys.path.append(blib)
from birchlib import Birchmod

PROGRAM = "rebasecnv.py : "
USAGE = "\n\tUSAGE: rebasecnv.py --infile infile --outfile outfile [--outfmt name|number ] [--prototypes] [--commercial] [--ends 5|3|b] [--symmetry a|s]"
BM = Birchmod(PROGRAM, USAGE)

# When True, the parsed parameters and the ID of every enzyme written
# are echoed to standard output.
DEBUG = True


class Parameters:
    """
    Wrapper class for command line parameters.

    Attributes:
        IFN         name of the input file (--infile)
        OFN         name of the output file (--outfile)
        OUTFMT      name or number of the output format (--outfmt)
        PROTOTYPES  True if only prototype enzymes are to be written
        COMMERCIAL  True if only commercially available enzymes are to be written
        ENDS        list of single-character codes taken from --ends
        SYMMETRY    list of single-character codes taken from --symmetry
        PID         process id of this run, as a string

    NOTE: ENDS, SYMMETRY and OUTFMT are parsed and stored, but main()
    currently filters on PROTOTYPES and COMMERCIAL only.
    """

    def __init__(self):
        """
        Set every parameter to its default, then call read_args()
        to fill in the values given on the command line.
        """
        self.IFN = ""
        self.OFN = ""
        self.OUTFMT = "bairoch"
        self.PROTOTYPES = False
        self.COMMERCIAL = False
        self.ENDS = ["5", "3", "b"]
        self.SYMMETRY = ["a", "s"]
        self.PID = str(os.getpid())
        self.read_args()

    def read_args(self):
        """
        Read command line arguments into this Parameters object.
        """
        parser = OptionParser()
        parser.add_option("--infile", dest="infile", action="store", type="string", default="",
                          help="input file in Bairoch format")
        parser.add_option("--outfile", dest="outfile", action="store", type="string", default="",
                          help="output file")
        parser.add_option("--outfmt", dest="outfmt", action="store", type="string", default="bairoch",
                          help="name or number of output format")
        parser.add_option("--prototypes", dest="prototypes", action="store_true", default=False,
                          help="Write prototype enzymes only to output")
        parser.add_option("--commercial", dest="commercial", action="store_true", default=False,
                          help="Write commercially available enzymes only to output")
        parser.add_option("--ends", dest="ends", action="store", type="string", default="53b",
                          help="Type of ends")
        parser.add_option("--symmetry", dest="symmetry", action="store", type="string", default="as",
                          help="asymmetric or symmetric sites")

        (options, args) = parser.parse_args()
        self.IFN = options.infile
        self.OFN = options.outfile
        self.OUTFMT = options.outfmt
        self.PROTOTYPES = options.prototypes
        self.COMMERCIAL = options.commercial
        # Break each option string up into a list of single characters,
        # e.g. "53b" -> ["5", "3", "b"].
        self.ENDS = list(options.ends)
        self.SYMMETRY = list(options.symmetry)

        if DEBUG:
            print('------------ Parameters from command line ------' + '\n')
            print('    IFN: ' + self.IFN)
            print('    OFN: ' + self.OFN)
            print('    OUTFMT: ' + self.OUTFMT)
            print('    PROTOTYPES: ' + str(self.PROTOTYPES))
            print('    COMMERCIAL: ' + str(self.COMMERCIAL))
            print('    ENDS: ' + str(self.ENDS))
            print('    SYMMETRY: ' + str(self.SYMMETRY))


class Enzyme:
    """
    One enzyme entry read from the input file: the raw lines of the
    entry plus the criteria extracted from them.
    """

    def __init__(self):
        """
        Initialize an empty enzyme.
        """
        self.LINES = []          # raw lines of the entry, from the ID line up to (not including) '//'
        self.ID = ""             # value of the ID line
        self.PROTOTYPE = False   # True if the PT line names this enzyme itself
        self.COMMERCIAL = False  # True if the CR line is anything other than "."
        self.ENDS = ""           # not set anywhere in this file
        self.SYMMETRY = ""       # not set anywhere in this file

    def readenz(self, INFILE, LINE, OUTFILE):
        """
        Read the next enzyme from the file into self.LINES.

        INFILE  -- open input file, positioned just after LINE
        LINE    -- the most recently read line of INFILE
        OUTFILE -- unused; kept so that existing callers keep working

        Lines are skipped until one starting with 'ID' is found; that
        line and all following lines up to (but not including) the
        terminating '//' line are appended to self.LINES.  If the end
        of the file is reached first, reading simply stops, so
        self.LINES is empty when no further enzyme exists.
        """
        # Skip anything that precedes the next entry.
        while LINE != "" and not LINE.startswith('ID'):
            LINE = INFILE.readline()

        # Collect the entry.  readline() returns "" at end of file, which
        # never starts with '//', so EOF must be tested explicitly or a
        # file lacking a final '//' would loop forever.
        while LINE != "" and not LINE.startswith('//'):
            self.LINES.append(LINE)
            LINE = INFILE.readline()

    def getID(self, ELINE):
        """
        Return the value part (everything from column 6 on, stripped) of an ID line.
        """
        return ELINE[5:].strip()

    def getCOMMERCIAL(self, ELINE):
        """
        Return True unless the value part of the CR line ELINE is a lone ".".
        """
        return ELINE[5:].strip() != "."

    def getPROTO(self, ELINE):
        """
        Return True if the value part of the PT line ELINE equals this
        enzyme's own ID, otherwise False.  self.ID must already be set.
        """
        return ELINE[5:].strip() == self.ID

    def writeenz(self, OUTFILE):
        """
        Write the enzyme to OUTFILE: a blank line, the stored entry
        lines, and a terminating '//' line.
        """
        OUTFILE.write('\n')
        for LINE in self.LINES:
            OUTFILE.write(LINE)
        OUTFILE.write('//' + '\n')


# ======================== MAIN PROCEDURE ==========================
def main():
    """
    Called when not in documentation mode.

    Copies the leading 'CC' header lines of the input file to the
    output file, then copies every enzyme entry that satisfies the
    --prototypes / --commercial criteria given on the command line.
    """
    P = Parameters()

    # Nested with-statements (rather than one with two managers) keep this
    # valid on Python 2.6 and guarantee both files are closed on any error.
    with open(P.IFN, 'r') as INFILE:
        with open(P.OFN, 'w') as OUTFILE:

            # Copy header comment lines from infile to outfile
            LINE = INFILE.readline()
            while LINE.startswith('CC'):
                OUTFILE.write(LINE)
                LINE = INFILE.readline()

            # Read an enzyme at a time. For each enzyme, find out whether it
            # meets criteria for prototypes and commercial availability.
            # If so, print the enzyme to the output.
            while LINE != "":
                E = Enzyme()
                E.readenz(INFILE, LINE, OUTFILE)

                # Nothing read means only trailing non-entry text was left
                # before end of file; do not emit an empty record.
                if not E.LINES:
                    break

                # Get criteria information from the enzyme.  The ID line is
                # expected before the PT line, because getPROTO() compares
                # against E.ID.
                for ELINE in E.LINES:
                    FIELD = ELINE[0:2]
                    if FIELD == 'ID':
                        E.ID = E.getID(ELINE)
                    elif FIELD == 'PT':
                        E.PROTOTYPE = E.getPROTO(ELINE)
                    elif FIELD == 'CR':
                        E.COMMERCIAL = E.getCOMMERCIAL(ELINE)

                # Set OKAY if criteria from enzyme E match criteria from
                # command line parameters P
                OKAY = True
                if P.PROTOTYPES and not E.PROTOTYPE:
                    OKAY = False
                if P.COMMERCIAL and not E.COMMERCIAL:
                    OKAY = False

                # If enzyme meets all criteria, print it to output
                if OKAY:
                    if DEBUG:
                        print(E.ID)
                    E.writeenz(OUTFILE)

                # Get the next enzyme
                LINE = INFILE.readline()

    BM.exit_success()


if not (BM.documentor() or "-test" in sys.argv):
    main()