#!/usr/bin/env python
"""
May 31, 2013, Dr. Brian Fristensky, University of Manitoba

Description: This is a front end for mrtrans. It makes sure that the names
of the sequences in PROTFILE and DNAFILE are the same, and re-orders the
sequences in DNAFILE, if necessary, to be in the same order as in PROTFILE.
This script assumes that sequence names in PROTFILE are IDENTICAL to the
corresponding names in DNAFILE.

Synopsis: mrtrans.py protfile dnafile

Files: protfile    fasta file with multiply aligned proteins
       dnafile     fasta file with unaligned DNA sequences

@modified: May 31, 2013
@author: Brian Fristensky
@contact: frist@cc.umanitoba.ca
"""

import sys
import os

# birchlib lives in the directory named by $BIRCHPYLIB. Only extend sys.path
# when the variable is actually set, so that None is never put on the path.
blib = os.environ.get("BIRCHPYLIB")
if blib:
    sys.path.append(blib)

from birchlib import Birchmod
from birchlib import Argument

PROGRAM = "mrtrans.py : "
USAGE = "\n\tUSAGE: mrtrans.py protfile dnafile"
BM = Birchmod(PROGRAM, USAGE)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Parameters:
    "Wrapper class for command line parameters"

    def __init__(self):
        """
        Initializes arguments:
            PFN=""   name of the protein file
            DFN=""   name of the DNA file
        Then calls read_args() to fill in their values from command line
        """
        self.PFN = ""
        self.DFN = ""
        self.read_args()

    def read_args(self):
        """
        Read command line arguments into a Parameter object.

        protfile is bound to the second-to-last position of sys.argv and
        dnafile to the last position. If fetching either one raises
        ValueError, the usage message is printed through BM.printusage().
        """
        # NOTE(review): when fewer than two arguments are supplied, the
        # computed positions fall on the script name (0) or below zero;
        # presumably Argument.fetch() rejects these -- confirm in birchlib.
        length = len(sys.argv)
        protfile = Argument("", str, BM)
        dnafile = Argument("", str, BM)
        protfile.set_position(length - 2)
        dnafile.set_position(length - 1)
        try:
            self.PFN = protfile.fetch()
            self.DFN = dnafile.fetch()
        except ValueError:
            BM.printusage()


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class FILE:
    "Wrapper class for files"

    def __init__(self, FILENAME, MODE):
        """
        Open FILENAME in the given MODE and remember the file object.

        If the file cannot be opened (IOError/OSError), the failure is
        reported through BM.fileError(FILENAME) and self.F is left as None.

        @param FILENAME: The name of the file
        @type FILENAME: str
        @param MODE: Type of file operation (read/write)
        @type MODE:str : (r, w, rw)
        """
        self.FN = FILENAME
        self.LINE = ""  # most recent line read
        self.F = None   # stays None if the file could not be opened
        try:
            self.F = open(FILENAME, MODE)
        except (IOError, OSError):
            # Only genuine I/O failures are reported as file errors;
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            BM.fileError(FILENAME)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Sequence:
    def __init__(self):
        """
        Holds name and sequence
        """
        self.Name = ""
        self.Seq = ""


class SeqData:
    def __init__(self):
        """
        Holds sequences and associated data
        """
        self.SeqLst = []  # list of sequences
        self.NumSeq = 0   # number of sequences
        self.SeqLen = 0   # length of sequences (all must be the same)
        self.NumEnz = 0   # number of rest. enzymes, primarily used by RESTML


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#======================== MAIN PROCEDURE ==========================
def main():
    """
    Called when not in documentation mode.

    Reads the command line parameters, echoes the two file names to
    standard output, and exits through BM.exit_success(). The processing
    steps listed in the comments below are not implemented in this body.
    """

    #---------- Set global variables
    P = Parameters()
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Protein file: " + P.PFN)
    print("DNA file: " + P.DFN)

    NAMEFLAG = '>'  # 1st character on the name line, indicating
                    # the beginning of the next data list

    # Read the protein alignment and unaligned DNA files

    # Remove extraneous fields and characters from names so that they
    # can be compared

    # Make sure that, for every protein in protfile, there is a sequence
    # with the identical name in dnafile.

    # Run mrtrans, sending output to a file

    # Remove extraneous characters from the mrtrans output file

    BM.exit_success()


# Run main() unless the script is in documentation mode or was invoked
# with "-test" on the command line.
if (BM.documentor() or "-test" in sys.argv):
    pass
else:
    main()