#!/usr/bin/env python3 import os import os.path import phylip import shutil import subprocess import sys import time #Version Feb. 17, 2021 # Run protpars as a command #Synopsis: protpars.csh infile utree_ufn method replicates percent blocksize # jumble numjum outgroup parstype threshold gc termout printdata outfile treefile alltreefile #Convert arguments to variables INFILE = sys.argv[1] # blpalign: If no file has been chosen for UFN, UFN will be blank. # Simply substituting %UFN% into the command for protpars would # therefore result in a missing parameter. Here we concatenate # UTREE and UFN in the form UTREE_UFN. We can read both parameters from the UTREE variable, # and UFN is no longer passed as a separate variable. UTREEPARAM = sys.argv[2] UTREE = UTREEPARAM[0] UFN = UTREEPARAM[1:] METHOD = sys.argv[3] REPLICATES = sys.argv[4] PERCENT = sys.argv[5] BLOCKSIZE = sys.argv[6] JUMBLE = sys.argv[7] NUMJUM = sys.argv[8] OUTGROUP = sys.argv[9] PARSTYPE = sys.argv[10] THRESHOLD = sys.argv[11] GC = sys.argv[12] TERMOUT = sys.argv[13] ALLTREES = sys.argv[14] PRINTDATA = sys.argv[15] OUTFILE = sys.argv[16] TREEFILE = sys.argv[17] ALLTREEFILE = sys.argv[18] # Make a temporary directory in which to run the program STARTDIR = os.getcwd() TEMPDIR = 'PROTPARS.' + str(os.getpid()) os.mkdir(TEMPDIR) shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.original')) if UTREE == 'y': # Turn off bootstrapping when evaluating a user tree. # If bootstrapping is on, an empty file may be generated # which could cause drawtree or drawgram to loop infinitely METHOD = 'n' # protpars does NOT want the first line to have a number. # This is distinct from proml, which requires that the first line # of intree tell the number of trees. So we copy the file as is. shutil.copyfile(UFN, os.path.join(TEMPDIR, "intree")) # From here on, everything is done in TEMPDIR os.chdir(TEMPDIR) # Phylip can't deal with sequence names longer than 10 characters. # Therefore, we have to replace the names with randomly assigned names, and save the # original names and random names in idkeys.tsv. We can use idkey.tsv at the end # to restore the original names to the output files. subprocess.call(["uniqid.py","--encode", "infile.original","infile.encoded","idkeys.tsv"]) subprocess.call(["phylcnv.py","-inf", "fasta", "-outf", "pint", "infile.encoded", "infile.temp"]) # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile.temp"]) if UTREE == 'y' : subprocess.call(["uniqid.py","--encodesame", "intree","intree.temp","idkeys.tsv"]) shutil.move("intree.temp","intree") # Debug statement. #p_testinfile = subprocess.call(["nedit", "intree"]) msgfile_h = open('MSGFILE', 'a') msgfile_h.write("--------------------- PROTPARS - PROTEIN PARSIMONY METHODS ---------------------\n") msgfile_h.write("\n") # Percent of characters to sample if int(PERCENT) < 5 or int(PERCENT) > 100: PERCENT = 100 #----------------- generated resampled datasets, if specified ----- # Choose resampling method if METHOD == "n": msgfile_h.write(" \n") shutil.copyfile('infile.temp', 'infile') if METHOD in ("b", "d"): phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i') elif METHOD in ("ps", "po", "pw"): phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i') # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile"]) #----------------- generate keyboard input to send to program ----- termout_h = open(TERMOUT, 'w') RSEED = phylip.phylip_random() msgfile_h.write("PROTPARS: SEED= " + str(RSEED) + '\n') # This statement reads from infile, so we have to run it before # running TREEPROGRAM, which also reads infile. NUMSEQ = phylip.get_numseq("infile") # Jumble - When multiple datasets are analyzed, some programs automatically # jumbles, and prompts for a random number seed for jumbling. Othersise, # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore # to set jumbling first, so that we don't need to handle a bunch of special cases in # which the program asks for jumbling at a later time. comfile_h = open('Protparsfile', 'w') if UTREE == 'y': comfile_h.write('u\n') # Jumble - When multiple datasets are analyzed, some programs automatically # jumbles, and prompts for a random number seed for jumbling. Othersise, # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore # to set jumbling first, so that we don't need to handle a bunch of special cases in # which the program asks for jumbling at a later time. if METHOD != "n": JUMBLE = "J" if JUMBLE == "J": phylip.jumble(comfile_h, msgfile_h, NUMJUM) # Outgroup OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h) # Threshold or ordinary parsimony if PARSTYPE == 't': if int(THRESHOLD) < 2: THRESHOLD = 2 comfile_h.write(PARSTYPE + '\n') comfile_h.write(str(THRESHOLD) + '\n') msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + str(THRESHOLD) + '\n') if METHOD in ("b", "d"): comfile_h.write('m\n') comfile_h.write('w\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') elif METHOD in ("ps", "po", "pw"): comfile_h.write('m\n') comfile_h.write('d\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') # Genetic code if GC == "u" : msgfile_h.write('Using Genetic Code: UNIVERSAL\n') elif GC == "m" : msgfile_h.write('Using Genetic Code: MITOCHONDRIAL\n') comfile_h.write("c\n") comfile_h.write("m\n") elif GC == "v" : msgfile_h.write('Using Genetic Code: VERTEBRATE MITOCHONDRIAL\n') comfile_h.write("c\n") comfile_h.write("v\n") elif GC == "f" : msgfile_h.write('Using Genetic Code: FLY MITOCHONDRIAL\n') comfile_h.write("c\n") comfile_h.write("f\n") elif GC == "y" : msgfile_h.write('Using Genetic Code: YEAST MITOCHONDRIAL\n') comfile_h.write("c\n") comfile_h.write("y\n") else: msgfile_h.write('Using Genetic Code: UNIVERSAL\n') # Should sequence data be printed? if PRINTDATA == 'y' and METHOD == 'n': comfile_h.write('1\n') # When resampling or jumbling, turn off printing trees to outfile if METHOD in ('b', 'd', 'ps', 'po', 'pw'): comfile_h.write("3\n") #accept current settings and do the analysis comfile_h.write("y\n") comfile_h.close() #-------- Run protpars ----------- os.nice(4) start_time = time.time() comfile_h = open('Protparsfile', 'r') p_PROGRAM = subprocess.Popen(['protpars'], stdin=comfile_h) p_PROGRAM.wait() comfile_h.close() termout_h.close() msgfile_h.close() time_elapsed = time.time() - start_time outfile_h = open('outfile', 'a') outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds") outfile_h.close() os.nice(0) #----------- Return results to calling directory---------------- # When using resampling or jumbling, filter the outtree through # consense to generate an unrooted consensus tree. if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw': outfile_h = open('outfile', 'a') outfile_h.write(" \n") outfile_h.write('-------------------------------------------\n') outfile_h.close() #subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', os.path.join(STARTDIR, TREEFILE)]) #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True) subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"]) shutil.move("outfile.decoded","outfile") subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense.tmp', 'outtree.consense']) subprocess.call(["uniqid.py","--decode", "outfile.consense.tmp","outfile.consense","idkeys.tsv"]) subprocess.call(["uniqid.py","--decode", "outtree.consense","outtree.decoded","idkeys.tsv"]) shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE)) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE)) if ALLTREES == 'y': subprocess.call(["uniqid.py","--decode", "outtree","alltrees.decoded","idkeys.tsv"]) shutil.move('alltrees.decoded', os.path.join(STARTDIR, ALLTREEFILE)) else: #shutil.move('outtree', os.path.join(STARTDIR, TREEFILE)) #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False) subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"]) shutil.move("outfile.decoded","outfile") subprocess.call(["uniqid.py","--decode", "outtree","outtree.decoded","idkeys.tsv"]) shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE)) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False) os.chdir(STARTDIR) shutil.rmtree(TEMPDIR) print('protpars Completed')