#!/usr/bin/env python3 import os import os.path import phylip import shutil import subprocess import sys import time #Version February 17, 2021 # Run dnapars as a command #Synopsis: dnapars.py infile soption method replicates percent blocksize # jumble numjum outgroup parstype threshold transversion termout alltrees printdata outfile treefile alltreefile #Convert arguments to variables INFILE = sys.argv[1] # blnalign: If no file has been chosen for UFN, UFN will be blank. # Simply substituting %UFN% into the command for dnapars would # therefore result in a missing parameter. Here we concatenate # UTREE and UFN in the form UTREE_UFN. We can read both parameters from the UTREE variable, # and UFN is no longer passed as a separate variable. UTREEPARAM = sys.argv[2] UTREE = UTREEPARAM[0] UFN = UTREEPARAM[1:] SOPTION = sys.argv[3] METHOD = sys.argv[4] REPLICATES = sys.argv[5] PERCENT = int(sys.argv[6]) BLOCKSIZE = sys.argv[7] JUMBLE = sys.argv[8] NUMJUM = sys.argv[9] OUTGROUP = sys.argv[10] PARSTYPE = sys.argv[11] THRESHOLD = sys.argv[12] TRANSVERSION = sys.argv[13] TERMOUT = sys.argv[14] ALLTREES = sys.argv[15] PRINTDATA = sys.argv[16] OUTFILE = sys.argv[17] TREEFILE = sys.argv[18] ALLTREEFILE = sys.argv[19] # Make a temporary directory in which to run the program STARTDIR = os.getcwd() TEMPDIR = 'DNAPARS.' + str(os.getpid()) os.mkdir(TEMPDIR) shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.original')) if UTREE == 'y': # Turn off bootstrapping when evaluating a user tree. # If bootstrapping is on, an empty file may be generated # which could cause drawtree or drawgram to loop infinitely METHOD = 'n' # Make sure that treefile begins with number of trees on first # line of file. If first line in file has parentheses, the # number must be added. phylip.ufn(UFN, TEMPDIR) # From here on, everything is done in TEMPDIR os.chdir(TEMPDIR) # Phylip can't deal with sequence names longer than 10 characters. # Therefore, we have to replace the names with randomly assigned names, and save the # original names and random names in idkeys.tsv. We can use idkey.tsv at the end # to restore the original names to the output files. subprocess.call(["uniqid.py","--encode", "infile.original","infile.encoded","idkeys.tsv"]) subprocess.call(["phylcnv.py","-inf", "fasta", "-outf", "pint", "infile.encoded", "infile.temp"]) # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile.temp"]) if UTREE == 'y' : subprocess.call(["uniqid.py","--encodesame", "intree","intree.temp","idkeys.tsv"]) shutil.move("intree.temp","intree") # Debug statement. #p_testinfile = subprocess.call(["nedit", "intree"]) msgfile_h = open('MSGFILE', 'a') msgfile_h.write("--------------------- DNAPARS - DNA PARSIMONY METHODS ---------------------\n") msgfile_h.write("\n") #----------------- generated resampled datasets, if specified ----- # Choose resampling method # Percent of characters to sample if PERCENT < 5 or PERCENT > 100: PERCENT = 100 if METHOD == "n": msgfile_h.write(" \n") shutil.copyfile('infile.temp', 'infile') if METHOD in ("b", "d"): phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i') elif METHOD in ("ps", "po", "pw"): phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i') # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile"]) #------------------------------- DNAPARS --------------------------------- termout_h = open(TERMOUT, 'w') RSEED = phylip.phylip_random() msgfile_h.write("DNAPARS: SEED= " + str(RSEED) + "\n") # This statement reads from infile, so we have to run it before # running TREEPROGRAM, which also reads infile. NUMSEQ = phylip.get_numseq("infile") os.nice(4) start_time = time.time() #----------------- generate keyboard input to send to program ----- # Jumble - When multiple datasets are analyzed, some programs automatically # jumbles, and prompts for a random number seed for jumbling. Othersise, # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore # to set jumbling first, so that we don't need to handle a bunch of special cases in # which the program asks for jumbling at a later time. comfile_h = open('Dnaparsfile', 'w') if UTREE == 'y': comfile_h.write('u\n') # Jumble - When multiple datasets are analyzed, some programs automatically # jumbles, and prompts for a random number seed for jumbling. Othersise, # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore # to set jumbling first, so that we don't need to handle a bunch of special cases in # which the program asks for jumbling at a later time. if METHOD != "n": JUMBLE = "J" if JUMBLE == "J": phylip.jumble(comfile_h, msgfile_h, NUMJUM) if METHOD in ("b", "d"): comfile_h.write('m\n') comfile_h.write('w\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') elif METHOD in ("ps", "po", "pw"): comfile_h.write('m\n') comfile_h.write('d\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') # Search option if SOPTION == "m": msgfile_h.write('Search option: THOROUGH\n') elif SOPTION == "l": msgfile_h.write('Search option: LESS THOROUGH\n') comfile_h.write('s\n') comfile_h.write('n\n') elif SOPTION == "r": msgfile_h.write('Search option: REARRANGE ON JUST ONE BEST TREE\n') comfile_h.write('s\n') comfile_h.write('y\n') # Outgroup OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h) # Threshold or ordinary parsimony if PARSTYPE == 't': if int(THRESHOLD) < 2: THRESHOLD = 2 comfile_h.write(PARSTYPE + '\n') comfile_h.write(str(THRESHOLD) + '\n') msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + str(THRESHOLD) + '\n') # Count all steps: a; Count only transversions: t if TRANSVERSION == 't': comfile_h.write('n\n') # Should sequence data be printed? if PRINTDATA == 'y' and METHOD == 'n': comfile_h.write('1\n') # When resampling or jumbling, turn off printing trees to outfile if METHOD in ('b', 'd', 'ps', 'po', 'pw'): comfile_h.write('3\n') #accept current settings and do the analysis comfile_h.write('y\n') comfile_h.close() #-------- Run dnapars using keyboard input from Dnaparsfile ----------- comfile_h = open('Dnaparsfile', 'r') p_PROGRAM = subprocess.Popen(['dnapars'], stdin=comfile_h) p_PROGRAM.wait() comfile_h.close() termout_h.close() msgfile_h.close() time_elapsed = time.time() - start_time os.nice(0) outfile_h = open('outfile', 'a') outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds") outfile_h.close() #----------- Return results to calling directory---------------- # When using resampling or jumbling, filter the outtree through # consense to generate an unrooted consensus tree. if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw': outfile_h = open('outfile', 'a') outfile_h.write(" \n") outfile_h.write('-------------------------------------------\n') outfile_h.close() #subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', os.path.join(STARTDIR, TREEFILE)]) #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True) subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"]) shutil.move("outfile.decoded","outfile") subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense.tmp', 'outtree.consense']) subprocess.call(["uniqid.py","--decode", "outfile.consense.tmp","outfile.consense","idkeys.tsv"]) subprocess.call(["uniqid.py","--decode", "outtree.consense","outtree.decoded","idkeys.tsv"]) shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE)) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE)) if ALLTREES == 'y': subprocess.call(["uniqid.py","--decode", "outtree","alltrees.decoded","idkeys.tsv"]) shutil.move('alltrees.decoded', os.path.join(STARTDIR, ALLTREEFILE)) else: #shutil.move('outtree', os.path.join(STARTDIR, TREEFILE)) #phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False) subprocess.call(["uniqid.py","--decode", "outfile","outfile.decoded","idkeys.tsv"]) shutil.move("outfile.decoded","outfile") subprocess.call(["uniqid.py","--decode", "outtree","outtree.decoded","idkeys.tsv"]) shutil.move("outtree.decoded", os.path.join(STARTDIR, TREEFILE)) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False) os.chdir(STARTDIR) shutil.rmtree(TEMPDIR) print('DNAPARS Completed')