#!/usr/local/bin/python

# Jan. 4, 2006, Dr. Brian Fristensky, University of Manitoba

# Description: Convert a file of trees to a GDE flat file.

# Synopsis: tree2flat.py infile outfile

# Files: infile    file of trees in Phylip tree format
#        outfile   GDE flat file, containing one or more trees

import sys
import os
import string
import re


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def GETTREES(FN):
    """Read the file FN and return its trees as a list of strings.

    A tree is everything up to (but not including) a semicolon. Leading
    and trailing whitespace, including newlines, is stripped from every
    input line before the lines are concatenated, so a tree that spans
    several lines comes back as one string.

    The terminating ';' is NOT part of the returned strings; it has to be
    added back when the trees are written. Because the text is split on
    ';', a file that ends with a semicolon produces a trailing empty
    string, and an empty file produces ['']. Callers must skip empty
    entries.

    Raises IOError/OSError if FN cannot be opened.
    """
    # Join the stripped lines into one very long line. The 'with' block
    # guarantees the file is closed even if reading fails.
    with open(FN, 'r') as infile:
        bigline = "".join(line.strip() for line in infile)

    # A regular expression such as '\(.*\);' is not used here: '.*' is
    # greedy, so on the concatenated line it matches from the first '('
    # to the last ');' and returns every tree as a single match.
    return bigline.split(';')


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def WRITETREES(OUTFILE, IFN, TREES):
    """Write each tree in TREES to OUTFILE as a two-line entry.

    Each entry has the form

        "name
        tree;

    where name is derived from the input file name IFN followed by a
    running number (starting at 1), and tree is the tree on a single
    line with its terminating semicolon restored.

    OUTFILE  object with a write() method, open for writing text
    IFN      name (or path) of the input file; only used to build names
    TREES    iterable of tree strings without the trailing ';'
    """
    # Create a base name for the trees from the file name only, so that
    # directory components (and dots inside them) never end up in the
    # tree name.
    base = os.path.basename(IFN)

    # Truncate the file extension, if any.
    dot = base.rfind('.')
    if dot > -1:
        base = base[:dot]

    # Truncate to 8 characters, to leave some room so that a number can
    # be appended to the name for each tree.
    treename = base[:8]

    # Write the trees. Splitting on ';' can leave an empty string at the
    # end of the list, so blank entries are filtered out and do not
    # consume a number.
    count = 0
    for tree in TREES:
        outtree = tree.strip()
        if outtree:
            count += 1
            OUTFILE.write('"' + treename + str(count) + '\n')
            OUTFILE.write(outtree + ';\n')


# ======================== MAIN PROCEDURE ==========================
def main(argv=None):
    """Convert the tree file argv[1] into the GDE flat file argv[2].

    argv defaults to sys.argv. Returns 0 on success and 1 when the two
    required file names are missing.

    The output file is always created (or truncated); if the input file
    does not exist the output file is left empty.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write("Usage: tree2flat.py infile outfile\n")
        return 1

    IFN = argv[1]
    OFN = argv[2]

    with open(OFN, 'w') as OUTFILE:
        if os.path.exists(IFN):
            # Read in the list of trees and write one entry per tree.
            WRITETREES(OUTFILE, IFN, GETTREES(IFN))
    return 0


if __name__ == "__main__":
    sys.exit(main())