#!/usr/bin/env python3
"""Convert a CSV file of molecular markers into a Phylip-format input file.

usage: csv2phyl.py csvfile
       csv2phyl.py            {for piping output to another command}

With a csvfile argument, the output is written to the current directory
under the input's base name with its extension (.CSV or .csv) replaced by
'.phyl'.  With no argument, the CSV is read from stdin and the Phylip text
is written to stdout.

The first field of each CSV line is the name; all remaining fields are
concatenated (commas removed) to form the marker data.
"""

import os
import os.path
import sys


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Phylip_markers:
    """Wrapper class for molecular markers.

    Attributes are set from the optional constructor arguments; both
    default to the empty string, so ``Phylip_markers()`` still works.
    """

    def __init__(self, Name="", Marker=""):
        self.Name = Name      # first CSV field
        self.Marker = Marker  # remaining CSV fields with commas removed


def read_markers(lines):
    """Parse an iterable of CSV text lines into a list of Phylip_markers.

    Double quote characters, which some csv files use to wrap strings, are
    removed, as is leading/trailing whitespace (including the newline and
    carriage return at the end of each line).  Lines that are empty after
    this cleanup are skipped so that a trailing blank line does not become
    a nameless, dataless entry.
    """
    markers = []
    for line in lines:
        # str.replace instead of the Python 2-only translate(None, chars)
        # form, which raises TypeError on Python 3.
        cleaned = line.strip().replace('"', '')
        if not cleaned:
            continue
        name, _sep, data = cleaned.partition(',')
        markers.append(Phylip_markers(name, data.replace(',', '')))
    return markers


def format_phylip(markers):
    """Return the Phylip input text for a list of Phylip_markers.

    The first line holds the number of entries and the number of marker
    columns; the column count is taken from the first entry, since all
    lines are assumed to be the same length.  Each following line is the
    name padded with spaces (or truncated) to exactly 10 characters,
    immediately followed by the marker data.  An empty list yields the
    header '0 0'.
    """
    numcol = len(markers[0].Marker) if markers else 0
    rows = ['{} {}\n'.format(len(markers), numcol)]
    rows.extend(m.Name.ljust(10, ' ')[0:10] + m.Marker + '\n' for m in markers)
    return ''.join(rows)


def main(argv=None):
    """Run the conversion; ``argv`` defaults to ``sys.argv[1:]``.

    Returns 0 on success.  Errors opening or reading files propagate as
    the usual OSError exceptions.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if not args:
        # Filter mode: stdin -> stdout.
        sys.stdout.write(format_phylip(read_markers(sys.stdin)))
        return 0

    csv_path = args[0]
    # Create the output filename by truncating the .CSV or .csv file
    # extension and appending '.phyl'.
    out_path = os.path.splitext(os.path.basename(csv_path))[0] + '.phyl'

    with open(csv_path, 'r') as infile:
        markers = read_markers(infile)
    with open(out_path, 'w') as outfile:
        outfile.write(format_phylip(markers))
    return 0


if __name__ == "__main__":
    sys.exit(main())