#!/usr/bin/env python3

import os
import os.path
import sys

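# usage: csv2phyl.py csvfile  {writes <basename of csvfile, extension removed>.phyl in the current directory}
#        csv2phyl.py          {reads stdin and writes stdout, for piping output to another command}
#  The name field is the first field of each line of the .csv file;
# the remaining fields of the line are the marker data.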

if len(sys.argv) >= 2:
    # A csv file was named on the command line: read from it, and write
    # to a file named after it with the directory part dropped and the
    # file extension (.CSV, .csv, ...) replaced by .phyl
    csv_path = sys.argv[1]
    INFILE = open(csv_path, 'r')
    OUTFILE = open(os.path.splitext(os.path.basename(csv_path))[0] + '.phyl', 'w')
else:
    # No argument given: act as a filter between stdin and stdout
    INFILE = sys.stdin
    OUTFILE = sys.stdout

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Phylip_markers:
    """Wrapper class for molecular markers.

    A plain record with two string attributes, both initialised to the
    empty string and filled in by the caller after construction.
    """

    def __init__(self):
        # Indentation uses spaces only: the former tab/space mix on these
        # lines is rejected by Python 3 with a TabError.
        self.Name = ""    # name field of one input line
        self.Marker = ""  # marker characters of one input line
	  

# Read the input file into Mdata, one Phylip_markers record per data line.
# Double quote characters that some csv files use to wrap strings are
# stripped out, as are the newline and carriage return characters (and any
# other surrounding whitespace) at the ends of each line.
Mdata = []
for Line in INFILE:
    # str.replace behaves the same on Python 2 and 3. The two-argument
    # str.translate(None, chars) form is Python 2 only and raises
    # TypeError on a Python 3 str.
    tmpline = Line.strip().replace('"', '')
    if not tmpline:
        # Skip blank lines so they are not stored as empty records
        continue
    # Split at the first comma only: the name is everything before it,
    # the marker fields are everything after it.
    name, _sep, fields = tmpline.partition(',')
    tmpmarker = Phylip_markers()
    tmpmarker.Name = name
    # Join the remaining fields into one string by dropping the commas
    tmpmarker.Marker = fields.replace(',', '')
    Mdata.append(tmpmarker)
INFILE.close()

# Find out how many data lines and marker columns were read.
# Phylip needs these numbers for the first line of the phylip input file.
if not Mdata:
    # With no records there is no first line to take the column count
    # from; stop with a message instead of an IndexError traceback.
    sys.exit('csv2phyl: no data lines found in input')
LINENUM = len(Mdata)
NUMCOL = len(Mdata[0].Marker)  # all lines are assumed same length


# Create a Phylip-format input file by writing LINENUM and NUMCOL onto
# the first line, and pasting the padded names and marker data onto the
# remaining lines.
OUTFILE.write(str(LINENUM) + '  ' + str(NUMCOL) + '\n')

for M in Mdata:
    # Pad the name with blanks, or truncate it, to exactly 10 characters
    outname = M.Name.ljust(10, ' ')[0:10]
    OUTFILE.write(outname + M.Marker + '\n')

# Close the output file so its contents are flushed to disk. sys.stdout
# is left open for the interpreter to flush at exit.
if OUTFILE is not sys.stdout:
    OUTFILE.close()