#!/bin/sh
# csv2phyl.sh - convert a comma-separated (.csv) table into a Phylip-format
# input file.
#
# usage: csv2phyl.sh csvfile
#            Exactly one argument: the Phylip data is written to standard
#            output, e.g. for piping into another command.
#        csv2phyl.sh csvfile anything
#            More than one argument: the Phylip data is written to a file
#            named after csvfile, with a trailing .csv/.CSV replaced by .phyl
#            (the extra arguments themselves are ignored).
#
# NOTE(review): the historical usage text was ambiguous about which form
# pipes to stdout; the argument-count behavior implemented by the code has
# been kept unchanged.
#
# Input:  each line is "name,marker,marker,...".
# Output: first line "<rows> <cols>", then one line per input line holding
#         the name padded/clipped to exactly 10 characters, a tab (inserted
#         by paste), and the markers with all commas removed.
#
# Exit status: 0 on success, 2 on usage error, 1 on any other failure.

JOBID=$$

# RM_CMD - command to be used for removing files and directories
if [ -f /usr/bin/rm ]; then
  RM_CMD=/usr/bin/rm
elif [ -f /bin/rm ]; then
  RM_CMD=/bin/rm
else
  RM_CMD=rm
fi

# csv2phyl csvfile [anything...]
# Does the whole conversion. The body is a subshell "( ... )" so that its
# variables, shell options and traps never leak into the caller, and so that
# "exit N" simply becomes the function's return status.
csv2phyl() (
  # sh equivalent of csh's "unset noclobber": allow ">" to overwrite files.
  set +C

  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    printf 'usage: csv2phyl.sh csvfile\n' >&2
    exit 2
  fi

  infile=$1
  if [ ! -f "$infile" ] || [ ! -r "$infile" ]; then
    printf 'csv2phyl.sh: cannot read input file: %s\n' "$infile" >&2
    exit 1
  fi

  # Scratch files live in a private directory instead of "$$.*" files in the
  # current directory, so cleanup can never remove unrelated user files.
  workdir=$(mktemp -d "${TMPDIR:-/tmp}/csv2phyl.${JOBID:-$$}.XXXXXX") || {
    printf 'csv2phyl.sh: cannot create temporary directory\n' >&2
    exit 1
  }
  trap '"${RM_CMD:-rm}" -rf "$workdir"' EXIT
  trap 'exit 1' HUP INT TERM

  names=$workdir/names
  data=$workdir/data
  phyl=$workdir/phyl

  # Names: first field of every line, blank-padded and clipped to exactly
  # 10 characters, as required by Phylip. printf does the padding so the
  # width does not depend on a run of literal blanks in this script.
  cut -d, -f1 < "$infile" | awk '{ printf "%-10.10s\n", $0 }' > "$names" ||
    exit 1

  # Markers: every remaining field, with the separating commas stripped.
  cut -d, -f2- < "$infile" | tr -d ',' > "$data" || exit 1

  # Phylip needs the number of rows and columns on the first line.
  # (Do not call the variable LINENO: the shell maintains that one itself.)
  # awk's NR also counts a final line that lacks a trailing newline.
  rows=$(awk 'END { print NR }' < "$data")

  # Columns = number of characters on the first marker line, ignoring any
  # carriage return / linefeed at its end. Reading wc from a pipe and
  # keeping only digits makes the result independent of wc's padding.
  cols=$(head -n 1 < "$data" | tr -d '\012\015' | wc -m | tr -cd '0-9')

  # Assemble the Phylip file: counts first, then padded names pasted
  # (tab-separated) onto the marker strings.
  {
    printf '%s %s\n' "$rows" "$cols"
    paste "$names" "$data"
  } > "$phyl" || exit 1

  if [ "$#" -eq 1 ]; then
    # Pipe mode: deliver the result on standard output.
    cat "$phyl" || exit 1
  else
    # File mode: derive the output name by dropping a .csv/.CSV extension.
    case $infile in
      *.[cC][sS][vV]) outfile=${infile%.???}.phyl ;;
      *) outfile=$infile.phyl ;;
    esac
    cat "$phyl" > "$outfile" || exit 1
  fi
)

# Last command of the script, so its status is the script's exit status.
csv2phyl "$@"