#!/usr/bin/perl
#######################################################
#
# Written by Maria Lantin (lantin@cs.sfu.ca), 1996/11/01
#
# rsort was written to sort ace dump but
# is general enough to be used for
# other purposes.
#
# Bug reports and suggestions should be sent to
# lantin@cs.sfu.ca
#
# Added to ACEDB wtools/ directory by RD 961108
#
# $Id: rsort,v 1.1 1996/12/02 00:50:23 rd Exp $
#######################################################

use FileHandle;

ProcessOptions();

# ProcessOptions() is expected to leave usable handles in $infile and
# $outfile.  Fail with a readable message rather than an obscure error
# (or silently empty output) if it did not.
die "rsort: no input available\n"  unless defined $infile;
die "rsort: no output available\n" unless defined $outfile;

# Read the input in paragraph mode (an empty record separator makes
# runs of blank lines delimit records) into a 2-dimensional array where
# each row is a record and each column is a field (line) of the record.
# $/ is a process-wide setting, not a per-handle one, so it is set
# directly and only for the duration of the read.
{
    local $/ = "";
    while (defined(my $paragraph = <$infile>)) {
        push @records, [ split(/\n/, $paragraph) ];
    }
}

# Sort the records by each sort field specified in the -f option,
# least significant field first so that the first field listed ends up
# as the primary key.  This relies on sort keeping equal records in
# their existing order.  If no fields were given, @sortfields is (1).
# byfield reads the current field specification from $sortfield.
for my $i (reverse 0 .. $#sortfields) {
    $sortfield = $sortfields[$i];
    @records = sort byfield @records;
}

# reverse the order of the records if the '-d' option was specified
if ($opt_d) {
    @records = reverse @records;
}

# Output the records: one field per line, records separated by an
# empty line.  The separators are written explicitly instead of through
# the global $, and $\ variables.
foreach my $record (@records) {
    print {$outfile} join("\n", @$record), "\n\n";
}

# Buffered write errors (disk full, ...) only surface at close time.
close($outfile) or die "rsort: error writing output: $!\n";

# Sorting routine used by the "sort" command above.  It compares the two
# records $a and $b (array references, one element per line) on the field
# described by the global $sortfield and returns -1 if $a < $b, +1 if
# $a > $b, or 0 if $a == $b.
#
# A sortfield is of the form field[a] or field[n].  The first form says
# to sort on field alphanumerically (case-insensitively) while the second
# says to sort on field numerically.  The default is [a] if it's not
# specified.  "field" is either a 1-based line number or a field name.
sub byfield {
    # Split the specification into the field and the type of sort.
    my ($sortf, $format) = $sortfield =~ /^([^\[]*)(?:\[([an])\])?/;

    # if we didn't get a format, assume 'a'
    $format = "a" unless $format;

    my $value1 = _rsort_field_value($a, $sortf);
    my $value2 = _rsort_field_value($b, $sortf);

    if ($format eq "n") {
        # Compare the first number found in each value; a value without
        # any number counts as 0.
        return _rsort_first_number($value1) <=> _rsort_first_number($value2);
    }

    # We are comparing alphanumerically so convert everything to
    # lowercase and compare the strings.
    return lc($value1) cmp lc($value2);
}

# Returns the value of field $sortf in $record, or undef if the record
# has no such field.
sub _rsort_field_value {
    my ($record, $sortf) = @_;

    # A purely numeric field selects a line by position; the first line
    # is 1.  (0 is not a valid position: Perl's negative indexing makes
    # it select the last line.)
    if ($sortf =~ /^[0-9]+$/) {
        return $$record[$sortf - 1];
    }

    # Otherwise the field is a name, assumed to be the first string on
    # its line; the value is the rest of that line.  The name is quoted
    # so that characters such as "." or "+" in it are matched literally.
    foreach my $line (@$record) {
        return $1 if $line =~ /^\s*\Q$sortf\E\s+(.*)/;
    }
    return;
}

# Returns the first number found in $value, or 0 if there is none.  A
# decimal fraction is honoured, and so is a sign when it starts the value
# or follows whitespace (so "ZK-12" yields 12, but "-12" yields -12).
sub _rsort_first_number {
    my ($value) = @_;

    if (defined $value && $value =~ /((?:(?<!\S)[-+])?\d+(?:\.\d+)?)/) {
        return $1;
    }
    return 0;
}

sub Usage {
    # Writes the help text for this utility (synopsis, option summary
    # and two worked examples) to the currently selected output handle.
    # The text is kept in one here-document; the \t escapes produce the
    # leading tab of each indented line.
    print <<"END_OF_USAGE";
This utility sorts multiline records
Usage: rsort [-f fieldlist] [-o outfile] [-v] [-h] [-d] [infile]
Options:
\t-d            sorts the records in descending order
\t-f fieldlist  sorts the record by the comma separated field list.
\t              A field consists of a field name or number
\t              followed by an optional format specifier.
\t              A format specifier is one of [a] or [n] where
\t              the first is used to sort alphanumerically (default)
\t              and the second is used to sort numerically.
\t-o file       puts sorted record into outfile
\t-v -h         print this message and exit

Examples:
\trsort -fEmbryo_division_time[n] -o sortedcells.ace cell.ace
\t  will sort cell.ace by Embryo_division_time numerically and
\t  will put the newly sorted records in sortedcells.ace
\trsort -f1,Embryo_division_time[n] cell.ace
\t  will sort cell.ace by the first field and numerically by
\t  the Embryo_division_time field.  The output will go
\t  to standard output.
END_OF_USAGE
}

# Procedure to process the options given on the command line.
#
# Reads @ARGV and sets these globals for the rest of the script:
#   $opt_v, $opt_h, $opt_d, $opt_f, $opt_o  - the raw options
#   @sortfields - the field specifications from -f, or (1) by default
#   $outfile    - handle opened on the -o file, or STDOUT
#   $infile     - handle opened on the input file, or STDIN
# Prints the usage message and exits with status 1 when help is
# requested, an option is invalid, or more than one input file is named.
# Dies if the input or output file cannot be opened.
sub ProcessOptions {
    use Getopt::Long;

    # GetOptions has already reported what is wrong when it returns false
    if (!GetOptions("v", "h", "d", "f=s", "o=s")) {
        Usage();
        exit(1);
    }

    # help
    if ($opt_v || $opt_h) {
        Usage();
        exit(1);
    }

    # output file (tested with defined/length so that a file literally
    # named "0" is accepted)
    if (defined $opt_o && length $opt_o) {
        $outfile = FileHandle->new($opt_o, "w")
            or die "rsort: cannot open '$opt_o' for writing: $!\n";
    } else {
        # no "use strict" here: the bareword is the handle's name
        $outfile = STDOUT;
    }

    # field list
    if ($opt_f) {
        @sortfields = split(/,/, $opt_f);
    } else {
        @sortfields = 1;
    }

    # input file: at most one may be named
    if (@ARGV > 1) {
        Usage();
        exit(1);
    }
    if (@ARGV == 1) {
        # The explicit "r" mode makes the name be taken literally; without
        # a mode, characters such as "|" or ">" in it would be interpreted.
        $infile = FileHandle->new($ARGV[0], "r")
            or die "rsort: cannot open '$ARGV[0]' for reading: $!\n";
    } else {
        $infile = STDIN;
    }
}