#!/usr/bin/env perl

=head1 NAME

parseNewblerMetrics.pl

=head1 SYNOPSIS

  parseNewblerMetrics.pl [options] <454NewblerMetrics.txt>

  Options:
  -sff <file>  Name of output sff file (optional; if omitted, -lib must be specified.)
  -lib <file>  Name of output library file (optional; if omitted, -sff must be specified.)
  -h help message (optional)

=head1 DESCRIPTION

This script parses the 454NewblerMetrics.txt file generated by the Newbler
assembler to produce a file containing sff locations and/or a file containing
the library insert size and std deviation.

The format of the sff file is as follows, where each item is separated by a tab:

  1. sff name
  2. sff file location
  3. type (P-paired, U-unpaired)

The format of the library file is as follows, where each item is separated by a tab:

  1. library name
  2. insert size
  3. standard deviation

=head1 VERSION

$Revision: 1.6 $

$Date: 2009-08-26 17:18:15 $

=head1 AUTHOR(S)

Stephan Trong

=head1 HISTORY

=over

=item *

S.Trong 2008/10/16 creation

=back

=cut

use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;
use Carp;
use Carp qw(cluck);
use Cwd;
use Cwd qw(abs_path);
use File::Path;
use File::Basename;
use FindBin qw($RealBin);
use lib "$RealBin/../lib";
use PGF::Utilities::Logger;
use vars qw( $optHelp $optSffFile $optLibFile );

#============================================================================#
# INPUT VALIDATION
#============================================================================#
# Parse the command-line switches.  When GetOptions reports a problem the
# usage text is shown via printhelp().
if( !GetOptions(
        "sff=s"=>\$optSffFile,
        "lib=s"=>\$optLibFile,
        "h"=>\$optHelp,
    )
) {
    printhelp();
}

# -h requests the usage text at verbosity level 2.
printhelp(2) if $optHelp;


#============================================================================#
# INITIALIZE VARIABLES
#============================================================================#
my $DEBUG = 0;
my $newblerMetricsFile = $ARGV[0];
my $OBJ_LOGGER = PGF::Utilities::Logger->new();

#============================================================================#
# VALIDATE INPUTS
#============================================================================#
# Exactly one positional argument (the metrics file) is required.
if ( @ARGV != 1 ) {
    print STDERR "The 454NewblerMetrics.txt file must be specified.\n";
    printhelp();
}

# At least one of the two output files has to be requested.
if ( !$optSffFile && !$optLibFile ) {
    print STDERR "You need to specify either -sff <file>, -lib <file> or both.\n";
    printhelp();
}

if ( !-s $newblerMetricsFile ){
    print STDERR "The input file $newblerMetricsFile does not exist or is zero size.\n";
    exit 1;
}

#============================================================================#
# MAIN
#============================================================================#

# Three-argument open with a lexical handle: the file name comes straight from
# the command line, and the two-argument form would interpret leading/trailing
# mode or pipe characters in it instead of treating it as a plain path.
open my $metricsFh, '<', $newblerMetricsFile
    or confess "ERROR: failed to open file $newblerMetricsFile: $!\n";

my @unpairedFilePaths = ();
my @pairedFilePaths = ();
my @libraries = ();
my $blockCt = 0;    # current brace nesting depth
my $blockName = ''; # most recent bare-word line seen (candidate block name)

while (my $line = <$metricsFh>) {
    chomp $line;
    # Drop trailing whitespace (including a CR left by DOS line endings) so
    # the end-anchored patterns below still recognise names and braces.
    $line =~ s/\s+$//;
    next if !length $line;

    # look for block name
    if ( $line =~ /^\s*(\w+)$/ ) {
        $blockName = $1;
    # look for start of block '{'
    } elsif ( $line =~ /\{$/ ) {
        $blockCt++;
    # look for end of block '}'
    } elsif ( $line =~ /\}$/ ) {
        $blockCt--;
    }

    # conditions to get data from block ...
    #
    # parseDataFromBlock() reads from the same handle up to and including the
    # brace that closes the block just entered, so the depth counter is
    # decremented here on its behalf.  The branches are mutually exclusive:
    # each one clears $blockName, and the names differ.
    #
    if ( $blockName eq 'runData' && $blockCt == 1 ) {
        @unpairedFilePaths = parseDataFromBlock($metricsFh, "file");
        $blockName = '';
        $blockCt--;
    } elsif ( $blockName eq 'pairedReadData' && $blockCt == 1 ) {
        @pairedFilePaths = parseDataFromBlock($metricsFh, "file");
        $blockName = '';
        $blockCt--;
    } elsif ( $blockName eq 'pairedReadStatus' && $blockCt == 2 ) {
        # This block is only picked up when nested one level deep.
        @libraries = parseDataFromBlock($metricsFh, "library");
        $blockName = '';
        $blockCt--;
    }

}
close $metricsFh;

# Create sff file if -sff option was used.
#
if ( $optSffFile ) {
    createSffInfoFile($optSffFile, \@pairedFilePaths, \@unpairedFilePaths);

    # Check for creation of sff file.
    #
    if ( -s $optSffFile ) {
        print "Created $optSffFile\n";
    } else {
        print STDERR "WARNING: $optSffFile does not exist or is zero size!\n";
    }
}

# Create library file if -lib option was used.
#
if ( $optLibFile ) {
    createLibraryInfoFile($optLibFile, @libraries);

    # Check for creation of library file.
    #
    if ( -s $optLibFile ) {
        print "Created $optLibFile\n";
    } else {
        print STDERR "WARNING: $optLibFile does not exist or is zero size!\n";
    }
}

exit 0;
    
#============================================================================#
# SUBROUTINES
#============================================================================#
# Reads lines from $fh, starting just inside an already-opened '{' block, and
# collects "name = value;" attributes until the brace that closes that block
# has been consumed (or end of file is reached).
#
# Arguments:
#   $fh           - readable file handle positioned inside the block.
#   $getFromBlock - optional name of a nested sub-block.  When given, one hash
#                   is returned per occurrence of that sub-block, holding the
#                   attributes found directly inside it.  When omitted, a
#                   single hash with the attributes found directly in the
#                   parent block is returned.
#
# Returns a list of hash references (empty when no attribute was found).
# Surrounding double quotes are removed from attribute values.
sub parseDataFromBlock {

    my $fh = shift;
    my $getFromBlock = shift || '';

    my @results = ();
    my %attributes = ();
    my $blockCt = 0;    # depth relative to the block we were called in
    my $blockName = ''; # set once a line naming $getFromBlock has been seen

    # The sub-block name is data, not a pattern: quote it so that regex
    # metacharacters in the name cannot match unrelated lines.
    my $blockNameRe = length $getFromBlock
                    ? qr/^\s*\Q$getFromBlock\E$/
                    : undef;

    while (my $line = <$fh>) {
        chomp $line;
        # Ignore trailing whitespace (including a CR from DOS line endings);
        # otherwise the end-anchored brace/name patterns never match and the
        # loop would read past the end of the block.
        $line =~ s/\s+$//;
        next if !length $line;

        if ( $line =~ /\{$/ ) {
            $blockCt++;
        } elsif ( $line =~ /\}$/ ) {
            $blockCt--;
        }

        # Flush the collected attributes when the requested sub-block has
        # just closed (depth back to 0) or when the parent block itself has
        # closed (depth below 0).
        if ( %attributes
             && ( $blockCt < 0 || ( $getFromBlock && $blockCt == 0 ) )
        ) {
            push @results, {%attributes};
            %attributes = ();
            $blockName = '';
        }

        if ( defined $blockNameRe && $line =~ $blockNameRe ) {
            $blockName = $getFromBlock;
        }

        # Attributes are taken from depth 0 in parent mode, or from depth 1
        # inside a named sub-block in sub-block mode.
        if ( $line =~ /^\s*(\w+)\s*=\s*(.+);/ &&
            ( (!$getFromBlock && $blockCt == 0 ) ||
              ($getFromBlock && $blockName && $blockCt == 1)
            )
        ) {
            # Copy the captures before running another regex.
            my ($key, $value) = ($1, $2);
            $value =~ s/^"|"$//g; # remove double quotes
            $attributes{$key} = $value;
        }

        last if $blockCt < 0;
    }

    return @results;

}

#============================================================================#
# Scans $fh and returns the quoted values of every 'path = "..."' line found
# at brace depth 1.  Reading stops as soon as the depth drops below zero or
# the handle is exhausted.  Not called from the code visible in this script.
sub parsePaths {

    my ($handle) = @_;

    my $depth = 0;
    my @found = ();

    LINE:
    while ( defined( my $text = readline $handle ) ) {

        # An opening brace anywhere on the line takes precedence.
        if ( $text =~ /\{/ ) {
            ++$depth;
            next LINE;
        }

        # Only paths directly inside the first nesting level are collected.
        if ( $depth == 1 && $text =~ /path\s+=\s+\"(\S+)\"/ ) {
            push @found, $1;
            next LINE;
        }

        # The depth can only go negative right after a closing brace, so
        # that is the only place the termination test is needed.
        if ( $text =~ /\}/ ) {
            --$depth;
            last LINE if $depth < 0;
        }
    }

    return @found;

}
    
#============================================================================#
# Creates (or truncates) $file and writes one tab-separated line per sff
# entry: paired entries first (type 'P'), then unpaired entries (type 'U').
#
# Arguments:
#   $file                - path of the output file.
#   $A_pairedFilePaths   - array ref of hash refs, each with a 'path' key.
#   $A_unpairedFilePaths - array ref of hash refs, each with a 'path' key.
#
# Dies (confess) when the file cannot be created or written, or when an entry
# has no 'path'.  Returns true on success.
sub createSffInfoFile {

    my $file = shift;
    my $A_pairedFilePaths = shift;
    my $A_unpairedFilePaths = shift;

    # Three-argument open on a lexical handle: the file name is used as-is
    # and the handle cannot clash with a global FH used elsewhere.
    open my $fh, '>', $file
        or confess "ERROR: failed to create sff file $file: $!\n";

    writeToSffFile($fh, $A_pairedFilePaths, 'P');
    writeToSffFile($fh, $A_unpairedFilePaths, 'U');

    # Buffered write errors only surface at close time.
    close $fh
        or confess "ERROR: failed to write sff file $file: $!\n";

    return 1;

}
    
#============================================================================#
# Writes one "name<TAB>path<TAB>type" line to $fh for every entry of
# $A_sffPaths, where name is the basename of the entry's path.
#
# Arguments:
#   $fh         - writable file handle.
#   $A_sffPaths - array ref of hash refs, each with a 'path' key.
#   $type       - optional type tag written in the third column (default '').
#
# Dies (confess) when an entry is undefined or lacks the 'path' key.
sub writeToSffFile {

    my $fh = shift;
    my $A_sffPaths = shift;
    my $type = shift || '';

    foreach my $refPath (@$A_sffPaths) {
        if ( !defined $refPath || !exists $refPath->{path} ) {
            confess "ERROR: sff reference does not contain path.\n";
        }
        my $sffpath = $refPath->{path};
        my $sffname = basename($sffpath);

        # Print to the handle that was passed in, not to a global bareword.
        print {$fh} "$sffname\t$sffpath\t$type\n";
    }

    return;

}
    
#============================================================================#
# Creates (or truncates) $file and writes one tab-separated line per library:
# libraryName, pairDistanceAvg, pairDistanceDev.
#
# Arguments:
#   $file      - path of the output file.
#   @libraries - list of hash refs providing the three keys named above.
#
# Dies (confess) when the file cannot be created or written.  Returns true on
# success.
sub createLibraryInfoFile {

    my $file = shift;
    my @libraries = @_;

    # Three-argument open on a lexical handle: with the two-argument form,
    # whitespace or mode characters in the file name change what gets opened.
    open my $fh, '>', $file
        or confess "ERROR: failed to create library file $file: $!\n";

    foreach my $refLib (@libraries) {
        print {$fh} "$refLib->{libraryName}\t".
                    "$refLib->{pairDistanceAvg}\t".
                    "$refLib->{pairDistanceDev}\n";
    }

    # Buffered write errors only surface at close time.
    close $fh
        or confess "ERROR: failed to write library file $file: $!\n";

    return 1;

}

#============================================================================#
# Shows the script's POD usage text and terminates the program.
#
# Arguments:
#   $verbose - optional Pod::Usage verbosity level; a missing or false value
#              falls back to 1.
sub printhelp {
    my ($verbose) = @_;
    $verbose ||= 1;

    pod2usage( -verbose => $verbose );

    # NOTE(review): pod2usage() is expected to exit by itself with these
    # arguments; this exit looks like a safety net - confirm before removing.
    exit 1;
}

#============================================================================#
