#!/usr/bin/env perl

=head1 NAME

createSubProject.pl

=head1 SYNOPSIS
  
  createSubProject.pl -sffinfo    <list of lib info>
                      -odir       <outputdir for sff file creation unpaired>
                      -odirPair   <outputdir for sff file creation paired>
                      -help

                      readlistfile1 readlistfile2 ... etc

  Note: all options are required except -help.

    The SFFFILE_PATH environment variable must be set to the path of the
    sffinfo executable.

=head1 DESCRIPTION

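    This script takes one or more read-list files as input. For every sff
    file named in the -sffinfo list that matches at least one of the listed
    reads, it calls the executable named by SFFFILE_PATH with that read list,
    writing the result to -odirPair for paired libraries or to -odir for
    unpaired ones.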

=head1 VERSION

$Revision: 1.8 $

$Date: 2009-08-26 17:18:15 $

=head1 HISTORY

=over

=item *

Brian Foster 2008/11/14 Creation

=back

=cut

# Includes



use strict;
use Carp;
use vars qw( $RealBin $optsffinfo $optodir $optodirPair $opth );
use FindBin qw($RealBin);

use Getopt::Long;
use Pod::Usage;
use lib "$RealBin/../lib";
use File::Path;
use File::Basename;
use File::Temp qw(tempdir);
use PGF::Newbler::Scaffolds454;
use PGF::Utilities::RunProcess qw(runProcess);

#============================================================================#
# INPUT VALIDATION
#============================================================================#

# Parse the command line into the $opt* package variables.
#
# "help" is registered as an alias of "h" because the SYNOPSIS documents
# -help; without the alias Getopt::Long rejects -help as an unknown option
# instead of showing the full manual.
if( !GetOptions(
        "sffinfo=s"  => \$optsffinfo,
        "odir=s"     => \$optodir,
        "odirPair=s" => \$optodirPair,
        "h|help"     => \$opth,
        )
) {
    # GetOptions has already reported the offending option on STDERR.
    printHelp();
}

# Help was requested explicitly: show the complete POD.  pod2usage() ends the
# process itself, so no separate exit call is needed; -exitval spells out
# the status of 1 that the script has always used here.
pod2usage(-verbose=>2, -exitval=>1) if defined $opth;

# Check ARGVs
#
# At least one read-list file must be given, and each one has to exist.
# printHelp() terminates the script, so each failure is a simple guard.
if (!@ARGV) {
    print STDERR "no input read files given\n";
    printHelp();
}
for my $readList (@ARGV) {
    next if -e $readList;
    print STDERR "$readList given on command line not accessible\n";
    printHelp();
}

# Check environmental var
#
# $sffInfoPath holds the executable named by SFFFILE_PATH; it stays at file
# scope because the MAIN section builds its command line from it.
my $sffInfoPath = $ENV{SFFFILE_PATH};
if ( !defined $sffInfoPath ) {
    print STDERR "Can't find the location of sffinfo using SFFFILE_PATH environmental variable\n";
    printHelp();
}

# -x alone is also true for a searchable directory, so additionally require a
# plain file (-f follows symlinks) to catch a misconfigured path right here
# instead of when the command is eventually run.
if ( !( -f $sffInfoPath && -x $sffInfoPath ) ) {
    print STDERR "$sffInfoPath not found or not executable\n";
    printHelp();
}


# Validate all options
#
# -sffinfo, -odir and -odirPair are all mandatory.  Name the missing ones so
# the user does not have to guess why the usage text appeared.
{
    my @required = (
        [ sffinfo  => $optsffinfo ],
        [ odir     => $optodir ],
        [ odirPair => $optodirPair ],
    );
    my @missing = map { "-$_->[0]" } grep { !defined $_->[1] } @required;
    if (@missing) {
        print STDERR "missing required option(s): @missing\n";
        printHelp();
    }
}

# The lib info list must exist and contain data.
if (! -s $optsffinfo){
    print STDERR "$optsffinfo is not found or is zero size\n";
    printHelp();
}

# Both output directories are created on demand and must be writable.
# They are handled in the same order as before: unpaired first, then paired.
foreach my $dir ($optodir, $optodirPair) {
    mkpath($dir) if ! -d $dir;
    if (! -w $dir) {
        print STDERR "$dir is not writable\n";
        printHelp();
    }
}

#============================================================================#
# MAIN
#============================================================================#

# get sff info
#
# Each non-blank line of the -sffinfo file is split on whitespace:
#   field 1: sff file name; a trailing ".sff" and then everything up to the
#            last remaining "." are stripped to form the lookup key
#   field 2: stored as $sff2loc{key}{loc}
#   field 3: stored as $sff2loc{key}{pair}
my (%sff2loc,$sff);
open (my $sffInfoFh, '<', $optsffinfo) or confess "can't open $optsffinfo: $!\n";
while (my $line = <$sffInfoFh>) {
    chomp $line;

    # split ' ' ignores leading whitespace; split /\s+/ would produce an
    # empty first field and shift every column by one.
    my @fields = split ' ', $line;
    next if !@fields;    # blank line: nothing to record

    ($sff = $fields[0]) =~ s/\.sff$//;
    $sff =~ s/^.*\.//;

    $sff2loc{$sff}{loc}  = $fields[1];
    $sff2loc{$sff}{pair} = $fields[2];
}
close($sffInfoFh);

# get read/sff info
#
# Builds %sff: sff key -> { read name -> number of times the read was listed }.
# Only reads whose sff key is present in %sff2loc are kept.
my %sff;
foreach my $file (@ARGV){
    open (my $readListFh, '<', $file) or confess "can't open $file: $!\n";
    while (my $line = <$readListFh>) {
        chomp $line;

        # split ' ' ignores leading whitespace, so an indented read name is
        # still found in the first field.
        my @fields = split ' ', $line;
        next if !@fields;    # blank line

        # Read name: keep the shortest leading run of word characters that is
        # followed by "_" or "."; names without such a separator are unchanged.
        (my $read = $fields[0]) =~ s/^(\w+?)(?:\_|\.).*$/$1/;

        # sff key: the first nine word characters of the read name.
        (my $sffKey = $read) =~ s/^(\w{9,9}).*$/$1/;

        next if (! exists $sff2loc{$sffKey} ) ;
        $sff{$sffKey}{$read}++;
    }
    close($readListFh);
}

# get sff sub file
#
# For every sff key that collected reads, write the read names to a file in a
# temporary directory and run the executable from SFFFILE_PATH on it.
# Set $DEBUG to a true value to keep the temporary directory and to echo the
# commands that are run.
my $DEBUG = 0;
my $cleanupTmpDir = $DEBUG ? 0:1;
my $tmpFilePath = tempdir( CLEANUP=>$cleanupTmpDir );
print "tmpDir: $tmpFilePath\n" if $DEBUG;
foreach my $sff (keys %sff){
    my $sffLoc  = $sff2loc{$sff}{loc};
    my $sffName = basename($sffLoc);

    # Read list: one read name per line, stored under the sff file's base
    # name without its ".sff" suffix.
    (my $tmpReadFile = "${tmpFilePath}/$sffName") =~ s/\.sff$//;
    open (my $readListFh, '>', $tmpReadFile)
        or confess "Can't open $tmpReadFile: $!\n";
    print {$readListFh} join("\n",keys %{$sff{$sff}}),"\n"
        or confess "Can't write $tmpReadFile: $!\n";
    # Buffered write errors (e.g. a full disk) only surface at close; an
    # unnoticed failure would pass a truncated read list to the command below.
    close($readListFh)
        or confess "Can't write $tmpReadFile: $!\n";

    # Pairing flag "P" selects the paired output directory, "U" the unpaired.
    my $outputFile;
    if ($sff2loc{$sff}{pair} eq "P"){
        $outputFile = "$optodirPair/$sffName";
    }
    elsif($sff2loc{$sff}{pair} eq "U"){
        $outputFile = "$optodir/$sffName";
    }
    else{
        confess "could not determine if paired or unpaired\n";
    }

    my $cmd = "$sffInfoPath -o $outputFile -i $tmpReadFile " . $sffLoc;
    print STDERR "$cmd\n" if ($DEBUG);
    my %processInfo = runProcess(
        $cmd,
        {   -stdout=>1, # capture stdout (default=1),
            -stderr=>1, # capture stderr (default=1),
            #-stdoutFile=>filename, (redirects stdout to specified file, std out string will not be return)
            #-stderrFile=>filename, (redirects stderror to specified file, std error string will not be return)
            -checkExecutable=>1, # check if command is executable (default=1)
            -verbose=>0 # prints stdout and stderr to screen (default=0)
        } # note that this argument is optional.
    );
    # A true exitCode is treated as failure of the command.
    confess "stderr=$processInfo{stderrMessage}\n" if ($processInfo{exitCode});
}

#============================================================================#
# Print the usage text generated from this file's POD and end the script.
#
# Arguments: an optional Pod::Usage verbosity level; a missing or false
#            value selects level 1.
sub printHelp {
    my ($verbosity) = @_;
    $verbosity = 1 unless $verbosity;

    pod2usage(-verbose=>$verbosity);
    exit 1;
}

#============================================================================#

# End of the top-level script flow: report success to the caller.  The sub
# defined above is not executed on the way here, only compiled.
exit 0;
