#!/usr/bin/env perl

=head1 NAME

createGapResProject.pl - wrapper program that prepares and creates sub project
directories for gap resolution.

=head1 SYNOPSIS

  createGapResProject.pl [options] <aceFile> <454Scaffold.txt> <454NewblerMetrics.txt>

  Options:
  -od <directory>  Output directory (optional; default is current working directory).
  -h               Detailed message (optional)

=head1 DESCRIPTION

This program reads an agp formatted file (i.e., 454Scaffold.txt) and constructs
a sub project directory for each gap in all scaffolds that are larger than N
(configurable) and have more than one contig.  The two contigs spanning the
gap must have at least M (configurable) number of linking reads for creation
of the directory.  Each directory contains a scaffinfo.txt file containing
the information about the contig and gap size, formatted as such, whereby each
item is separated by a tab, and represents one entry per line:

  1. gap name
  2. gap size
  3. left contig length
  4. left contig name
  5. right contig length
  6. right contig name
  7. scaffold name

The program also creates the directory 'assemInfo' within the specified
output directory containing the following files:

  * gapdirs.txt - list of sub project directories.

  * libinfo.txt - library name, insert size and standard deviation.  For more
  information on the format of this file, refer to the parseNewblerMetrics.pl
  documentation.
  
  * sffinfo.txt - sff name and location. For more information on the format of
  this file, refer to the parseNewblerMetrics.pl documentation.
  
  * readinfo.txt - read pairing information.  For more information on the format
  of this file, refer to the newblerAce2ReadPair.pl documentation.
  
  * contigOrientation.txt - file containing contig name and orientation (+/-)
  delimited by a tab, one per line.
  
The program acts as a wrapper that calls the following executables:

  1. newblerAce2ReadPair.pl
  2. parseNewblerMetrics.pl
  3. createSubProject.pl
  
A default config file named gapRes.config residing in
<installPath>/config is used to specify the following parameters:

  createSubProject.minReadLinkage=minimum number of read links across gapped contigs
  createSubProject.minScaffSize=minimum scaffold size for sub project creation
  createSubProject.ignoreCircularGap=0|1 used to create sub project for circularized genomes

  script.readInfoFileGenerator=newblerAce2ReadPair.pl
  script.parseNewblerMetrics=parseNewblerMetrics.pl
  script.createSubProject=createSubProject.pl
  
If the value of the config parameter script.nnnn doesn't contain file paths, 
createGapResProject.pl assumes that the script name resides in the
<installPath>/bin directory.

=head1 VERSION

$Revision: 1.6 $

$Date: 2009-08-26 17:18:14 $

=head1 AUTHOR(S)

Stephan Trong

=head1 HISTORY

=over

=item *

S.Trong 2008/10/28 creation

=item *

S.Trong 2009/08/05 - added ability to skip and create warnings file if sub project fails.

=back

=cut

use strict;
use warnings;
use Pod::Usage;
use Cwd;
use Cwd qw(abs_path);
use Carp;
use Carp qw(cluck);
use Getopt::Long;
use File::Path;
use File::Copy;
use File::Basename;
use FindBin qw($RealBin);
use lib "$RealBin/../lib";
use PGF::Utilities::Properties;
use PGF::Utilities::RunProcess qw(runProcess);
use PGF::Utilities::Logger;
use PGF::Newbler::Scaffolds454;
use PGF::GapResolution::Warnings;
use vars qw( $optHelp $optOutputDir );

#============================================================================#
# INPUT VALIDATION
#============================================================================#
# Record the full invocation (absolute script directory, script name and the
# raw argument list) before GetOptions strips the option switches from @ARGV.
my $programExecution = sprintf '%s/%s %s',
    abs_path(dirname($0)), basename($0), "@ARGV";

# Parse the supported switches; show the usage text if parsing fails.
my $optionsParsedOk = GetOptions(
    "od=s" => \$optOutputDir,
    "h"    => \$optHelp,
);
printhelp(1) unless $optionsParsedOk;

# -h asks for the detailed help text.
if ( $optHelp ) {
    printhelp(2);
}

#============================================================================#
# INITIALIZATION
#============================================================================#

my $DEBUG = 0;

# The config file defaults to <RealBin>/../config/gapRes.config and can be
# overridden through the GAPRES_CONFIG environment variable.
my $configFile = "$RealBin/../config/gapRes.config";
   $configFile = $ENV{GAPRES_CONFIG} if defined $ENV{GAPRES_CONFIG};

my $OBJ_PROPS = PGF::Utilities::Properties->new(-configFile=>$configFile);
# Ask the properties object to raise an exception when a requested entry
# is missing from the config file.
$OBJ_PROPS->setExceptionIfEntryNotFound(1);

my $OBJ_LOGGER = PGF::Utilities::Logger->new();

# Output directory: the -od value when given, else the current directory.
my $outputDir = $optOutputDir || getcwd();
   $outputDir =~ s/\/+$//g; # remove trailing '/'.

# The log file lives in the output directory and is named after this script.
my $logfile = $outputDir . '/' . basename($0) . '.log';

my $OBJ_WARNINGS = PGF::GapResolution::Warnings->new(
    path   => $outputDir,
    logger => $OBJ_LOGGER,
);

# Directory holding the assembly info files; its name comes from the config.
my $assemInfoDir = "$outputDir/";
   $assemInfoDir .= $OBJ_PROPS->getProperty("createGapResProject.assemInfoDirName");

# Paths of the individual files kept inside the assemInfo directory.
my $readinfoFile = "$assemInfoDir/";
   $readinfoFile .= $OBJ_PROPS->getProperty("createGapResProject.readInfoFileName");
my $libinfoFile = "$assemInfoDir/";
   $libinfoFile .= $OBJ_PROPS->getProperty("createGapResProject.libInfoFileName");
my $sffinfoFile = "$assemInfoDir/";
   $sffinfoFile .= $OBJ_PROPS->getProperty("createGapResProject.sffInfoFileName");
my $subProjectFoF = "$assemInfoDir/";
   $subProjectFoF .= $OBJ_PROPS->getProperty("createGapResProject.subProjectFoFName");
my $contigOrientationFile = "$assemInfoDir/";
   $contigOrientationFile .= $OBJ_PROPS->getProperty("createGapResProject.contigOrientationFileName");

# Exactly three positional arguments must remain after option parsing.
unless ( @ARGV == 3 ) {
    print STDERR "Required input parameters are missing in command line.\n";
    printhelp(1);
}

my ($aceFile, $scaffoldFile, $newblerMetricsFile) = @ARGV;

#============================================================================#
# VALIDATE INPUTS
#============================================================================#
my $errMsg = '';

# Set path for logging.
# NOTE(review): the log file is configured here, before the output directory
# is created further down - confirm the logger copes with a directory that
# does not exist yet.
#
setFileForLogging($logfile);

# Log execution into log file.
#
logExecution($programExecution);

# Validate inputs.  Every problem is collected so that the user sees all of
# them in a single run instead of only the last failing check.  Each message
# describes exactly what its check tests: the ace file must exist and be
# non-empty (-s); the other two files only need to exist (-e).
#
my @inputErrors;

if ( !-s $aceFile ) {
    push @inputErrors,
        "Input ace file $aceFile does not exist or has zero size.";
}

if ( !length $scaffoldFile || !-e $scaffoldFile ) {
    push @inputErrors,
        "Input scaffold file $scaffoldFile does not exist.";
}

if ( !length $newblerMetricsFile || !-e $newblerMetricsFile ) {
    push @inputErrors,
        "Input newbler metrics file $newblerMetricsFile does not exist.";
}

if ( @inputErrors ) {
    $errMsg = join ' ', @inputErrors;
    # Each message already ends with a period, so none is added here.
    print STDERR "$errMsg Use the -h option for more info.\n";
    logError($errMsg);
    exit 1;
}

#============================================================================#
# MAIN
#============================================================================#

# Make sure the output directory and the assemInfo directory exist
# (in that order).
#
for my $requiredDir ( $outputDir, $assemInfoDir ) {
    createDirectory($requiredDir);
}

# Step 1: write the contig orientation file from the scaffold file.
#
createContigOrientationFile($scaffoldFile, $contigOrientationFile);

# Step 2: run the external script that produces the read info file.
#
runReadInfoFileGenerator($aceFile, $readinfoFile, $contigOrientationFile);

# Step 3: run the external script that produces the lib info and
# sff info files.
#
runParseNewblerMetrics($newblerMetricsFile, $libinfoFile, $sffinfoFile);

# Step 4: run the external script that creates the sub project directories.
#
runCreateSubProjectDirectories(
    $outputDir, $scaffoldFile, $readinfoFile, $subProjectFoF
);

# Write out any warnings that were collected along the way.
#
if ( $OBJ_WARNINGS->getNumberOfWarnings ) {
    $OBJ_WARNINGS->createFile();
}

exit 0;

#============================================================================#
# SUBROUTINES
#============================================================================#
# Builds a PGF::Newbler::Scaffolds454 object from the given scaffold file and
# asks it to write the contig orientation file.
#   $scaffoldFile - path of the scaffold file to read.
#   $outputFile   - path of the contig orientation file to write.
sub createContigOrientationFile {

    my ($scaffoldFile, $outputFile) = @_;

    PGF::Newbler::Scaffolds454->new($scaffoldFile)
        ->createContigOrientationFile($outputFile);

}

#============================================================================#
# Runs the script configured as script.readInfoFileGenerator to create the
# read info file from the ace file.  The contig orientation file is passed
# with -cf only when it exists and is non-empty.  Fails through logError
# (with confess) if the script exits non-zero or the output file is missing
# or empty.
#   $aceFile               - input ace file.
#   $outputFile            - read info file to create.
#   $contigOrientationFile - optional contig orientation file.
sub runReadInfoFileGenerator {

    my ($aceFile, $outputFile, $contigOrientationFile) = @_;

    my $script = getScript("script.readInfoFileGenerator");

    # Empty string when there is no usable orientation file.
    my $options = ( -s $contigOrientationFile )
        ? "-cf $contigOrientationFile"
        : '';

    my $cmd = join ' ', $script, $options, $aceFile, $outputFile;

    my %processInfo = runCommand($cmd);
    checkProcess(%processInfo);

    unless ( -s $outputFile ) {
        my $errMsg = "Failed to create file $outputFile when trying to run $script.";
        print STDERR "$errMsg\n";
        logError($errMsg, 1);
    }

}
    
#============================================================================#
# Runs the script configured as script.parseNewblerMetrics to create the sff
# info and lib info files from the newbler metrics file.  Fails through
# logError (with confess) if the script exits non-zero or if either output
# file is missing or empty; the sff file is checked first.
#   $newblerMetricsFile - input newbler metrics file.
#   $libinfoFile        - lib info file to create (-lib).
#   $sffinfoFile        - sff info file to create (-sff).
sub runParseNewblerMetrics {

    my ($newblerMetricsFile, $libinfoFile, $sffinfoFile) = @_;

    my $script = getScript("script.parseNewblerMetrics");
    my $cmd = join ' ',
        $script, '-sff', $sffinfoFile, '-lib', $libinfoFile, $newblerMetricsFile;

    my %processInfo = runCommand($cmd);
    checkProcess(%processInfo);

    # Each entry pairs the label used in the error message with the file
    # that must exist and be non-empty.
    for my $expected ( [ sff => $sffinfoFile ], [ lib => $libinfoFile ] ) {
        my ($label, $file) = @$expected;
        next if -s $file;

        my $errMsg = "Failed to create file $label file $file when executing $cmd.";
        print STDERR "$errMsg\n";
        logError($errMsg, 1);
    }

}
    
#============================================================================#
# Runs the script configured as script.createSubProject.  The linkage,
# scaffold size and circular gap settings are read from the config file and
# passed on the command line.  Fails through checkProcess if the script
# exits non-zero.
#   $outputDir     - directory passed as -od.
#   $scaffoldFile  - scaffold file passed as -is.
#   $readinfoFile  - read info file passed as -ip.
#   $subProjectFoF - file passed as -of.
sub runCreateSubProjectDirectories {

    my ($outputDir, $scaffoldFile, $readinfoFile, $subProjectFoF) = @_;

    my $script = getScript("script.createSubProject");

    # Config lookups, in the same order as the switches that use them.
    my $minReadLinkage = $OBJ_PROPS->getProperty("createSubProject.minReadLinkage");
    my $minScaffSize = $OBJ_PROPS->getProperty("createSubProject.minScaffSize");
    my $ignoreCircularGap = $OBJ_PROPS->getProperty("createSubProject.ignoreCircularGap");

    my $scaffInfoName = "scaffinfo.txt";

    my @switches = (
        "-is $scaffoldFile",
        "-ip $readinfoFile",
        "-od $outputDir",
        "-rl $minReadLinkage",
        "-ms $minScaffSize",
        "-ic $ignoreCircularGap",
        "-of $subProjectFoF",
        "-sf $scaffInfoName",
    );
    my $cmd = join ' ', $script, @switches;

    my %processInfo = runCommand($cmd);
    checkProcess(%processInfo);

}

#============================================================================#
# Logs and prints whether the given output file exists.
#   $outputFile - path of the file to report on.
# The path is reported in its absolute form when it can be resolved.
sub logOutputResults {

    my $outputFile = shift;

    # abs_path can return undef when the path cannot be resolved (for
    # example when a parent directory is missing).  Keep the path as given
    # in that case so the failure message still names the file and the -e
    # test below does not run on an undefined value.
    my $resolvedPath = abs_path($outputFile);
    $outputFile = $resolvedPath if defined $resolvedPath;

    my $msg = -e $outputFile ? "Created $outputFile" :
        "Failed to create $outputFile";
    logOutput($msg);
    print "$msg\n";

}

#============================================================================#
# Points both the regular and the error output of the shared logger at the
# same file, using the logger's append setters.
#   $logFile - path of the log file.
sub setFileForLogging {

    my ($logFile) = @_;

    # Regular messages first, then errors, both into the same file.
    $OBJ_LOGGER->setLogOutFileAppend($logFile);
    $OBJ_LOGGER->setLogErrorFileAppend($logFile);

}

#============================================================================#
# Creates the given directory (including missing parents) with mode 0755
# unless it already exists.  A failure reported by mkpath is passed to
# logError with confess enabled.
#   $dir - directory to create.
sub createDirectory {

    my ($dir) = @_;

    # Nothing to do when the directory is already there.
    return if -d $dir;

    eval { mkpath($dir, {mode=>0755}) };
    return unless $@;

    logError("ERROR: failed to create directory $dir: $@\n", 1);
}

#============================================================================#
# Looks up a script name in the config file and returns its path.
#   $scriptType - config key of the script (e.g. "script.createSubProject").
# A value without any '/' is taken to live in this program's own directory
# ($FindBin::RealBin).  If the resulting path does not exist, the error is
# printed to STDERR and passed to logError with confess enabled.
sub getScript {

    my $scriptType = shift;

    my $script = $OBJ_PROPS->getProperty($scriptType);
       $script = "$FindBin::RealBin/$script" if
            $script !~ /\//; # add path to script if not specified in config file

    # Check if script exists.
    #
    if ( !-e $script ) {
        my $errMsg = "ERROR: cannot find script $script defined in config file as $scriptType.\n";
        # Errors go to STDERR, like every other error path in this file.
        print STDERR $errMsg;
        logError($errMsg, 1);
    }

    return $script;

}

#============================================================================#
# Inspects the result of a finished command and fails through logError
# (with confess) when the exit code is non-zero.
#   %processInfo - process result as returned by runCommand; the keys read
#                  here are exitCode, logStdoutMessage, stdoutMessage and
#                  stderrMessage.
# The error text is the captured stdout (only when logStdoutMessage is set)
# followed by the captured stderr.
sub checkProcess {

    my %processInfo = @_;

    return if !$processInfo{exitCode};

    my $errMsg = '';

    # Undefined captures are skipped so they neither trigger "uninitialized"
    # warnings nor hide the real failure.
    if ( $processInfo{logStdoutMessage} &&
        defined $processInfo{stdoutMessage} ) {
        $errMsg .= $processInfo{stdoutMessage};
    }
    if ( defined $processInfo{stderrMessage} ) {
        $errMsg .= $processInfo{stderrMessage};
    }

    # A command can fail without printing anything; make sure the log and
    # the confess output still say what happened.
    if ( !length $errMsg ) {
        $errMsg = "Command failed with exit code $processInfo{exitCode}.";
    }

    logError($errMsg,1);

}

#============================================================================#
# Announces, logs and executes a command through runProcess.
#   $cmd - complete command line to run.
# Returns the hash produced by runProcess unchanged; checkProcess reads its
# exitCode and captured output.
sub runCommand {

    my $cmd = shift;

    print "Running $cmd ...\n\n";
    logOutput($cmd);

    # Execute command, capture stderr, stdout, exitcode
    #
    my %processInfo = runProcess($cmd,
        {-checkExecutable=>0,
        }
    );

    return %processInfo;

}

#============================================================================#
# Writes the command line that started this program and the current working
# directory to the log.
#   $programExecution - the command line text to record.
sub logExecution {

    my ($programExecution) = @_;

    my $currentDir = getcwd();
    logOutput("Command: $programExecution\nCurrent directory: $currentDir");
}

#============================================================================#
# Records an error in the log and in the warnings collection.
#   $message - error text.
#   $isFatal - optional; when true, the collected warnings are written out
#              and the program dies with a stack trace via confess.
sub logError {
    my ($message, $isFatal) = @_;

    $OBJ_LOGGER->logError($message);
    $OBJ_WARNINGS->add($message);

    if ( $isFatal ) {
        # Write the warnings out before dying.
        if ( $OBJ_WARNINGS->getNumberOfWarnings ) {
            $OBJ_WARNINGS->createFile();
        }
        confess $message;
    }

}
    
#============================================================================#
# Sends a message to the shared logger's regular output.
#   $message - text to log.
sub logOutput {
    my ($message) = @_;

    $OBJ_LOGGER->logOut($message);
}
    
#============================================================================#
# Shows the POD usage text and stops the program.
#   $verbose - pod2usage verbosity level; a missing or false value means 1.
sub printhelp {
    my ($verbose) = @_;
    $verbose ||= 1;

    pod2usage( -verbose => $verbose );

    # Always end with status 1 should pod2usage return.
    exit 1;
}

#============================================================================#
