#!/usr/bin/env perl

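=head1 NAME

gapResParallel.pl - wrapper to execute the Gap Resolution software components in parallel processing mode
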
=head1 SYNOPSIS

  gapResParallel.pl [options] <aceFile> <454Scaffolds.txt> <454AllContigs.fna>
  <454AllContigs.qual> <gapDir> <outputValidationFile>

  Options:
  -od <dir>    output directory (optional; default is current dir) 
  -pd <dir>    absolute full path to phd_dir for Sanger data (optional)
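  -pn <label>  primer naming (optional)
  -cf <file>   contig read info file of files; must exist, and is passed to the getSubProjReads script with -cf (optional)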
  -c <file>    config file (optional; default is either specified using environment variable GAPRES_CONFIG or in installation path)
  -ld <dir>   log dir (optional; default is current working dir)
  -h          detailed message (optional)

=head1 DESCRIPTION

Wrapper to execute the Gap Resolution s/w components for use in parallel
processing mode.

=head1 DEPENDENCIES

The following scripts (configurable in the config file) must exist in the same
directory as this script unless the path to the script is defined in the
config file:

  * getSubProjReads.pl
  * newblerAssemSubProject.pl
  * validateSubProject.pl
  * createSubProjectPrimers.pl
  * createAnchorTagsForAce.pl
  * renameGapPrimers.pl

A default config file named gapRes.config residing in
<installPath>/config is used to specify the following parameters:

  script.getSubProjReads=getSubProjReads.pl
  script.newblerAssemSubProject=newblerAssemSubProject.pl
  script.validateSubProject=validateSubProject.pl
  script.createSubProjectFakes=createSubProjectFakes.pl
  script.createSubProjectPrimers=createSubProjectPrimers.pl
  script.createAnchorTagsForAce=createAnchorTagsForAce.pl
  script.createAnchorTagsForSubprojectAce=createAnchorTagsForSubprojectAce.pl
  runGapResolution.addAnchorTagsToMainAceFile=1
  runGapResolution.addAnchorTagsToSubprojectAceFile=1
  
  To specify individual software components to run, modify the following
  parameters in the config file, setting 1 to execute, 0 to skip.
  
  execute.getSubProjectReads.pl=1
  execute.newblerAssemSubProject.pl=1
  execute.validateSubProject.pl=1
  execute.createSubProjectPrimers.pl=1 
  execute.createAnchorTagsForAce.pl=1
  execute.createAnchorTagsForSubprojectAce.pl=1
  
To specify your own config file, set the environment variable GAPRES_CONFIG to
the path and name of the file.

=head1 VERSION

$Revision: 1.3 $

$Date: 2010-03-07 01:01:56 $

=head1 AUTHOR(S)

Stephan Trong

=head1 HISTORY

=over

=item *

S.Trong 2010/01/05 creation

=back

=cut

use strict;
use warnings;
use Pod::Usage;
use Cwd;
use Cwd qw(abs_path);
use Carp;
use Carp qw(cluck);
use Getopt::Long;
use File::Path;
use File::Copy;
use File::Basename;
use File::Temp qw(tempfile tempdir);
use FindBin qw($RealBin);
use lib "$RealBin/../lib";
use PGF::Utilities::Properties;
use PGF::Utilities::RunProcess qw(runProcess);
use PGF::Utilities::Logger;
use PGF::Utilities::FileUtility qw(getRealPath);
use PGF::GapResolution::Warnings;
use vars qw( $optHelp $optOutputDir $optPhdDir $optConfigFile $optLogDir
    $optPrimerNaming $optContigReadInfoFileOfFiles);

#============================================================================#
# INPUT VALIDATION
#============================================================================#
# Capture the invocation (absolute script location plus the raw argument
# list) now, because GetOptions below removes the option switches from @ARGV.
my $programExecution =
    sprintf "%s/%s %s", abs_path(dirname($0)), basename($0), "@ARGV";

# Parse the command-line switches; a parse failure prints the usage text
# and exits.
GetOptions(
    "od=s" => \$optOutputDir,
    "pd=s" => \$optPhdDir,
    "c=s"  => \$optConfigFile,
    "ld=s" => \$optLogDir,
    "pn=s" => \$optPrimerNaming,
    "cf=s" => \$optContigReadInfoFileOfFiles,
    "h"    => \$optHelp,
) or printhelp(1);

# -h requests the detailed help text.
if ( $optHelp ) {
    printhelp(2);
}

# Exactly six positional arguments must remain once the options are removed.
if ( scalar(@ARGV) != 6 ) {
    print STDERR "Required input parameters are missing in command line.\n";
    printhelp(1);
}

# When -c names a config file, publish its real path through the
# GAPRES_CONFIG environment variable; a missing or empty file is fatal.
if ( $optConfigFile ) {
    if ( !-s $optConfigFile ) {
        print STDERR "Cannot find your specified config file $optConfigFile!\n";
        exit 1;
    }
    $ENV{GAPRES_CONFIG} = getRealPath($optConfigFile);
}

#============================================================================#
# INITIALIZE VARIABLES
#============================================================================#

# Debug switch; $cleanup is simply its inverse.
my $DEBUG = 1;
my $cleanup = $DEBUG ? 0 : 1;

# Config file: GAPRES_CONFIG wins when defined, otherwise fall back to the
# copy under the installation directory.
my $configFile = "$RealBin/../config/gapRes.config";
if ( defined $ENV{GAPRES_CONFIG} ) {
    $configFile = $ENV{GAPRES_CONFIG};
}

my $OBJ_PROPS = PGF::Utilities::Properties->new(-configFile=>$configFile);
# confess if entry in config file is not found.
$OBJ_PROPS->setExceptionIfEntryNotFound(1);

my $OBJ_LOGGER = PGF::Utilities::Logger->new();

# Output directory: -od when given, else the current directory; made absolute.
my $outputDir = abs_path( $optOutputDir || getcwd() );

my $OBJ_WARNINGS = PGF::GapResolution::Warnings->new(
    path   => $outputDir,
    logger => $OBJ_LOGGER,
);

# Log directory: -ld when given, else the current working directory.
my $logDir = $optLogDir || cwd();

# Positional arguments, in command-line order.
my ( $aceFile, $scaffoldFile, $contigsFasta,
     $contigsQual, $gapDir, $validationFile ) = @ARGV[0 .. 5];

#============================================================================#
# VALIDATE INPUTS
#============================================================================#

# Accumulates one line per failed check so that every problem is reported
# in a single pass instead of stopping at the first one.
my $errMsg = '';

# Check for valid inputs.
#
# The four input files must exist and be non-empty.
for my $input (
    [ "ace",           $aceFile ],
    [ "scaffold",      $scaffoldFile ],
    [ "contigs fasta", $contigsFasta ],
    [ "contigs qual",  $contigsQual ],
) {
    my ( $label, $path ) = @{$input};
    if ( !-s $path ) {
        $errMsg .= "The input $label file $path does not exist or is zero size.\n";
    }
}

# The gap directory must already exist: the temporary gap dir list file is
# created inside it, so report a missing directory here with a clear message
# rather than failing later when that file cannot be opened.
if ( !-d $gapDir ) {
    $errMsg .= "The gap directory $gapDir does not exist or is not a directory.\n";
}

if ($optPhdDir && ! -e $optPhdDir){
    $errMsg .= "phdDir $optPhdDir  does not exist\n";
}

if ($optContigReadInfoFileOfFiles && ! -e $optContigReadInfoFileOfFiles){
    $errMsg .= "File $optContigReadInfoFileOfFiles does not exist\n";
}

# Any failure is fatal: print the summary, then log it and abort
# (logError with a true second argument confesses).
if ( $errMsg  ){
    print STDERR "$errMsg.\nUse the -h option for more info.\n";
    logError($errMsg,1);
}

#============================================================================#
# INITIALIZATION
#============================================================================#

# The log file is named after this script with a ".log" suffix; no directory
# component is added to the name here.
my $logfile = sprintf "%s.log", basename($0);

# Point the logger's output and error streams at that file.
setFileForLogging($logfile);

# Record the command line and working directory in the log.
logExecution($programExecution);

#============================================================================#
# MAIN
#============================================================================#

print "\n";

# Write a temporary file containing the gap directory path; the components
# below are handed this file.
my $gapDirFile = "$gapDir/gapdir.txt.tmp";
createGapDirFile($gapDirFile, $gapDir);

# Each step runs only when its "execute.<component>" config setting allows it.

# Capture reads for the gap dir.
if ( runThisComponent("getSubProjectReads.pl") ) {
    runGetSubProjectReads($logDir, $contigsFasta, $contigsQual, $gapDirFile,
        $optContigReadInfoFileOfFiles);
}

# Reassemble the gap dir.
if ( runThisComponent("newblerAssemSubProject.pl") ) {
    runNewblerAssemSubproject($logDir, $gapDirFile);
}

# Perform gap dir validation.
if ( runThisComponent("validateSubProject.pl") ) {
    runValidateSubProject($logDir, $validationFile, $gapDirFile);
}

# The remaining primer/fake-read steps need a non-empty validation file.
if ( -s $validationFile ) {

    # Create fake reads for closed gaps.
    if ( runThisComponent("createSubProjectFakes.pl") ) {
        runCreateSubProjectFakes($logDir, $validationFile);
    }

    # Create primer definition file for designing PCR primers.
    if ( runThisComponent("createSubProjectPrimerInfo.pl") ) {
        runCreateSubProjectPrimerInfo($logDir, $validationFile);
    }

    # Create primers for the gap dir.
    if ( runThisComponent("createSubProjectPrimers.pl") ) {
        runCreateSubProjectPrimers($logDir, $gapDirFile);
    }

    # Rename primers in the primers file when -pn was given.
    renamePrimers($optPrimerNaming) if $optPrimerNaming;
}

# Tag main ace file with all anchor sequence locations from gap dirs.
# The config flag is consulted first, then the execute setting.
if ( $OBJ_PROPS->getProperty("runGapResolution.addAnchorTagsToMainAceFile")
     && runThisComponent("createAnchorTagsForAce.pl") )
{
    addAnchorTagsForMainAce($logDir, $aceFile, $scaffoldFile, $gapDirFile);
}

# Same two-stage check for tagging the subproject ace file.
if ( $OBJ_PROPS->getProperty("runGapResolution.addAnchorTagsToSubprojectAceFile")
     && runThisComponent("createAnchorTagsForSubprojectAce.pl") )
{
    addAnchorTagsForSubProjectAce($logDir, $gapDirFile);
}

# Remove the temporary gap dir list file.
if ( -e $gapDirFile ) {
    unlink( $gapDirFile );
}

# If warning messages were collected during the run, log and print them.
# The collected text is tested rather than the warnings object itself: the
# object is always a true value, so testing it would log and print an empty
# message on runs that produced no warnings.
{
    my $msg = join "", $OBJ_WARNINGS->getall();
    logOutput($msg,1) if length $msg;
}

exit 0;

#============================================================================#
# SUBROUTINES
#============================================================================#
sub createGapDirFile {
    
    # Write the gap directory path into $file (no trailing newline).
    #
    # Parameters:
    #   $file   - path of the file to create (truncated if it exists)
    #   $gapDir - text written to the file
    #
    # Aborts through logError(..., 1) when the file cannot be created or
    # the write cannot be completed.
    
    my $file = shift;
    my $gapDir = shift;
    
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode, and the handle cannot clash with another
    # bareword handle elsewhere in the program.
    my $ofh;
    unless( open $ofh, '>', $file ) {
        my $errMsg = "ERROR: failed to create file $file: $!.\n";
        logError($errMsg, 1);
    }
    print {$ofh} $gapDir;
    
    # Buffered write errors (for example a full disk) only surface at close.
    unless( close $ofh ) {
        my $errMsg = "ERROR: failed to write file $file: $!.\n";
        logError($errMsg, 1);
    }
}

#============================================================================#
sub createValidationFile {
    
    # Look for $gapDir in the validation summary file named by the config
    # property validateSubProject.validationSummaryFile and, when it is
    # found, write it to $outputFile.
    #
    # Parameters:
    #   $outputFile - file to create when a matching entry exists
    #   $gapDir     - value compared against the first whitespace-delimited
    #                 field of each summary line
    #
    # No output file is created when there is no matching entry. Aborts
    # through logError(..., 1) when either file cannot be opened.
    
    my $outputFile = shift;
    my $gapDir = shift;

    my $validationFile = $OBJ_PROPS->getProperty("validateSubProject.validationSummaryFile");
    my $ifh;
    unless( open $ifh, '<', $validationFile ) {
        my $errMsg = "ERROR: failed to read file $validationFile.\n";
        logError($errMsg, 1);
    }
    
    # Stop at the first line whose leading field equals $gapDir.
    my $validationDir = '';
    while (my $entry = <$ifh>) {
        chomp $entry;
        next if !length $entry;
        if ( $entry =~ /^(\S+)/ && $1 eq $gapDir ) {
            $validationDir = $1;
            last;
        }
    }
    close $ifh;
    
    if ( length $validationDir ) {
        my $ofh;
        unless( open $ofh, '>', $outputFile ) {
            my $errMsg = "ERROR: failed to create file $outputFile.\n";
            logError($errMsg, 1);
        }
        # Write to the output file handle; printing without a handle would
        # send the text to STDOUT and leave the output file empty.
        print {$ofh} $validationDir;
        close $ofh;
    }
    
}
    
#============================================================================#
sub runThisComponent {
    my ($componentName) = @_;
    
    #
    # Decide whether a component should run, based on the config entry
    # "execute.<componentName>".
    #
    # Returns 1 when the entry's value is empty or true, 0 otherwise.
    #
    
    my $setting = $OBJ_PROPS->getProperty("execute.$componentName");
    $setting =~ s/\s+$//;    # ignore trailing whitespace in the config value
    
    # An empty value means "no preference", which counts as run.
    return 1 if !length $setting;
    
    # Otherwise the value itself decides.
    return $setting ? 1 : 0;
}
    
#============================================================================#
sub runGetSubProjectReads {
    
    # Run the script configured as script.getSubProjReads.
    #
    # Parameters:
    #   $logDir       - directory for the script's log and warnings files
    #   $contigsFasta - contigs fasta file
    #   $contigsQual  - contigs qual file
    #   $gapDirFile   - file holding the gap dir path
    #   $contigReadInfoFileOfFiles - optional; passed with -cf when true
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $contigsFasta, $contigsQual, $gapDirFile,
         $contigReadInfoFileOfFiles ) = @_;
    $contigReadInfoFileOfFiles ||= '';
    
    my $assemDir = $OBJ_PROPS->getProperty("createGapResProject.assemInfoDirName");
    my $readInfoFile = $OBJ_PROPS->getProperty("createGapResProject.readInfoFileName");
    my $libInfoFile = $OBJ_PROPS->getProperty("createGapResProject.libInfoFileName");
    
    # A configured file name without any '/' gets the assemInfo dir
    # prepended (the loop variable aliases the two scalars).
    for my $infoFile ( $readInfoFile, $libInfoFile ) {
        $infoFile = "$assemDir/$infoFile" unless $infoFile =~ /\//;
    }
    
    my $script = getScript("script.getSubProjReads");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    
    my @cmdParts = ( $script,
                     "-log",  "$logStem.log.$$",
                     "-warn", "$logStem.warnings.$$" );
    if ( $contigReadInfoFileOfFiles ) {
        push @cmdParts, "-cf", $contigReadInfoFileOfFiles;
    }
    push @cmdParts, $readInfoFile, $gapDirFile, $libInfoFile,
                    $contigsFasta, $contigsQual;
    
    my %result = runCommand( join " ", @cmdParts );
    
    # Check for successful completion.
    checkProcess(%result);
    
}

#============================================================================#
sub runNewblerAssemSubproject {
    
    # Run the script configured as script.newblerAssemSubProject.
    #
    # Parameters:
    #   $logDir     - directory for the script's log and warnings files
    #   $gapDirFile - file holding the gap dir path
    #
    # Reads the file-level $optPhdDir; when set it is passed with -p.
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $gapDirFile ) = @_;
    
    my $assemDir = $OBJ_PROPS->getProperty("createGapResProject.assemInfoDirName");
    my $sffInfoFile = $OBJ_PROPS->getProperty("createGapResProject.sffInfoFileName");
    
    # A configured file name without any '/' gets the assemInfo dir prepended.
    if ( $sffInfoFile !~ /\// ) {
        $sffInfoFile = "$assemDir/$sffInfoFile";
    }
    
    my $script = getScript("script.newblerAssemSubProject");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    my $logFile = "$logStem.log.$$";
    my $warningsFile = "$logStem.warnings.$$";
    
    my $phdOption = $optPhdDir ? "-p $optPhdDir " : '';
    
    # The doubled spaces in this string are intentional: they keep the
    # printed and logged command text exactly as it has always been.
    my $cmd = "$script $phdOption -log $logFile -warn $warningsFile  $gapDirFile $sffInfoFile";
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
}

#============================================================================#
sub runValidateSubProject {
    
    # Run the script configured as script.validateSubProject, then report
    # whether the validation file was produced.
    #
    # Parameters:
    #   $logDir         - directory for the script's log and warnings files
    #   $validationFile - output file passed to the script with -o
    #   $gapDirFile     - file holding the gap dir path
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $validationFile, $gapDirFile ) = @_;
    
    my $assemDir = $OBJ_PROPS->getProperty("createGapResProject.assemInfoDirName");
    my $libInfoFile = $OBJ_PROPS->getProperty("createGapResProject.libInfoFileName");
    my $sffInfoFile = $OBJ_PROPS->getProperty("createGapResProject.sffInfoFileName");
    
    # A configured file name without any '/' gets the assemInfo dir
    # prepended (the loop variable aliases the two scalars).
    for my $infoFile ( $libInfoFile, $sffInfoFile ) {
        $infoFile = "$assemDir/$infoFile" unless $infoFile =~ /\//;
    }
    
    my $script = getScript("script.validateSubProject");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    
    my $cmd = join " ",
        $script,
        "-log",  "$logStem.log.$$",
        "-warn", "$logStem.warnings.$$",
        "-o",    $validationFile,
        $gapDirFile, $libInfoFile, $sffInfoFile;
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
    logOutputFileCreation($validationFile);
    
}

#============================================================================#
sub runCreateSubProjectFakes {
    
    # Run the script configured as script.createSubProjectFakes on the
    # validation file.
    #
    # Parameters:
    #   $logDir         - directory for the script's log and warnings files
    #   $validationFile - validation file passed to the script
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $validationFile ) = @_;
    
    my $script = getScript("script.createSubProjectFakes");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    
    my $cmd = join " ",
        $script,
        "-log",  "$logStem.log.$$",
        "-warn", "$logStem.warnings.$$",
        $validationFile;
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
}

#============================================================================#
sub runCreateSubProjectPrimerInfo {
    
    # Run the script configured as script.createSubProjectPrimerInfo on the
    # validation file.
    #
    # Parameters:
    #   $logDir         - directory for the script's log and warnings files
    #   $validationFile - validation file passed to the script
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $validationFile ) = @_;
    
    my $script = getScript("script.createSubProjectPrimerInfo");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    
    my $cmd = join " ",
        $script,
        "-log",  "$logStem.log.$$",
        "-warn", "$logStem.warnings.$$",
        $validationFile;
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
}

#============================================================================#
sub runCreateSubProjectPrimers {
    
    # Run the script configured as script.createSubProjectPrimers, then
    # report whether the configured primers file was produced.
    #
    # Parameters:
    #   $logDir     - directory for the script's log and warnings files
    #   $gapDirFile - file holding the gap dir path, passed with -if
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $gapDirFile ) = @_;
    
    # NOTE(review): $assemDir is not used below. The lookup is kept because
    # the properties object is configured to raise on missing entries, so
    # dropping it could change when a config error is reported -- confirm
    # before removing.
    my $assemDir = $OBJ_PROPS->getProperty("createGapResProject.assemInfoDirName");
    my $primersFile = $OBJ_PROPS->getProperty("createSubProjectPrimers.allProjectPrimersFile");
    
    my $script = getScript("script.createSubProjectPrimers");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    
    my $cmd = join " ",
        $script,
        "-log",  "$logStem.log.$$",
        "-warn", "$logStem.warnings.$$",
        "-if",   $gapDirFile;
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
    logOutputFileCreation($primersFile);
    
}

#============================================================================#
sub addAnchorTagsForMainAce {
    
    # Run the script configured as script.createAnchorTagsForAce against
    # the main ace file, then report whether the tag file was produced.
    #
    # Parameters:
    #   $logDir       - directory for the log, warnings and tag files
    #   $aceFile      - main ace file, passed with -a
    #   $scaffoldFile - scaffold file
    #   $gapDirFile   - file holding the gap dir path
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $aceFile, $scaffoldFile, $gapDirFile ) = @_;
    
    # NOTE(review): none of these property values is used below (the
    # configured output file is replaced by a per-process name). The
    # lookups are kept, in this order, because the properties object is
    # configured to raise on missing entries -- confirm before removing.
    for my $propertyName (
        "createGapResProject.assemInfoDirName",
        "createAnchorTagsForAce.outputFile",
        "createAnchorTagsForAce.tagType",
        "idRepeatBoundary.uniqueAnchorLength",
        "idContigRepeats.boundaryFileExtension",
        "createAnchorTagsForAce.allowTransferOfTags",
        "idContigRepeats.scaffFileName",
    ) {
        my $unusedValue = $OBJ_PROPS->getProperty($propertyName);
    }
    
    my $script = getScript("script.createAnchorTagsForAce");
    
    # The tag file lives in the log dir and is tagged with this process id.
    my $outputTagFile = "$logDir/anchorTags.$$";
    
    # Log and warnings files are tagged with this process id as well.
    my $logStem = "$logDir/".basename($script);
    
    my $cmd = join " ",
        $script,
        "-log",  "$logStem.log.$$",
        "-warn", "$logStem.warnings.$$",
        "-a",    $aceFile,
        $scaffoldFile, $gapDirFile, $outputTagFile;
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
    logOutputFileCreation($outputTagFile);
     
}

#============================================================================#
sub addAnchorTagsForSubProjectAce {
    
    # Run the script configured as script.createAnchorTagsForSubprojectAce.
    #
    # Parameters:
    #   $logDir     - directory for the script's log and warnings files
    #   $gapDirFile - file holding the gap dir path
    #
    # A failing command is handled by checkProcess.
    
    my ( $logDir, $gapDirFile ) = @_;
    
    # NOTE(review): $assemDir is not used below. The lookup is kept because
    # the properties object is configured to raise on missing entries --
    # confirm before removing.
    my $assemDir = $OBJ_PROPS->getProperty("createGapResProject.assemInfoDirName");
    
    my $script = getScript("script.createAnchorTagsForSubprojectAce");
    
    # Log and warnings files are tagged with this process id.
    my $logStem = "$logDir/".basename($script);
    
    my $cmd = join " ",
        $script,
        "-log",  "$logStem.log.$$",
        "-warn", "$logStem.warnings.$$",
        $gapDirFile;
    my %result = runCommand($cmd);
    
    # Check for successful completion.
    checkProcess(%result);
    
}

#============================================================================#
sub renamePrimers {

    # Run the script configured as script.renamePrimers, giving it the
    # primer name followed by the configured all-project primers file.
    #
    # Parameters:
    #   $primerName - primer naming label (the -pn option value)
    #
    # A failing command is handled by checkProcess.

    my ($primerName) = @_;

    my $primersFile = $OBJ_PROPS->getProperty("createSubProjectPrimers.allProjectPrimersFile");
    my $script = getScript("script.renamePrimers");

    my %result = runCommand( join " ", $script, $primerName, $primersFile );

    # Check for successful completion.
    checkProcess(%result);

}

#============================================================================#
sub setFileForLogging {
    
    # Direct both the logger's output stream and its error stream to the
    # same file, using the logger's append setters.
    #
    # Parameters:
    #   $logFile - log file name or path
    
    my ($logFile) = @_;
    
    $OBJ_LOGGER->setLogOutFileAppend($logFile);
    $OBJ_LOGGER->setLogErrorFileAppend($logFile);
    
}

#============================================================================#
sub getScript {
    
    # Resolve a config key such as "script.validateSubProject" to the
    # script path to execute.
    #
    # Parameters:
    #   $scriptType - config property name holding the script name or path
    #
    # Returns the script path. A configured value without any '/' is taken
    # relative to this program's own directory. A script that does not
    # exist is reported on stdout and through logError(..., 1).
    
    my ($scriptType) = @_;
    
    my $script = $OBJ_PROPS->getProperty("$scriptType");
    unless ( $script =~ /\// ) {
        # add path to script if not specified in config file
        $script = "$FindBin::RealBin/$script";
    }
    
    # Nothing more to do when the script is present.
    return $script if -e $script;
    
    my $errMsg = "ERROR: cannot find script $script defined in config file as $scriptType.\n";
    print $errMsg;
    logError($errMsg, 1);
    
    return $script;
    
}

#============================================================================#
sub checkProcess {
    
    # Abort the run when a command finished with a non-zero exit code.
    #
    # Parameters:
    #   %processInfo - process information as returned by runCommand; the
    #                  keys read here are exitCode, logStdoutMessage,
    #                  stdoutMessage and stderrMessage
    #
    # Returns nothing when the exit code is false. Otherwise builds an error
    # message from the captured output and aborts through logError(..., 1).
    
    my %processInfo = @_;
    
    return if !$processInfo{exitCode};
    
    my $errMsg = '';
    
    # Captured stdout is included only when logStdoutMessage is set.
    if ( $processInfo{logStdoutMessage} &&
        defined $processInfo{stdoutMessage} ) {
        $errMsg .= $processInfo{stdoutMessage};
    }
    
    # Guard against an undefined stderr capture so that no
    # uninitialized-value warning is emitted while building the message.
    if ( defined $processInfo{stderrMessage} ) {
        $errMsg .= $processInfo{stderrMessage};
    }
    
    # A command can fail without writing anything; make sure the abort
    # message still says what happened.
    if ( !length $errMsg ) {
        $errMsg = "ERROR: process exited with code $processInfo{exitCode} ".
                  "but produced no error output.\n";
    }
    
    logError($errMsg,1);
    
}

#============================================================================#
sub runCommand {
   
    # Announce, log and execute a shell command.
    #
    # Parameters:
    #   $cmd - complete command line to execute
    #
    # Returns the hash produced by runProcess (callers pass it on to
    # checkProcess).
    
    my $cmd = shift;
    
    print "Running $cmd ...\n\n";
    logOutput($cmd);
    
    # Execute command, capture stderr, stdout, exitcode
    #
    # NOTE(review): -checkExecutable=>0 presumably turns off runProcess's
    # check that the command is executable -- confirm against
    # PGF::Utilities::RunProcess.
    my %processInfo = runProcess($cmd,
        {-checkExecutable=>0,
        }
    );
    
    return %processInfo;
    
}

#============================================================================#
sub logExecution {
    
    # Log how the program was invoked together with the directory it is
    # running in.
    #
    # Parameters:
    #   $programExecution - the command line to record
    
    my ($programExecution) = @_;
    
    my $currentDir = getcwd;
    logOutput("Command: $programExecution\nCurrent directory: $currentDir");
}

#============================================================================#
sub logError {
    
    # Record an error with the logger and the warnings collector, and
    # optionally abort with a stack trace.
    #
    # Parameters:
    #   $message - error text
    #   $confess - optional; when true, confess with $message after logging
    
    my ( $message, $confess ) = @_;
    $confess ||= 0;
    
    $OBJ_LOGGER->logError($message);
    $OBJ_WARNINGS->add($message);
    
    confess $message if $confess;
}
    
#============================================================================#
sub logOutput {
    
    # Send a message to the logger and optionally echo it to stdout.
    #
    # Parameters:
    #   $message  - text to log
    #   $printMsg - optional; when true, also print the message followed by
    #               a newline
    
    my ( $message, $printMsg ) = @_;
    $printMsg ||= 0;
    
    $OBJ_LOGGER->logOut($message);
    
    if ( $printMsg ) {
        print "$message\n";
    }
}
    
#============================================================================#
sub logOutputFileCreation {
    
    # Report (log and print) whether an expected output file was created.
    #
    # Parameters:
    #   $outputFile - path of the file that should have been produced
    #
    # Returns 1 when the file exists with non-zero size, 0 otherwise.
    
    my $outputFile = shift;
    
    # Prefer the absolute path in the message, but keep the path as given
    # when it cannot be resolved (abs_path yields no usable value, for
    # example when a parent directory is missing). Otherwise the failure
    # message would not name the file at all.
    my $resolvedPath = abs_path($outputFile);
    if ( defined $resolvedPath && length $resolvedPath ) {
        $outputFile = $resolvedPath;
    }
    
    my $fileExists = -s $outputFile ? 1:0;
    my $msg = $fileExists ? "Created $outputFile" :
        "ERROR: failed to create $outputFile";
    logOutput($msg,1);
    
    return $fileExists;
    
}

#============================================================================#
sub printhelp {
    
    # Print the POD usage text and terminate the program.
    #
    # Parameters:
    #   $verbose - optional pod2usage verbosity level; a missing or false
    #              value is replaced by 1
    
    my ($verbose) = @_;
    $verbose ||= 1;
    
    pod2usage(-verbose=>$verbose);
    
    # Exit here in case pod2usage returns.
    exit 1;
}

#============================================================================#
