#!/usr/bin/env perl
# PROJECT: CASAVA
# MODULE:  $RCSfile: concatChromBam.pl,v $
# AUTHOR:  Tony Cox
#
# Copyright (c) 2008 Illumina
# This software is covered by the "Illumina Genome Analyzer Software
# License Agreement" and the "Illumina Source Code License Agreement",
# and certain third party copyright/licenses, and any user of this
# source file is bound by the terms therein (see accompanying files
# Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
# Illumina_Source_Code_License_Agreement.pdf and third party
# copyright/license notices).
#
# this script concatenates the per-bin BAM files of one chromosome into a
# single sorted.bam file and indexes it
#

use warnings FATAL => 'all';
use strict;

use File::Copy qw(move);
use File::Spec;
use File::Temp;
use Getopt::Long;
use Sys::Hostname;

use lib '/home/psgendb/local/pkg/CASAVA_v1.8.2-build/lib/CASAVA-1.8.2/perl';
use Casava::Common::Log;
use Casava::Common::IOLib qw(executeCmd);
use Casava::PostAlignment::Sequencing::Config
  qw(loadConfiguration isSpliceJunctionChrom %chrEnds %CONF_APP %CONF_PROJ readProjectParameters);
use Casava::PostAlignment::Sequencing::SamLib qw(getSamtoolsBin);

#------------------------------------------------------------------------------

# Keep the raw command line for the start-up log message and the bare script
# name for the usage text.  @ARGV is captured before GetOptions consumes it.
my $argvStr    = join(' ', @ARGV);
my $scriptName = (File::Spec->splitpath($0))[2];

# Option values; the empty string means "not given on the command line".
my $chrom      = "";
my $projectDir = "";
my $help       = 0;

my $usage = join('',
    "$scriptName [options]\n",
    "\t--chrom=NAME      - chromosome\n",
    "\t--projectDir=DIR  - project directory\n",
    "\t--help            - print this message\n",
);

my $result = GetOptions(
    "chrom=s"      => \$chrom,
    "projectDir=s" => \$projectDir,
    "help|h"       => \$help,
);

# A failed parse and an explicit --help both end with the usage text.
if ($help or not $result) {
    errorExit("\n$usage");
}

# Both --chrom and --projectDir are mandatory.
if ($projectDir eq "" or $chrom eq "") {
    errorExit("ERROR: Incorrect number of arguments\n$usage");
}

# Load the configuration of the given project.  Presumably this is what
# populates the imported %chrEnds, %CONF_APP and %CONF_PROJ hashes read
# below -- TODO confirm against Casava::PostAlignment::Sequencing::Config.
loadConfiguration($projectDir);

# Reject chromosome labels that have no entry in %chrEnds.
errorExit("ERROR: Invalid chromosome label: $chrom\n")
  unless defined $chrEnds{$chrom};

# Configuration:
my $libexecDir = '/home/psgendb/local/pkg/CASAVA_v1.8.2-build/libexec/CASAVA-1.8.2';
my ($timeStampFormat, $confDir, $projConfFile, $bamDirName) =
  @CONF_APP{qw(formatTimeStamp dirConf projectConf dirBam)};
my $dirCurrentBuild = $CONF_PROJ{dirBuildParsed};
my $samtoolsBin     = getSamtoolsBin(%CONF_PROJ);
my $concatBamBin    = File::Spec->catfile($libexecDir, 'bam_cat');

# Per-chromosome bin counts, read from the project's BUILD_BIN_SIZES
# parameter; the keys are the chromosome names.
my %buildChromsBinSizes = ();
readProjectParameters( %buildChromsBinSizes, "BUILD_BIN_SIZES", $projectDir );
my @chroms = keys %buildChromsBinSizes;

#------------------------------------------------------------------------------
# Record which host is running this command line.
printLog("Running " . hostname() . ":[$0 $argvStr]\n", 0);

# The final BAM is named sorted.bam; a file that is still being produced
# carries an extra ".incomplete" suffix and is moved to the final name
# once it is complete.
my $bamFilename    = 'sorted.bam';
my $tmpBamFilename = $bamFilename . '.incomplete';

# The chromosome directory of the current build and its BAM sub-directory
# must both exist already; this script does not create them.
my $chromPath = File::Spec->catdir($dirCurrentBuild, $chrom);
if (not -d $chromPath) {
    errorExit("ERROR: Can't find chrom directory: $chromPath\n");
}

my $outputPath = File::Spec->catdir($chromPath, $bamDirName);
if (not -d $outputPath) {
    errorExit("ERROR: Can't find output directory: $outputPath\n");
}


# Get bin-level bam files -- warnings have already been
# issued for missing bin files by sorted2bam.pl, so no
# need to repeat them here.
#
# warn option (fixed for the time being):
# NOTE(review): this flag is not read anywhere in the visible script; it is
# kept in case it is meant to become configurable -- confirm before removing.
my $isWarnMissingSorted = 1;

# Number of bins recorded for this chromosome under BUILD_BIN_SIZES.
# A chromosome can pass the %chrEnds check and still have no entry here;
# without this guard the loop below would die on a fatal "uninitialized
# value" warning that does not name the chromosome, so fail explicitly.
my $binCount = $buildChromsBinSizes{$chrom};
errorExit("ERROR: No bin count found for chromosome: $chrom\n")
  unless defined $binCount;

# Collect the per-bin ".incomplete" BAM files that actually exist, in
# ascending bin order.  Bin directories are named by the zero-padded,
# four-digit bin index; missing directories or files are skipped silently.
my @tmpBamList = ();
for my $i (0 .. $binCount - 1) {
    my $binDir = File::Spec->catdir($chromPath, sprintf("%04d", $i));
    next unless -d $binDir;

    my $tmpBamPath = File::Spec->catfile($binDir, $tmpBamFilename);
    next unless -f $tmpBamPath;

    push @tmpBamList, $tmpBamPath;
}

# Final chromosome-level BAM inside the output directory.
my $bamPath = File::Spec->catfile($outputPath, $bamFilename);

if (scalar(@tmpBamList) == 0) {
    # Nothing to build for this chromosome: finish here, without an index.
    logWarning("No non-empty bins for chr $chrom - skipping");
    exit;
} elsif (scalar(@tmpBamList) == 1) {
    # A single bin needs no concatenation: its file becomes the final BAM.
    move($tmpBamList[0], $bamPath) or
      errorExit("ERROR: File move failed: $!\n"
               ."\tAttempting to move ". $tmpBamList[0]." to " . $bamPath ."\n");
} else {
    # NOTE(review): paths are interpolated unquoted into the shell command
    # strings below, so a path containing whitespace or quotes would break
    # them.  Left as is because executeCmd is handed a single command string.

    # Dump the header of the first bin's BAM into a temporary file.  The
    # File::Temp object stringifies to its file name, which is what ends up
    # in the command strings.
    my $headerFH = File::Temp->new();
    my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$tmpBamList[0]." > $headerFH'";
    executeCmd($getHeaderCmd, 5);

    # Concatenate into an ".incomplete" file inside the output directory.
    # This names a file, so it is built with catfile rather than catdir.
    my $tmpBamPath = File::Spec->catfile($outputPath, $tmpBamFilename);
    my $concatBamCmd = "bash -c '$concatBamBin $headerFH $tmpBamPath "
                     . join(" ", @tmpBamList) . "'";
    executeCmd($concatBamCmd, 5);

    # Dropping the last reference lets File::Temp remove the header file.
    $headerFH = undef;

    move($tmpBamPath, $bamPath) or
      errorExit("ERROR: File move failed: $!\n"
                ."\tAttempting to move ". $tmpBamPath ." to " . $bamPath ."\n");

    # Delete the per-bin inputs only once the final BAM is in place.  If
    # they were removed before a failed move, a rerun would find no bins and
    # take the "skipping" branch above without ever producing sorted.bam.
    my $removedCount = unlink(@tmpBamList);
    if ($removedCount != scalar(@tmpBamList)) {
        logWarning("Could not remove all bin-level BAM files for chr $chrom");
    }
}

# Index the final BAM.
my $indexBamCmd = "$samtoolsBin index $bamPath";
executeCmd($indexBamCmd, 5);

#------------------------------------------------------------------------------
