#!/usr/bin/env perl
# PROJECT: CASAVA
# MODULE:  $RCSfile: makeGenomeBam.pl,v $
#
# Copyright (c) 2010 Illumina
# This software is covered by the "Illumina Genome Analyzer Software
# License Agreement" and the "Illumina Source Code License Agreement",
# and certain third party copyright/licenses, and any user of this
# source file is bound by the terms therein (see accompanying files
# Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
# Illumina_Source_Code_License_Agreement.pdf and third party
# copyright/license notices).
#
# \author Chris Saunders
#
# This script merges chromosome BAM files into a single BAM file 
#

use warnings FATAL => 'all';
use strict;

use File::Copy qw(move);
use File::Spec;
use Getopt::Long;
use Sys::Hostname;

use lib '/home/psgendb/local/pkg/CASAVA_v1.8.2-build/lib/CASAVA-1.8.2/perl';
use Casava::Common::Log;
use Casava::Common::IOLib qw(createDir executeCmd);
use Casava::PostAlignment::Sequencing::Config
  qw(loadConfiguration isSpliceJunctionChrom %chrEnds %CONF_APP %CONF_PROJ readProjectParameters);
use Casava::PostAlignment::Sequencing::GenomicIOLib qw(@nmnmTags);
use Casava::PostAlignment::Sequencing::SamLib qw(getSamtoolsBin getChangeChrLabelType changeChrLabel);

my $bamChangeChrType = 0;

#------------------------------------------------------------------------------

sub getBamChrLabel($)
{
    my ($chrName) = @_;

    return changeChrLabel($chrName,$bamChangeChrType);
}

#------------------------------------------------------------------------------

my $argvStr     = join ' ', @ARGV;
my $scriptName = (File::Spec->splitpath($0))[2];

my $projectDir  = "";
my $isRealignedBam = 0;
my $help        = 0;

my $usage =
    "$scriptName [options]\n"
  . "\t--projectDir   - project directory\n"
  . "\t--realignedBam - merge realigned BAMs instead of primary BAMs\n"
  . "\t--help         - print this message\n";

my $result      = GetOptions(
    "projectDir=s"  => \$projectDir,
    "realignedBam"  => \$isRealignedBam,
    "help|h"        => \$help
);

if ((not $result) or $help) {
    errorExit "\n\n$usage";
}
errorExit "ERROR: Incorrect number of arguments\n$usage"
  if ($projectDir eq "");

loadConfiguration($projectDir);

# Configuration:
my $libexecDir      = '/home/psgendb/local/pkg/CASAVA_v1.8.2-build/libexec/CASAVA-1.8.2';
my $bamDirName      = $CONF_APP{dirBam};
my $confDir         = $CONF_APP{dirConf};
my $confFile        = $CONF_APP{projectConf};
my $concatBamBin    = File::Spec->catfile($libexecDir, 'bam_cat');
my $dirCurrentBuild = $CONF_PROJ{dirBuildParsed};
my $samtoolsBin     = getSamtoolsBin(%CONF_PROJ);

$bamChangeChrType = getChangeChrLabelType($CONF_PROJ{bamChangeChromLabels});

my $currentBuildDir   = $CONF_PROJ{dirBuildParsed};
my $NMNM_TAG        = $CONF_APP{TAG_NMNM};
my $nmnmDir = File::Spec->catdir( $currentBuildDir, $NMNM_TAG );

my $verboseLevel = 5;

my %buildChromsBinSizes = ();
readProjectParameters( %buildChromsBinSizes, "BUILD_BIN_SIZES", $projectDir);
my @chroms = sort { $chrEnds{$b}->{length} <=> $chrEnds{$a}->{length} } keys %buildChromsBinSizes;

#------------------------------------------------------------------------------

{
    my $hostname        = hostname();
    printLog("Running $hostname:[$0 $argvStr]\n", 0);
}

# file/dir names -- this should all be synced up somewhere:
my $genomeDir = 'genome';
my $realignedDirName="realigned";
my $bamFileName = "sorted.bam";
if($isRealignedBam) {
    $bamFileName = "sorted.realigned.bam";
}

# get the complete list of bam files and reference sequences to merge into the
# genome output file:
#

# order references in BAM file by chromosome size:
my @allBams = ();
foreach my $chrom (@chroms) {
    next if ( isSpliceJunctionChrom($chrom, %CONF_PROJ) );
    my $bamDirPath = File::Spec->catdir($dirCurrentBuild,$chrom,$bamDirName);
    if($isRealignedBam) {
        $bamDirPath = File::Spec->catdir($bamDirPath,$realignedDirName);
    }
    my $bamFilePath = File::Spec->catfile($bamDirPath,$bamFileName);
    push(@allBams,$bamFilePath) if(-f $bamFilePath);
}

# add NMNM files:
if( (-d $nmnmDir) and (not $isRealignedBam) ) {
    for my $tag (@nmnmTags) {
        my $outPrefix = $CONF_APP{$tag} . "_";
        my $outPathPrefix = File::Spec->catfile( $nmnmDir, $outPrefix);
        for my $outPath (glob("$outPathPrefix*.bam")) {
            push @allBams, $outPath;
        }
    }
}

if((scalar @allBams) == 0){
    if($isRealignedBam) {
        exit;
    } else {
        errorExit "ERROR: No chromosome BAM files to be merged.\n";
    }
}

# merge bam files:
#
my $outputPath = File::Spec->catdir($projectDir,$genomeDir,$bamDirName);
errorExit "ERROR: Can't find genome bam output directory: $outputPath\n" unless (-d $outputPath);

if($isRealignedBam) {
    $outputPath=File::Spec->catdir( $outputPath, $realignedDirName );

    createDir($outputPath) if(not -e $outputPath);
    errorExit "ERROR: can't create dir: $outputPath\n" if(not -d $outputPath);
}

my $genomeBamPath = File::Spec->catfile($outputPath,$bamFileName);
my $tmpGenomeBamPath = "$genomeBamPath.incomplete";


my $headerFH = File::Temp->new();
my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$allBams[0]." > $headerFH'";
executeCmd($getHeaderCmd, $verboseLevel);

# translate header to new chromosome labels:
my $headerFH2 = File::Temp->new();
my $isFirstSQ = 1;
while(<$headerFH>){
    if(/^\@SQ/) {
        if($isFirstSQ) {
            # @chroms already sorted by chromosome size:
            for my $chrom (@chroms) {
                next if ( isSpliceJunctionChrom($chrom, %CONF_PROJ) );
                my $printChrom = changeChrLabel($chrom,$bamChangeChrType);
                printf $headerFH2 "\@SQ\tSN:%s\tLN:%i\n", $printChrom, $chrEnds{$chrom}->{length};
            }
            $isFirstSQ=0;
        }
        next;
    }
    print $headerFH2 $_;
}

my $concatBamCmd = "bash -c '$concatBamBin $headerFH2 $tmpGenomeBamPath ";
$concatBamCmd .= join(" ",@allBams) ."'";
executeCmd($concatBamCmd, $verboseLevel);

move($tmpGenomeBamPath,$genomeBamPath) or
  errorExit("ERROR: File move failed: $!\n"
            ."\tAttempting to move ". $tmpGenomeBamPath ." to " . $genomeBamPath ."\n");

my $genomeBamIndexCmd = "$samtoolsBin index $genomeBamPath";
executeCmd($genomeBamIndexCmd, $verboseLevel);


1;
__END__
