#!/usr/bin/env perl

=head1 NAME

createSubProject.pl

=head1 SYNOPSIS
  
  createSubProject.pl -is <454Scaffolds.txt> 
                      -ip <454Contigs.ace.1.pairs>    
                      -od <existing output dir or path> 
                      -of <full path for creation of sub project fof>
                      -sf <name of scaff info file to be created within each subproject>
	              -rl <ReadLinkage min ex 2> 
                      -ms <min Scaffold len ex 50000> 
                      -ic <ignore Circ gap 1=true 0=false> 
  
  Note:all given options are required

=head1 DESCRIPTION

This script takes as input newbler generated 454Scaffolds.txt file and 
newblerAce2ReadPair.pl generated read pairs file. The output dir or path
for output dir creation is specified.

Suproject directories will be created in the output dir with the following 
naming convention:

s00002c00004c00005 (scaffold, left contig, right contig)

Within each subproject directory is a scaffInfo file with the following tab-delimited one line format:

 1.Gap Name          ex. s00002c00009c00004

 2.Gap Size          ex. 100

 3.Left Contig Len   ex. 548

 4.Left Contig Name  ex. contig00009

 5.Right Contig Len  ex. 19097

 6.Right Contig Name ex. contig00004

 7.Scaffold          ex. scaffold00002

Parameters required on the command line are :

 -rl = minReadLinkage = minimum reads linking two contigs
                        required to create subproject

 -ms = minScaffSize   = minimum scaffold size to consider
                        when creating subprojects (kbp)

 -ic = ignoreCircGap  = if set to 0 will create an additional
                        circularization gap per scaffold if 
                        linking criteria is met.


=head1 VERSION

$Revision: 1.6 $

$Date: 2009-08-26 17:18:14 $

=head1 HISTORY

=over
7
=item *

Brian Foster 2008/10/30 Creation

=back

=cut

# Includes


use strict;
use Carp;
use vars qw( $RealBin $opt454Scaffolds $optReadPair $optOutDir $optHelp $optReadLinkage $optMinScaff $optIgnoreCirc $optFofPath $optScaffInfoName);
use FindBin qw($RealBin);

use Getopt::Long;
use Pod::Usage;
use lib "$RealBin/../lib";
use File::Path;
use File::Basename;
use PGF::Newbler::Scaffolds454;

#============================================================================#
# INPUT VALIDATION
#============================================================================#

if( !GetOptions(
        "is=s"=>\$opt454Scaffolds,
        "ip=s"=>\$optReadPair,
        "od=s"=>\$optOutDir,

        "of=s"=>\$optFofPath,
        "sf=s"=>\$optScaffInfoName,

	"rl=s"=>\$optReadLinkage,
	"ms=s"=>\$optMinScaff,
	"ic=s"=>\$optIgnoreCirc,
        "h"=>\$optHelp,
    )
) {
    printHelp();
}

pod2usage(-verbose=>2) and exit 1 if defined $optHelp;

if (! defined $opt454Scaffolds ||  ! defined $optReadPair || ! defined $optOutDir
      || ! defined $optReadLinkage || ! defined $optMinScaff || ! defined $optIgnoreCirc
      || ! defined $optFofPath || ! defined $optScaffInfoName){
    printHelp();
}
if ( ! -e $opt454Scaffolds ) {
    print STDERR "Cannot find your scaffold file $opt454Scaffolds\n";
    printHelp();
}
if (! -e $optReadPair){
    print STDERR "Cannot find your read pair file $optReadPair\n";
    printHelp();    
}
if ( ! -d $optOutDir ){
    mkpath($optOutDir) || print STDERR "can't make $optOutDir\n";
    printHelp();
}
if ( ! -d dirname($optFofPath)){
    mkpath (dirname($optFofPath));
}
if ( ! -w dirname($optFofPath)){
    print STDERR dirname($optFofPath), "Not writable\n";
    printHelp();
}
if(! -w $optOutDir){
    print STDERR "$optOutDir not writable\n";
    printHelp();
}

#============================================================================#
# MAIN
#============================================================================#
# get linking information from pair file
my %linking;
my (@F,$template);
open (IN , $optReadPair) or confess "can't open $optReadPair\n";
while(<IN>){
    chomp;
    @F = (split/\t/,$_);
    next if $F[6] eq "U";
    ($template = $F[0])=~ s/^(\w+?)(?:\_|\.).*$/$1/;
    #if read is left right and not on same contig
    if ($F[3] eq "+" && $F[10] eq "-" && $F[4] ne $F[11]){
	$linking{$F[4]}{$F[11]}{$template}++;
    }
}
close(IN);

# get and output gaps
my ($gapDir,$scaffInfo,$string,$tmpGapName);
my $scaff = PGF::Newbler::Scaffolds454->new("$opt454Scaffolds");
my @scaffoldNames = $scaff->allScaffolds();
my @gapFof;
foreach my $scaffold (sort @scaffoldNames){
    my @gaps = $scaff->getGaps(scaffold=>$scaffold);
    if (scalar (@gaps) > 0){
	$tmpGapName = $scaffold .  $gaps[-1]->{rightContigName} . $gaps[0]->{leftContigName};
	$tmpGapName =~ s/scaffold/s/g;
        $tmpGapName =~ s/contig/c/g;
	# Try to add circularizing subproject
	push (@gaps,{ leftContigName=>$gaps[-1]->{rightContigName},
		     leftContigLen =>$gaps[-1]->{rightContigLen},
		     gapSize=>100,
		     rightContigName=>$gaps[0]->{leftContigName},
		     rightContigLen =>$gaps[0]->{leftContigLen},
		     scaffold=>$scaffold,
		     gapName=>$tmpGapName,
		     gapCirc=>1
		     });
	#ignore last circularization gap if specified
	pop(@gaps) if $optIgnoreCirc;
	foreach my $gap (@gaps){

	    #scaffold size criteria
	    last if ($scaff->scaffoldLength($gap->{scaffold}) < $optMinScaff);
	    # read pair linkage criteria
	    next if (scalar(keys %{$linking{$gap->{leftContigName}}{$gap->{rightContigName}}}) < $optReadLinkage);

	    #create subdir in outputdir
	    $optOutDir =~ s/\/+$//;
	    $gapDir = $optOutDir  . "/" . $gap->{gapName};
	    mkdir($gapDir);
	    push @gapFof, $gapDir;

	    #create scaffinfo.txt file
	    $string = "";
	    $scaffInfo = $gapDir . "/" . $optScaffInfoName;
	    open(OUT, ">$scaffInfo") or confess "can't open $scaffInfo for writing\n";
	    foreach my $key (sort keys %{$gap}){
		$string .= "\t" . $gap->{$key};
	    }
	    $string =~ s/^\t//;
	    print OUT "$string\n";
	    close(OUT);
	    
	    #create isCircular file
	    if (exists $gap->{gapCirc}){
		open(OUT,">${gapDir}/isCircular") or confess "can't open ${gapDir}/isCircular\n";
		print OUT "";
		close(OUT);
	    }
	}
    }
}
open (OUT, ">$optFofPath") or confess "can't print out $optFofPath\n";
foreach my $path (@gapFof){
    print OUT "$path\n";
}
close(OUT);

#============================================================================#
sub printHelp {
    
    my $optVerbose = shift || 1;
    pod2usage(-verbose=>$optVerbose);
    exit 1;
}

#============================================================================#

exit 0;
