#!/usr/bin/env perl

=head1 NAME

primer3Design.pl

=head1 SYNOPSIS
  
  primer3Design.pl    -if         <input sequence fasta file>
                      -ic         <sequence coords file>
                      -ip         <primer3 parameter file>
                      -o          <primer output file>
                      
                      -help

  Note:all given options are required except help

    PRIMER3_PATH environmental variable path to primer3_core executable must be set 

=head1 DESCRIPTION

primer3Design.pl is a program that takes as inputs: a sequence file in fasta format
,a sequence coordinate file (in the format described below), and a primer3 
parameter file. The program then calls primer3_core and outputs a primer output
file( in the format described below).

options:

-if:    A sequence file in fasta format.  The sequence id represented by the first
    space separated field in the fasta header lines.

-ic:
    The sequence coordinates file used to direct primer3 and must have the following
    format:
    
       FASTA_TAG=(sequence id)
       TARGET_REGION=(start and length coords of region to design primers around)
       TEMPLATE=(Template id)
       LEFT_PRIMER_NAME=(example s00127c01594c01595_left)
       RIGHT_PRIMER_NAME=(example s00127c01594c01595_right)
       EXCLUDED_REGION=(start and len of region to exclude)
       EXCLUDED_REGION=(Note: there can be multiple regions)

    Example:

       FASTA_TAG=s00127c01594c01595
       TARGET_REGION=60727 50
       TEMPLATE=GDNA
       LEFT_PRIMER_NAME=s00127c01594c01595_left
       RIGHT_PRIMER_NAME=s00127c01594c01595_right
       EXCLUDED_REGION=1 59626
       EXCLUDED_REGION=60677 50
       EXCLUDED_REGION=60777 50
       EXCLUDED_REGION=61877 59335

-ip:
   Primer3 paramter file.  Formated list of primer3 paramters.

    Example:

     PRIMER_NUM_RETURN=5
     PRIMER_MAX_END_STABILITY=9.0
     PRIMER_MAX_MISPRIMING=12.00
     PRIMER_PAIR_MAX_MISPRIMING=24.00
     ...
     PRIMER_IO_WT_SEQ_QUAL=0.0
     PRIMER_TASK=pick_pcr_primers
     PRIMER_FIRST_BASE_INDEX=1
     PRIMER_PICK_ANYWAY=1
     =

-o:
   Path to primer output file containing the designed primer information
   with the following format:
    
   name=(left primer name),(right)
   position=("left primer id" "left primer start" "left primer end"),(right)
   type=(sequencing type left),(right)
   sequence=(primer sequence left),(right)
   template=(template to use)
   date=(left primer design date),(right date) temp=(left temp),(right)
   
   Example:

   name=s00127c01594c01595_left,s00127c01594c01595_right
   position=s00127c01594c01595 60623 60643,s00127c01594c01595 60892 60912
   type=custom PCR,custom PCR
   sequence=CGGTtACGTCAaGGTTCGTT,GCAAAGTCCACCTGAATGGT
   template=GDNA1
   date=20090120:142243,20090120:142243 temp=60,60

=head1 VERSION

$Revision: 1.8 $

$Date: 2009-09-22 17:42:44 $

=head1 HISTORY

=over

=item *

Brian Foster 2008/12/23 Creation

=back

=cut

# Includes

use strict;
use Carp;
use vars qw( $RealBin $optif $optic $optip $opto $opth );
use FindBin qw($RealBin);

use Getopt::Long;
use Pod::Usage;
use lib "$RealBin/../lib";
use File::Path;
use File::Basename;
use File::Temp qw(tempdir);
use PGF::Utilities::RunProcess qw(runProcess);

#============================================================================#
# INPUT VALIDATION
#============================================================================#

if( !GetOptions(
		"if=s"=>\$optif,
		"ic=s"=>\$optic,
		"ip=s"=>\$optip,
		"o=s"=>\$opto,
		"h"=>\$opth,
		)
) {
    printHelp();
}

pod2usage(-verbose=>2) and exit 1 if defined $opth;


# Check environmental var
#
my $primer3Path;
if ( defined $ENV{PRIMER3_PATH} ) {
    $primer3Path = $ENV{PRIMER3_PATH};
    if ( ! -x $primer3Path ) {
	print STDERR "$primer3Path not found or not executable\n";
	printHelp();
    }
} 
else { 
    print "Can't find the location of primer3_core using PRIMER3_PATH environmental variable\n";
    printHelp();
}

# Validate all options
#
if (! defined $optif|| ! defined $optic || ! defined $optip || ! defined $opto){
    printHelp();
}
if (! -s $optif){
    print "$optif is not found or is zero size\n";
    printHelp();    
}
if (! -s $optic){
    print "$optic is not found or is zero size\n";
    printHelp();    
}
if (! -s $optip){
    print "$optip is not found or is zero size\n";
    printHelp();    
}
if ( ! -d dirname($opto)){
    mkpath dirname($opto);
}
if ( ! -w dirname($opto)){
    print $opto, " Not writable\n";
    printHelp();
}

#============================================================================#
# MAIN
#============================================================================#

# Parse primer input file
#
my (%primers,$id);
{
    open (PRI,$optic) or confess "can't open $optic\n";
    my $tmp = $/;
    $/ = "\nFASTA_";
    my (@temp);
    while(my $record = <PRI>){
	$record =~ s/^FASTA_//;
	$record =~ s/FASTA_$//;
	@temp = grep /^[^\#]/ , split/\n/,$record;
	$id = parseTags(\@temp,"TAG");
	my %tempHash = ();
	foreach my $tag (qw [TARGET_REGION TEMPLATE LEFT_PRIMER_NAME RIGHT_PRIMER_NAME EXCLUDED_REGION]){
	    $tempHash{$tag} = parseTags(\@temp,$tag);
	}
	push @{$primers{$id}{targets}},\%tempHash;
    }
    close(PRI);
    $/ = $tmp;
}

# index sequences then validate vs primer inputs
#
my %seqindex;
open (FAS, $optif) or confess "can't open $optif\n";
while (my $line = <FAS>){
    if ($line =~ /^>(\S+).*$/){
	$id = $1;
	confess "duplicate ids in $optif\n" if (exists $seqindex{$id});
	$seqindex{$id}{'start'} =  tell(FAS);
	next;
    } 
    $seqindex{$id}{'length'} += length($line) - 1;
    $seqindex{$id}{'end'} = tell(FAS);
}
foreach my $id (keys %primers){
    if (! exists $seqindex{$id}){
	confess "$id in primers file not in sequence file\n";
    }
}

# Read parameter file
#
my $primer3Parameters;
open (PAR, $optip) or confess "can't open $optip\n";
while (my $line = <PAR>){
    $primer3Parameters .= $line;
}
close(PAR);

# Go through primers, fetch seq, design ... etc.
#
my $DEBUG = 0;
my $cleanupTmpDir = $DEBUG ? 0:1;
my $tmpFilePath = tempdir( CLEANUP=>$cleanupTmpDir );
my ($primer3Input,$primer3Output,$cmd);
my $success = 0;
foreach my $id (keys %primers){
    for (my $i=0; $i<scalar (@{$primers{$id}{targets}}); $i++){
	$primer3Input = "${tmpFilePath}\/primer3.input";
	$primer3Output = "${tmpFilePath}\/primer3.output";
	
	# Create primer3 input file
	open (OUT, ">$primer3Input") or confess "can't open $primer3Input for writing\n";
	my $seqRef = &fetchSequence(*FAS,$seqindex{$id}{'start'},$seqindex{$id}{'end'});
	print OUT "SEQUENCE=" .  $$seqRef . "\n";  
	print OUT "TARGET=" . $primers{$id}{targets}[$i]->{TARGET_REGION} . "\n";
	print OUT "EXCLUDED_REGION=" . $primers{$id}{targets}[$i]->{EXCLUDED_REGION} . "\n";
	print OUT "PRIMER_SEQUENCE_ID=" . $id . "\n";
	print OUT $primer3Parameters;
	close (OUT);
	
	# Call primer3_core on input file
	$cmd = "$primer3Path < $primer3Input > $primer3Output";
	print STDERR "$cmd\n" if $DEBUG;
	system($cmd);
	
	# Parse primer3 ouput
	open(PR_IN, "$primer3Output") or die "can't open $primer3Output\n";
	my %hash;
	my $tmp = $/;
	{
	    $/ = "\n";
	    while (<PR_IN>){
		chomp;
		next if ($_=~/^=$/);
		next if ($_=~/^SEQUENCE=/);
		next if ($_=~/^PRIMER_PRODUCT_SIZE_RANGE=/);
		my @tmp = split(/\=/,$_);
		if ($tmp[0] eq 'PRIMER_LEFT_SEQUENCE' && $tmp[1] ne ''){
		    $success = 1;
		    $primers{$id}{targets}[$i]->{success} = 1;
		}
		$hash{$tmp[0]}=$tmp[1] if $tmp[0] ne '';
	    }
	    $/ = $tmp;
	    close(PR_IN);
	}
    
	system("rm $primer3Input $primer3Output") unless $DEBUG;
	$primers{$id}{targets}[$i]->{result} = \%hash;
    }
}

# Report primers
# 
if ($success){
    my ($primerStart,$primerEnd);
    my $date = &getTimeStamp();
    open (OUT, ">$opto") or confess "can't open $opto\n";
    foreach my $id (keys %primers){
	for (my $i=0; $i<scalar (@{$primers{$id}{targets}}); $i++){
	    if ($primers{$id}{targets}[$i]->{success}){
		print OUT "name=" .  $primers{$id}{targets}[$i]->{LEFT_PRIMER_NAME} . "," . $primers{$id}{targets}[$i]->{RIGHT_PRIMER_NAME} . "\n";
		($primerStart,$primerEnd) = split/\,/,$primers{$id}{targets}[$i]->{result}{PRIMER_LEFT};
		$primerEnd = $primerStart + $primerEnd;
		print OUT "position=$id $primerStart $primerEnd";
		($primerStart,$primerEnd) = split/\,/,$primers{$id}{targets}[$i]->{result}{PRIMER_RIGHT};
		$primerEnd = $primerStart - $primerEnd;
		print OUT ",$id $primerStart $primerEnd\n";
		print OUT "type=custom PCR,custom PCR\n";
		print OUT "sequence="  . $primers{$id}{targets}[$i]->{result}{PRIMER_LEFT_SEQUENCE} . "," . $primers{$id}{targets}[$i]->{result}{PRIMER_RIGHT_SEQUENCE} . "\n";
		print OUT "template=" . $primers{$id}{targets}[$i]->{TEMPLATE}. "\n";
		print OUT "date=" . $date . "," . $date;
		print OUT " temp=" . sprintf("%.0f",$primers{$id}{targets}[$i]->{result}{PRIMER_LEFT_TM}) ;
		print OUT "," . sprintf("%.0f",$primers{$id}{targets}[$i]->{result}{PRIMER_RIGHT_TM}) . "\n\n";
	    }
	}
    }
}

#use Data::Dumper;
#print Data::Dumper->Dump([\%primers]);
#exit;


#============================================================================#
sub getTimeStamp{
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
    return sprintf ("%4d%02d%02d:%02d%02d%02d",$year+1900,$mon+1,$mday,$hour,$min,$sec);
}

#============================================================================#
# Parses from primer input and reformats to "1239,52 2347,98" coord format
#
sub parseTags{
    my $arrRef = $_[0];
    my $tag = $_[1];

    my @return;
    foreach my $record (map{/^$tag\=(.*)/} grep /^$tag\=/ , @{$arrRef}){
	$record=~ s/\s+$//;
	$record=~ s/\s+/\,/g;
	if (($record =~ tr/\,/\,/) > 1){
	    warn "$tag\=$record coordinates should be on separate lines\n";
	}
	push @return , $record;
    }
    return join (" ",@return);
}

#============================================================================#
# Fetches Sequence from open file handle
#
sub fetchSequence{
    my $FILE = $_[0];
    my $start = $_[1];
    my $end = $_[2];
    my $return;
    seek($FILE,$start,0);
    while(my $line = <$FILE>){
	last if $line =~ />/;
	chomp $line;
	$return .= $line;
    }
    return \$return;
}

#============================================================================#
sub printHelp {
    
    my $optVerbose = shift || 1;
    pod2usage(-verbose=>$optVerbose);
    exit 1;
}

#============================================================================#

exit 0;
