#!/usr/bin/env perl
#
=head1 NAME

fastaParser.pl - parses one or more entries in a single or multi fasta/qual file.

=head1 SYNOPSIS

  fastaParser.pl [options] [<fasta tag> <fasta tag> ...]  or  -i <file of fasta tags>
  
  Options:
  -f <file>   Fasta or qual file name (required)
  -r          Reverse complement sequence (optional; applies to fasta sequences only)
  -l          Report sequence length only (optional)
  -o <file>   Output file (optional; default prints to screen)
  -i <file>   Input file of fasta tags to parse, one tag per line (optional)
  -h          Help message (optional)
  
  The <fasta tag> is the first whitespace-delimited word after the '>' symbol
  in the fasta definition line.  If no fasta tags are specified, then all
  entries in the file are reported.

=head1 DESCRIPTION

$Revision: 1.10 $

$Date: 2009-08-26 17:18:14 $

=head1 AUTHOR(S)

Stephan Trong

=head1 HISTORY

=over

=item *

S.Trong 2008/11/06 creation

=back

=cut

use strict;
use warnings;
use Pod::Usage;
use Getopt::Long;
use FindBin;
use lib "$FindBin::RealBin/../lib";
use PGF::Parsers::FastaParse;
use vars qw( $optHelp $optInputFile $optRevComp $optReportLengthOnly $optOutputFile $optInputList);

#============================================================================#
# INPUT VALIDATION
#============================================================================#
# Parse the command line into the file-level option variables.  When
# GetOptions reports a failure, show the usage text and stop.
GetOptions(
    "f=s" => \$optInputFile,
    "r"   => \$optRevComp,
    "i=s" => \$optInputList,
    "l"   => \$optReportLengthOnly,
    "o=s" => \$optOutputFile,
    "h"   => \$optHelp,
) or usage();

# -h asks for the usage text at verbosity level 2.
if ($optHelp) {
    usage(2);
}

#============================================================================#
# INITIALIZE VARIABLES
#============================================================================#
# Tags selected for output, stored as hash keys.  The hash stays empty when
# neither -i nor command-line tags are given.
my %inputTags = ();

if ($optInputList) {
    # A tag list given with -i is used instead of tags on the command line.
    # Three-argument open with a lexical handle: the user-supplied file name
    # is never interpreted as an open mode or as a command to pipe from.
    open my $listFh, '<', $optInputList
        or die "can't open $optInputList: $!\n";
    while (my $read = <$listFh>) {
        # Remove the line terminator (including a DOS-style "\r\n") and any
        # surrounding blanks so the stored key is the bare tag.
        $read =~ s/^\s+//;
        $read =~ s/\s+$//;
        ++$inputTags{$read};
    }
    close $listFh;
} else {
    # Every remaining command-line argument is taken as a tag.
    @inputTags{@ARGV} = (1) x @ARGV if @ARGV;
}

#============================================================================#
# VALIDATE INPUTS
#============================================================================#
# The -f option is mandatory; without it, explain and show the usage text.
unless ($optInputFile) {
    print STDERR "You must specify the -f <file> option.\n";
    usage();
}

# The -s file test is false for a missing file as well as for an empty one.
unless ( -s $optInputFile ) {
    print STDERR "The file you specified does not exist or is zero size.\n";
    exit 1;
}

#============================================================================#
# MAIN
#============================================================================#

# Print the selected entries to the output file (or to the screen when no
# output file was given), then finish with a success status.
printFastaEntries( $optInputFile, $optOutputFile, %inputTags );

exit 0;

#============================================================================#
# SUBROUTINES
#============================================================================#
# printFastaEntries( $inputFile, $outputFile, %inputTags )
#
# Reads the entries of $inputFile through PGF::Parsers::FastaParse and prints
# the selected ones.
#
#   $inputFile   fasta or qual file name handed to the parser constructor.
#   $outputFile  file to create for the output; when undefined or empty the
#                entries are printed to STDOUT instead.
#   %inputTags   entry names to print (an entry is kept when its name maps to
#                a true value); when the hash is empty every entry is printed.
#
# Also reads the file-level option flags $optRevComp (call reverseComp on the
# entry before printing) and $optReportLengthOnly (print the entry's Length
# instead of its sequence).
#
# Dies if the output file cannot be created or closed.
sub printFastaEntries {
    my ( $inputFile, $outputFile, %inputTags ) = @_;

    # Test with defined/length rather than truthiness so that an output file
    # literally named "0" is still honored.
    my $printToFile = ( defined $outputFile && length $outputFile ) ? 1 : 0;
    my $objFastaParser = PGF::Parsers::FastaParse->new($inputFile);

    # Write through a single handle: STDOUT by default, or a lexical handle
    # on the requested file.  A separate variable is used for open() so that
    # STDOUT itself is never reopened.
    my $outFh = \*STDOUT;
    if ($printToFile) {
        open my $fileFh, '>', $outputFile
            or die "Cannot create output file $outputFile: $!\n";
        $outFh = $fileFh;
    }

    while ( $objFastaParser->MoreEntries ) {
        $objFastaParser->ReadNextEntry( -rawFormat => 1 );

        # With a non-empty tag filter, skip entries whose name is not listed.
        next if ( %inputTags && !$inputTags{ $objFastaParser->Name } );

        if ($optRevComp) {
            $objFastaParser->reverseComp;
        }

        my $fastaTag = $objFastaParser->Tag;
        my $seq      = $objFastaParser->Seq;

        # NOTE(review): no newline is appended after $seq; presumably the
        # raw-format sequence carries its own line breaks -- confirm against
        # PGF::Parsers::FastaParse.
        my $entry = $optReportLengthOnly
            ? "$fastaTag\n" . $objFastaParser->Length . "\n"
            : "$fastaTag\n$seq";

        print {$outFh} $entry;
    }

    # Buffered write errors (e.g. a full disk) only surface at close time, so
    # the close of the output file is checked.
    if ($printToFile) {
        close $outFh
            or die "Cannot close output file $outputFile: $!\n";
    }

    return;
}

#============================================================================#
# usage( [$verbose] )
#
# Prints the usage text taken from this script's POD and ends the program.
#
#   $verbose  verbosity level passed to pod2usage; a missing or false value
#             falls back to 1.
sub usage {
    my ($verbose) = @_;
    $verbose ||= 1;

    pod2usage( -verbose => $verbose );

    # pod2usage is documented to exit on its own by default; this explicit
    # exit guarantees termination with a failure status regardless.
    exit 1;
}

#============================================================================#
