#!/usr/bin/env perl

=head1 NAME

idRepeatBoundary.pl

=head1 SYNOPSIS
 
 idRepeatNoundary.pl [options] -if <input fasta> 
                               -id <input blast database> 
                               -e <contig end> 
                               -blastcmd <blastcommand>
                               -ao <AnchorFileName>
                               -bo <BoundaryFileName>
  Options:
  -h    this help message
  -d    turn debug printing on
  -id   input blast database (REQUIRED)
  -if   input query fasta    (REQUIRED)
  -e    contig end           (REQUIRED)
  -li   library info file    (REQUIRED)
  -blastcmd  blast command   (REQUIRED)
        ex /usr/X11R6/bin/blastall -p blastn -F F -e 1e-5
        #-m 8 is hardcoded, and -i -d are command line options
  -bo   boundary output file name (REQUIRED) ex  /thisdir/ThisCtg.boundary"
  -ao   anchor output file name (REQUIRED)  ex "/thisotherdir/ThisCtg.anchor"
  -k    keep temporary blast files
  -al   unique anchor size  (default=50)
  -pl   length of boundary padding  (default=50)
  -wl   window size to examine for repeats  (default=500)
  -sl   subwindow size to examine for repeats  (default=100)
  -ws   window step size  (default=250)
  -rl   definition: repeat minimum length  (default=100)
  -ri   definition: repeat minimum identity  (default=95)

  Input fasta is aligned to a database using blastn -m 8 and the unique/repeat
boundary is returned as well as a unique anchor in fasta format.
 
=head1 DESCRIPTION

Given a newbler assembly where 2 contigs in a scaffold
flank a particular gap,

     ctgA            ctgB
 |----------| gap |----------|
 1          N     1          N


This program is designed to identify the boundary between unique and repeat 
sequence within a contig by aligning the contig to a blast database of the
entire assembly. Blast results are parsed for any repeat >= repeat length (-rl)
 with percent identity >= repeat identity (-ri).  

The boundary locations of unique and repeat as well as the fasta sequence for 
a unique anchor are deposited in the location of the input query sequence.

The unique-repeat boundary is defined by examining blast results within a 
specified size window X to Y (-wl), starting in a small subwindow Y-Z defined 
by subwindow length (-sl).

<-gap
ctgNNN
|----------------------------------------------------....
X            Y-Z        Y    
^.............|.........^


If there are no repeat hits then the boundary is X+padding (-pl). The unique 
anchor is then: X+padding (-pl) to X+padding+anchor length (-al).

If there are blast hits then the hits are merged to consense overlapping, adjacent,
 and redundant hits, and the presence of merged repeat is first checked for in 
the sub-window, defined by Y-subwindow length (-sl) to Y.  If repeat is present
 within this sub-window then the entire window X to Y is shifted by window 
step (-ws).  The window keeps sliding by -ws, and the presence of repeat is 
checked in the subwindow until no repeat is found.

Once the presence of repeat is not detected in the subwindow, the rest of the 
window X to Y - subwindow length (-sl) is checked for repeat and the 
unqique/repeat boundary is defined as the end of the last repeat within that 
window + padding (-pl):

<-gap
ctgNNN
--------||||||----|||||---------------------....
           ^repeat
                              Y minus Z
X.................................|........Y
                              ^        ^- no repeat detected in sub-window
                              |
                   boundary  -|
                      <-(-pl)->
                               x      y
      repeat bounds            >------< unique anchor
|-----------------------------|                      
                               |------.....
                                unique bounds
 
The END of the unique boundary is defined by the largest insert size library
for the project x 1.5, starting at the beginning of the contig, no the beginning
of the unique boundary.

If the boundary end coordinate goes past the end of the contig then the contig 
is considered all repeat and no anchor tag is created.

The unique anchor (see x,y) start is then defined as 1 + boundary position + 
padding length (-pl), and end is start + anchor length (-al).


The boundaries of unique and repeat as well as unique anchor sequence are 
named and deposited according to -bo -ao respectively.


boundary file:
#uniqueStart    uniqueEnd       repeatStart     repeatEnd
51      2080    1       50


anchor file:
>contig00005  length=2080   numreads=426
GGTCGGCGACGTAGCCGGGCGCGGCGCCGTCCGGCACCTCGCCGTGGAAC

Contig end, specified by -e, is important in that if the contig resides on the 
left side of a gap, then the fasta sequence is reverse complimented and treated
 as a contig who resides on the right side of the gap (for coding efficiency).
The boundary and anchor sequence are identified, and reverse complimented.

The coordinates of unique and repeat can then be used to extract which reads 
can be safely gathered for subprojecting, and the anchor is used to check if 
reassembly of a subproject is in a single contig.

=head1 VERSION

$Revision: 1.23 $

$Date: 2010-01-07 19:41:48 $

=head1 AUTHOR(S)

Kurt M. LaButti

=head1 HISTORY

=over

=item *

K.LaButti 2008/10/23 Creation

=back

=cut

 

use strict;
use warnings;
use Carp;
use Pod::Usage;
use FindBin qw($RealBin);
use lib "$RealBin/../lib";
use File::Temp qw(tempdir); 
use PGF::GapResolution::ParseLibInfo;
use PGF::Polishing::FindTargets::Region;
use PGF::GapResolution::Coords qw(reverseCompCoords);
use PGF::Utilities::FastaSeq qw(formatSeq);
use Getopt::Long;
use vars qw( $optHelp $optDebug $optBoundaryPadLength $optOutputName
	     $optAnchorLength $optWindowLength $optSubWindowLength $optWindowStep 
	     $optCleanupTmpFiles $optRepeatLength $optRepeatIdentity 
	     $optContigEnd $optInputDb $optInputQueryFasta $optBlastCommand
             $optOutputBoundaryFile $optOutputAnchorFile $optLibraryInfoFile);

#============================================================================#
# INPUT OPTIONS
#============================================================================#
if( !GetOptions(
		"h"       => \$optHelp,
		"d"       => \$optDebug,
		"k"       => \$optCleanupTmpFiles,
		"blastcmd=s"   => \$optBlastCommand,
		"al=i"    => \$optAnchorLength,
		"pl=i"    => \$optBoundaryPadLength,
		"wl=i"    => \$optWindowLength,
		"ws=i"    => \$optWindowStep,
		"li=s"    => \$optLibraryInfoFile,
		"sl=i"    => \$optSubWindowLength,
		"rl=i"    => \$optRepeatLength,
		"ri=i"    => \$optRepeatIdentity,
		"id=s"    => \$optInputDb,
		"if=s"    => \$optInputQueryFasta,
		"e=s"     => \$optContigEnd,
		"bo=s"    => \$optOutputBoundaryFile,
		"ao=s"    => \$optOutputAnchorFile
	)
    ) {
    printhelp(1);
}

if ( defined $optHelp ) {
    printhelp(2);
}

printhelp() if !$optInputDb;
printhelp() if !$optInputQueryFasta;
printhelp() if !$optContigEnd;
printhelp() if !$optLibraryInfoFile;
printhelp() if !$optBlastCommand;
printhelp() if !$optOutputBoundaryFile;
printhelp() if !$optOutputAnchorFile;



if ( !-s $optInputQueryFasta ) {
   confess "Input query fasta file $optInputQueryFasta does not exist or is empty.\n";
}

if ( !-s "${optInputDb}.nin" ) {
   confess "Input database $optInputDb does not exist or not writeable.\n";
}

if ( !-s "$optLibraryInfoFile" ) {
   confess "Input library file $optLibraryInfoFile does not exist or is empty.\n";
}

if ($optContigEnd !~ /[rl]/i ) {
    confess "contigEnd [$optContigEnd] is not valid.  Use [LlRr].\n";
}

#============================================================================#
# INITIALIZE VARIABLES
#============================================================================#

#keep temporary output files -k (1=cleanup)
#
my $cleanupTmpFiles = defined $optCleanupTmpFiles ? 0 : 1;

#blast command, input,  and output
#
my $blastCommand = $optBlastCommand;
my $inputQueryFasta = $optInputQueryFasta;

# library information object
#
my $libraryObj = PGF::GapResolution::ParseLibInfo->new($optLibraryInfoFile);

# findRegion objoct (for merging overlapping/adjacent repeats)
#
my $findRegionObj = PGF::Polishing::FindTargets::Region->new();

#repeat criteria
#
my $repeatLength   = defined $optRepeatLength   ? $optRepeatLength   : 100;
my $repeatIdentity = defined $optRepeatIdentity ? $optRepeatIdentity : 95;

#window and boundary criteria
#
my $windowLength       = defined $optWindowLength      ? $optWindowLength      : 500;
my $windowStep         = defined $optWindowStep        ? $optWindowStep        : 250;
my $uniqueAnchorLength = defined $optAnchorLength      ? $optAnchorLength      : 50;
my $boundaryPadLength  = defined $optBoundaryPadLength ? $optBoundaryPadLength : 50;
my $subWindowLength    = defined $optSubWindowLength   ? $optSubWindowLength   : 100;
my $outputBoundaryFile    = $optOutputBoundaryFile;
my $outputAnchorFile      = $optOutputAnchorFile;


#print all params if -d
#
if (defined $optDebug) {
    print "program parameters:\n";
    print "inputDb:         $optInputDb\n";
    print "inputFasta:      $optInputQueryFasta\n";
    print "BLASTcmd:        $optBlastCommand\n"; 
    print "contigEnd:       $optContigEnd\n";
    print "repeatLen:       $repeatLength\n";
    print "repeatId:        $repeatIdentity%\n";
    print "windowLength:    $windowLength\n";
    print "subWindowLength: $subWindowLength\n";
    print "windowStep:      $windowStep\n";
    print "anchorLength:    $uniqueAnchorLength\n";
    print "boundaryPad:     $boundaryPadLength\n";
    print "outputBoundaryFile  $outputBoundaryFile\n";
    print "outputAnchorFile    $outputAnchorFile\n";
    print "cleanup:         $cleanupTmpFiles\n\n";
}

#============================================================================#
# MAIN
#============================================================================#

# get size of contig using fastaParser.pl
#
my $contigLength = `$RealBin/fastaParser.pl -l -f $inputQueryFasta`;
chomp $contigLength;
$contigLength =~ s/^>.+\n//;
print "$RealBin/fastaParser.pl -l -f $inputQueryFasta = $contigLength bp\n" if ($optDebug);

#reverse complement contig fasta if contig is on the L side of a gap
#
if ($optContigEnd =~ /[Ll]/) {
    print "reverse!\n" if ($optDebug);;
    $inputQueryFasta  = reverseComp($inputQueryFasta);
}

#align contig to database
#
my $tmpOutputBlastFile = "${inputQueryFasta}.mega";
my $result = system("$optBlastCommand -m 8 -d $optInputDb -i $inputQueryFasta -o $tmpOutputBlastFile");
unless ($result == 0) { confess "Blast failed ($result)\n";}
print "blast command=$blastCommand -m 8 -d $optInputDb -i $inputQueryFasta -o $tmpOutputBlastFile\n" if ($optDebug) ;
print "blast success=$result\n\n" if ($optDebug);

#parse blast output
#
my $refArrayOfHashBlastHits = readInBlastOutput("$tmpOutputBlastFile");
my @arrayOfHashBlastHits = @$refArrayOfHashBlastHits;

if ($optDebug) {    
    for my $row (@arrayOfHashBlastHits) {
	print "repeat\tstart=$row->{start} end=$row->{stop}\n";
    }
}

# cluster overlapping repeat hits if there are > 1 hits
#
my @mergedRegionsArrayOfHash ;

if (@arrayOfHashBlastHits > 1) {
    my $mergeIfThisManyBasesApart = 0;
    @mergedRegionsArrayOfHash = $findRegionObj->mergeRegions($refArrayOfHashBlastHits,
							     $mergeIfThisManyBasesApart);
    if ($optDebug) {    
	for my $row (@mergedRegionsArrayOfHash  ) {
	    print "mergedRepeat\tstart=$row->{start} end=$row->{stop}\n";
	}
    }   
} else {
    @mergedRegionsArrayOfHash =  @arrayOfHashBlastHits;
}

# determine the largets insert library, and find it's insert size
#
my $maxInsertLibrary = $libraryObj->getMaxLibrary();
my $maxInsertSize = $libraryObj->getInsertSize($maxInsertLibrary);
my $maxInsertBuffer = sprintf "%.0f", ($maxInsertSize * 1.5);

#identify repeat boundary
#
my $boundary = identifyBoundary($contigLength,
				$windowLength,
				$subWindowLength, 
				$windowStep, 
				\@mergedRegionsArrayOfHash);

print "Boundary: $boundary\n" if ($optDebug);





#check if boundary is past contig end
#
my $uniqueAnchorSeq;
my $seqLength;  #this is ancient and I had to add the contig length earlier and more recently as contigLength
if ($boundary >= $contigLength){
    print "Boundary ($boundary) is > contig length ($contigLength):  contig is all repeat, skipping anchor generation and setting boundary to contig length.\n" if $optDebug;
    $seqLength = $contigLength;
    $boundary = $contigLength;
    #skip anchor generation
} else {
    
    #generate and deposit unique Anchor, and get contig length
    #
    print "#generating anchors...\n" if $optDebug;
    ($uniqueAnchorSeq, $seqLength) = generateAnchor($inputQueryFasta, 
						    $optContigEnd, 
						    $boundary, 
						    $uniqueAnchorLength);
    print "SeqLength: $seqLength\n" if ($optDebug);

}




#deposit unique/repeat boundary file
#
printData($boundary, 
	  $seqLength, 
	  $optContigEnd, 
	  $outputBoundaryFile, 
	  $outputAnchorFile, 
	  $maxInsertBuffer,	  
	  $uniqueAnchorSeq);

#cleanup temp reverseC fasta file
#
unlink $inputQueryFasta if ($cleanupTmpFiles == 1 && $optContigEnd =~ /[Ll]/);










exit 0;


#============================================================================#
# SUBROUTINES
#============================================================================#
sub printhelp {
    my $verbose = shift || 1;
    pod2usage(-verbose=>$verbose);
    exit 1;
}



#============================================================================#
sub readInBlastOutput {

    my ($file) = @_;
    my @arrayOfHashBlastHits;  #array of hash

    open BLAST, "$file" or confess "Cannot open $file for reading ($!)\n";
    
    while (my $line = <BLAST>) {
	chomp $line;
	my @tmp = split /\s+/, $line;

	#print "possiblehit: $tmp[6] $tmp[7]\n" if $optDebug;
#ignore self hit
#	
	if ( $tmp[0] eq $tmp[1] && $tmp[6] <= ($tmp[8]+5) && $tmp[7] >= ($tmp[9]-5) ) {

#ignore self hit if contig is rc
#
	} elsif ( $tmp[0] eq $tmp[1] &&  $tmp[6] <= ($tmp[9]+5) && $tmp[7] >=  ($tmp[8]-5) ) {
	
#record other hits start/end on query that meet len/pcnt id criteria
#
	} elsif ($tmp[2] >= $repeatIdentity && $tmp[3] >= $repeatLength) {
	    print "\trecord repeat hit: $tmp[6] $tmp[7]\n" if $optDebug;
	    push(@arrayOfHashBlastHits, {start=>$tmp[6], stop=>$tmp[7]}  );	    
	}
	
    } 

    close BLAST;

#cleanup blast files if $cleanupTempFiles = 1
#    
    unlink $file if ($cleanupTmpFiles == 1);
    
#sort array
#
    @arrayOfHashBlastHits = sort { $a->{start} <=> $b->{start} } @arrayOfHashBlastHits;
   
    
    return \@arrayOfHashBlastHits;
}




#============================================================================#
sub identifyBoundary {

    my ($contigLength,
	$windowLength,
	$subWindowLength,
	$windowStep,
	$mergedRegionsArrayOfHash) = @_;
    
    my $windowStart = 1; 
    my $windowEnd = $windowStart+$windowLength;
    my $subWindowStart = $windowEnd-$subWindowLength;
    my $index = 0;
    my $boundary = "FALSE";
    my $repeatFound = 0;

    
#array is empty = no repeats
#    
    if ($#$mergedRegionsArrayOfHash < 0) { 
	print "identifyBoundary: no repeats!\n" if ($optDebug);
	$boundary = $boundaryPadLength;
	
    } else {	
	
#
# FOR EACH WINDOW CHECK FOR REPEAT

      OUTER:
	for (my $windowEnd = $windowLength; $boundary eq "FALSE"; $windowEnd+=$windowStep) {
	    $subWindowStart = $windowEnd - $subWindowLength;	
	   

#check if window start is past contig end
	#    if ($subWindowStart >= $contigLength) {
	#	#you've gone past the contig end therefore contig is all repeat
	#	print "subwindowStart $subWindowStart>= contig length $contigLength: setting boundary to $contigLength\n" if $optDebug;
	#	$boundary = "$contigLength";
	#	last;
#check if the window end is past contig end
#	    } elsif ($windowEnd > $contigLength) {
#		print "subwindowEnd $windowEnd>= contig length $contigLength: resetting window end to $contigLength\n" if $optDebug;
#		$windowEnd = $contigLength;

#	    }


	    print "CHECK:(subWinStart-subWinEnd) $subWindowStart\-$windowEnd... \n" if $optDebug;
#loop through each repeat
#	
	    for  (0..$#$mergedRegionsArrayOfHash) {
		my $repeatStart = $mergedRegionsArrayOfHash->[$_]{'start'};
		my $repeatEnd   = $mergedRegionsArrayOfHash->[$_]{'stop'};
		print "\tidendityBoundary: repeatStart=$repeatStart repeatEnd=$repeatEnd\n" if ($optDebug);
#check for repeat in sub window
#
		$repeatFound = isRepeat($subWindowStart, $windowEnd, $repeatStart, $repeatEnd);
		
#repeat found! go back and try next window 
#
		if ($repeatFound == 1) {
		    print "next window\n" if $optDebug;	
		    next OUTER; #next window in the outer loop		    
#repeat found, but its past the window, set to end of last repeat if there is one, or = bound. len
#		    
		} elsif ($repeatStart > $windowEnd) {
		    print "repeat is past window end ($repeatStart > $windowEnd)\n" if $optDebug;
		    #$boundary =  $mergedRegionsArrayOfHash->[$_-1]{'stop'}+$boundaryPadLength;
		    $boundary =  (($_-1) < 0) 
			? $boundaryPadLength
			: $mergedRegionsArrayOfHash->[$_-1]{'stop'}+$boundaryPadLength;
		    last;
#this repeat not in window
		} else { 
		    print "this repeat not in win, keep checking repeats\n" if $optDebug;
		    next; # no repeat in window, keep going to check other repeats
		}
		
		
	    }
	    
	    
	    #no repeats found, set boundary to last repeat end
	    if ($boundary eq "FALSE") {
		print "doesn't seem to be any repeats, setting to furthest position\n" if $optDebug;
		$boundary =  $mergedRegionsArrayOfHash->[-1]{'stop'}+$boundaryPadLength; 
	    }
	}
	
	
	
    }
    
    
    
    return $boundary;
}




#============================================================================#
sub isRepeat {

    my ($windowStart, $windowEnd, $repeatStart, $repeatEnd) = @_;
    my $repeat;


    print "\t\tisRepeat: $windowStart, $windowEnd, $repeatStart, $repeatEnd =" if ($optDebug);


    #if repeat matches any of the below conditions 
    #it is within the window boundary
    
#completely spans window
    if ($repeatStart <= $windowStart && $repeatEnd >= $windowStart ){
	$repeat = 1;
#completely within window
    } elsif ($repeatStart >= $windowStart && $repeatEnd <= $windowEnd){ 
	$repeat = 1;
#starts in window and extends past
    } elsif ($repeatStart <= $windowEnd && $repeatEnd >= $windowEnd ){
	$repeat = 1;
#starts before window and extends into
    } elsif ($repeatStart <= $windowStart && $repeatEnd >= $windowEnd) {
	$repeat = 1;
    } else {
	#no repeat here
	$repeat = 0;
    }
    
    print "$repeat\n" if ($optDebug);
    return $repeat;
}




#============================================================================#
sub generateAnchor {

    my ($seqFile, $contigEnd, $boundary, $anchorLength) = @_;
    my $start = $boundary;
#    my $end = $boundary + $anchorLength;
    my $uniqueAnchor;
    my $seqLength;
    
    open IN, "$seqFile" or confess "Can't open $seqFile\n";
   
    $/ = "\n>";
    
#splice the seq from the main fasta using coordinates
#
    while(<IN>){
	s/\n>$//;
	my @temp = split("\n",$_);
	(my $id = shift @temp)=~ s/^>//;
	my $seq = join("",@temp);
	$seqLength = length $seq;
	$seq = substr($seq, $start, $anchorLength); 
	$seq = formatSeq($seq,60)."\n";
	$uniqueAnchor = ">$id\n$seq";
	print "seqlen=$seqLength anchorStart=$start $anchorLength $seq\n" if $optDebug;
    }
    close IN;
    
    return ($uniqueAnchor, $seqLength);
}





#============================================================================#
sub printData {

    my ($boundary, $seqLength, $contigEnd, $outputBoundaryFile, $outputAnchorFile, $maxInsertBuffer, $uniqueAnchorSeq) = @_;   

    print "printData: ($boundary, $seqLength, $contigEnd, $outputBoundaryFile, $outputAnchorFile, $maxInsertBuffer, " if $optDebug;

    if ($optDebug && $uniqueAnchorSeq) { 
	print "$uniqueAnchorSeq\n" ;
    } elsif ($optDebug) {
	print "anchor=n/a\n";
    }
    

   
    my $beginUnique = $boundary+1;
    my $endUnique   = ($maxInsertBuffer >= $seqLength) ? $seqLength : $maxInsertBuffer;   
    my $beginRepeat = 1;
    my $endRepeat   = $boundary;
    my $okToPrintAnchor = 1;



# check for an "all repeat" contig
#

#basically i jerry rigged the code so if it sees a boundary > contig len then it must
#be a contig that is all repeat.  So in order to skip anchor creation and generate
#the correct boundary file coords I have to resort to these hack loops

    if ($boundary >= $seqLength) {
	print "boundary is past ctg end!resetting unique to 0\n" if $optDebug;
	$beginUnique = 0;
	$endUnique = 0;
	$endRepeat = $seqLength;
	$okToPrintAnchor = 0;
	print "CHECK begU=$beginUnique endU=$endUnique begR=$beginRepeat endR=$endRepeat\n" if $optDebug;
    }



#reverse comp coords, if we're dealing w/ an all repeat contig skip doing that so we can set unique start/end to 0
#
    if ($contigEnd =~ /[lL]/) {
	$endUnique   = reverseCompCoords($seqLength, $beginUnique)if ($okToPrintAnchor == 1);
	$beginUnique = ($maxInsertBuffer > $seqLength) ? "1" : reverseCompCoords($seqLength, $maxInsertBuffer)if ($okToPrintAnchor == 1);
	$endRepeat   = reverseCompCoords($seqLength, $beginRepeat);
	$beginRepeat = reverseCompCoords($seqLength,$boundary);
	$uniqueAnchorSeq = revCompInMem($uniqueAnchorSeq) if ($okToPrintAnchor == 1);
    }
 


# debug checking
#
    print "CHECK2 begU=$beginUnique endU=$endUnique begR=$beginRepeat endR=$endRepeat printAnchor=$okToPrintAnchor " if $optDebug;
    if ($optDebug && $uniqueAnchorSeq) { 
	print "anchor=$uniqueAnchorSeq\n" ;
    } elsif ($optDebug) {
	print "anchor=n/a\n";
    }
    

# set boundary info data and print
#
    my $boundaryData = "#uniqueStart\tuniqueEnd\trepeatStart\trepeatEnd\n$beginUnique\t$endUnique\t$beginRepeat\t$endRepeat\n";

    
    open OUTB, ">$outputBoundaryFile" or confess "Can't open $outputBoundaryFile ($!)\n";
    print OUTB "$boundaryData\n";
    close OUTB;

    if ($okToPrintAnchor == 1) {
	open OUTA, ">$outputAnchorFile"   or confess "Can't open $outputAnchorFile ($!)\n";
	print OUTA "$uniqueAnchorSeq\n";        
	close OUTA;
    }
    
    
}



#============================================================================#
sub reverseComp {

    my ($fastaFile) = @_;
    my $fastaFileRcName = "${fastaFile}.rc";

    open(F,$fastaFile);
    open REVC, ">$fastaFileRcName" or confess "Can't open $fastaFileRcName for writing ($!)\n";


    my $seq = "";
    my $current_entry = "NO_CURRENT_ENTRY";
    my $bpl = 60;
    
    
    while (my $i = <F>) {
	chomp $i;
	if ($i =~ /^>/) {
	    if (!($current_entry eq  "NO_CURRENT_ENTRY")) {
		print REVC "$current_entry\n";
		
		my $rev = reverse($seq);
		$rev =~ tr/acgtACGT/tgcaTGCA/;
		$rev =~ tr/mrykvhdbMRYKVHDB/kyrmbdhvKYRMBDHV/;
		my $n_lines = sprintf("%d",length($rev)/$bpl);
		if ((length($rev) % $bpl) != 0) {$n_lines++;}
		
		for (my $j = 0;$j<$n_lines;$j++) {
		    my $seq_line = substr($rev,$j*$bpl,$bpl);
		    my $text = ($seq_line . "\n");
		    print $text;
		}
	    }
	$seq = "";
	    $current_entry = $i;
	} else {
	    $seq .= $i;
	}
    }
    
    print REVC "$current_entry\n";
    
    my $rev = reverse($seq);
    $rev =~ tr/acgtACGT/tgcaTGCA/;
    my $n_lines = sprintf("%d",length($rev)/$bpl);
    if ((length($rev) % $bpl) != 0) {$n_lines++;}
    
    for (my $j = 0;$j<$n_lines;$j++) {
	my $seq_line = substr($rev,$j*$bpl,$bpl);
	my $text = ($seq_line . "\n");
	print REVC $text;
    }
    
    close F;
    close REVC;
    
    return $fastaFileRcName;

}



#============================================================================#
sub revCompInMem {

    my ($fasta) = @_;
    
    my ($header, @sequences) = split /\n/, $fasta;

    my $seq = join "", @sequences;

    $seq = revCString($seq);
    $seq = formatSeq($seq,60)."\n";
    $seq = "$header\n$seq";

     return $seq;
}



#============================================================================#
sub revCString {
    my ($seq) = @_;
    $seq = reverse($seq);
    $seq =~ tr/ACGTacgt/TGCAtgca/;
     return $seq;
}
