package PGF::GapResolution::AlignAnchorSeqs; =head1 NAME =head1 SYNOPSIS use PGF::GapResolution::AlignAnchorSeqs; my $objAligner = PGF::GapResolution::AlignAnchorSeqs->new(); # Set alignment filtering conditions. # $objAligner->setFormatdb($optFormatdb); $objAligner->setAligner($optAligner); $objAligner->setAlignerParams($optAlignerParams); $objAligner->setMinPctIdentity($optMinAnchorSeqPctIdentity); $objAligner->setMinAlignmentLength($optMinAnchorSeqAlignmentLength); # Perform alignment. # $objAligner->alignAnchorSeqs($contigsFasta, $leftAnchorSeqFile, $rightAnchorSeqFile, $leftAnchorSeqBlastOutFile, $rightAnchorSeqBlastOutFile); # Parse alignment results. # my ($leftAnchorContig, $leftAnchorStart, $leftAnchorEnd) = $objAligner->parseAlignmentFile($leftAnchorSeqBlastOutFile); my ($rightAnchorContig, $rightAnchorStart, $rightAnchorEnd) = $objAligner->parseAlignmentFile($rightAnchorSeqBlastOutFile); my ($leftAnchorContigLength, $rightAnchorContigLength) = getContigLengths( $contigsFasta, $leftAnchorContig, $rightAnchorContig); =head1 DESCRIPTION As part of the Gap Resolution sub system, this module is used to align the anchor sequences to the contigs of the assembly and to retrieve the alignment results. =head1 AUTHOR(s) Stephan Trong =head1 HISTORY =over =item * Stephan Trong 12/05/2008 Creation =back =cut #============================================================================# use strict; use Carp; use Carp qw(cluck); use FindBin qw($RealBin); use lib "$FindBin::RealBin/../lib"; my $_DEBUG = 0; #============================================================================# sub new { my $class = shift; my $self = { minPctIdentity=>95, minAlignLength=>40, formatdb=>'/usr/X11R6/bin/formatdb', aligner=>'/usr/X11R6/bin/blastall', alignerParams=>'-p blatn -F F -e 1e-5', }; bless $self, $class; return $self; } #============================================================================# sub setMinPctIdentity { my $self = shift; my $minPctIdentity = shift; $self->{minPctIdentity} = $minPctIdentity; } #============================================================================# sub setMinAlignmentLength { my $self = shift; my $minAlignLength= shift; $self->{minAlignLength} = $minAlignLength; } #============================================================================# sub setAligner{ my $self = shift; my $aligner= shift; $self->{aligner} = $aligner; } #============================================================================# sub setAlignerParams{ my $self = shift; my $params = shift; $self->{alignerParams} = $params; } #============================================================================# sub setFormatdb { my $self = shift; my $formatdb = shift; $self->{formatdb} = $formatdb; } #============================================================================# sub alignAnchorSeqs { my $self = shift; my $referenceFasta = shift; my $leftAnchorSeqFasta = shift; my $rightAnchorSeqFasta = shift; my $leftAlignmentOutputFile = shift; my $rightAlignmentOutputFile = shift; # Create blast db file by executing script that runs formatdb. # $self->_runFormatdb($referenceFasta); # Align anchor sequences to reference sequence using blastall. # $self->_runBlast($referenceFasta, $leftAnchorSeqFasta, $leftAlignmentOutputFile); $self->_runBlast($referenceFasta, $rightAnchorSeqFasta, $rightAlignmentOutputFile); } #============================================================================# sub _runFormatdb { my $self = shift; my $fastaFile = shift; my $formatDbCommand = $self->{formatdb}; my $cmd = "$formatDbCommand -p F -o -i $fastaFile"; print "CMD: $cmd\n" if $_DEBUG; my $success = system($cmd); confess "Reference database creation failed!\n" if $success != 0; } #============================================================================# sub _runBlast { my $self = shift; my $refDb = shift; my $fastaFile = shift; my $outputFile = shift; my $aligner = $self->{aligner}; my $params = $self->{alignerParams}; my $cmd = "$aligner $params -m 8 -d $refDb -i $fastaFile -o $outputFile"; print "CMD: $cmd\n" if $_DEBUG; my $success = system($cmd); confess "Aligner failed!\n" if $success != 0; } #============================================================================# sub parseAlignmentFile { my $self = shift; my $file = shift; my $alignedContig = ''; my $alignedStart = ''; my $alignedEnd = ''; my $minPercentId = $self->{minPctIdentity}; my $minAlignLength = $self->{minAlignLength}; open FH, "$file" or confess "Cannot open $file for reading ($!)\n"; while (my $line = ) { chomp $line; my @entries = split /\s+/, $line; my $queryContig = $entries[0]; my $refContig = $entries[1]; my $percentId = $entries[2]; my $alignLength = $entries[3]; my $queryStart = $entries[6]; my $queryEnd = $entries[7]; my $refStart = $entries[8]; my $refEnd = $entries[9]; # ignore self hit # next if ($queryStart == $refStart && $queryEnd == $refEnd); # ignore self hit if contig is reversce complemented # next if ($queryStart == $refEnd && $queryEnd == $refStart); if ( $percentId >= $minPercentId && $alignLength >= $minAlignLength ) { $alignedContig = $refContig; $alignedStart = $refStart; $alignedEnd = $refEnd; last; } } close FH; return $alignedContig, $alignedStart, $alignedEnd; } #============================================================================# 1;