package PGF::Parsers::ParseAce::Unpad;

=head1 NAME

PGF::Parsers::ParseAce::Unpad - Module to convert padded positions to
unpadded positions and vice versa.

=head1 VERSION

$Revision: 1.3 $

$Date: 2009-08-26 17:18:34 $

=head1 SYNOPSIS

Example:

    use PGF::Parsers::ParseAce::Unpad;

    my $obj = PGF::Parsers::ParseAce::Unpad->new();

    foreach my $paddedSequence (@paddedSequences) {
        $obj->use($paddedSequence);
        my $unpaddedPosition = $obj->padToUnpad($padStart);
        my $paddedPosition   = $obj->unpadToPad($unpadStart);
        ...
    }

=head1 DESCRIPTION

A padded sequence is a sequence in which pad characters (C<*>) have been
inserted between bases.  This module indexes the pads of one padded
sequence at a time and translates positions between the padded and the
unpadded coordinate systems.  Positions are 1-based: the first character
of the sequence is position 1.

=head1 METHODS

=head2 new

    my $obj = PGF::Parsers::ParseAce::Unpad->new();

Creates a converter with empty lookup tables.  Until C<use> is called
both conversion methods return their argument unchanged.

=head2 use

    $obj->use($paddedSequence);

Scans C<$paddedSequence> for C<*> characters and rebuilds the lookup
tables, replacing whatever a previous call stored.  An undefined sequence
is treated as an empty one.  Returns the number of entries in the gap
table (always at least 1); callers normally ignore it.

=head2 padToUnpad

    my $unpaddedPosition = $obj->padToUnpad($paddedPosition);

Returns C<$paddedPosition> minus the number of pads located at or before
it.  A position that is itself a pad therefore maps to the unpadded
position of the closest base before it (0 when no base precedes it).

=head2 unpadToPad

    my $paddedPosition = $obj->unpadToPad($unpaddedPosition);

Returns C<$unpaddedPosition> plus the number of pads that precede that
base in the padded sequence.

=head1 AUTHOR(S)

Stephan Trong

=head1 HISTORY

=over

=item *

S.Trong 2006/08/31 Creation

=back

=cut

#============================================================================#
use strict;
use warnings;
use FindBin qw($RealBin);
use lib "$FindBin::RealBin/../../../lib";

#============================================================================#
sub new {
    my $class = shift;

    # Start with empty tables so the conversion methods are usable (as
    # identity mappings) even before use() has been called.
    my $self = {
        _padPositions => [],
        _gapPositions => [],
        _gapLengths   => [],
    };

    return bless $self, $class;
}

#============================================================================#
sub use {
    my $self                 = shift;
    my $paddedContigSequence = shift;

    # Treat a missing sequence as an empty one instead of matching
    # against undef (which only produces warnings).
    $paddedContigSequence = '' if !defined $paddedContigSequence;

    # Tables built by this call:
    #   @padPositions - padded (1-based) position of every pad character.
    #   @gapPositions - for every run of consecutive pads, the unpadded
    #                   position of the first base following that run.
    #   @gapLengths   - cumulative number of pads up to and including the
    #                   run described by the same index of @gapPositions.
    my @padPositions;
    my @gapPositions;
    my @gapLengths;

    my $gapLength       = 0;    # pads seen so far (cumulative)
    my $lastPadPosition = 0;    # padded position of the previous pad

    # Create pad position, gap position and gap length tables.
    while ( $paddedContigSequence =~ /\*/g ) {

        # pos() is the offset just past the one-character match, which
        # is the 1-based position of the pad itself.
        my $padPosition = pos $paddedContigSequence;
        push @padPositions, $padPosition;

        # A pad that is not adjacent to the previous one starts a new
        # run, so the previous run is complete and can be recorded.
        if ( $lastPadPosition > 0 && $padPosition - 1 != $lastPadPosition ) {
            push @gapPositions, $lastPadPosition - $gapLength + 1;
            push @gapLengths,   $gapLength;
        }

        $gapLength++;
        $lastPadPosition = $padPosition;
    }

    # Record the final run.  When the sequence has no pads this stores
    # the neutral entry (position 1, zero pads).
    push @gapPositions, $lastPadPosition - $gapLength + 1;
    push @gapLengths,   $gapLength;

    $self->{_padPositions} = \@padPositions;
    $self->{_gapPositions} = \@gapPositions;
    $self->{_gapLengths}   = \@gapLengths;

    # Historically this method returned the value of its last push, i.e.
    # the size of the gap table; keep that for backward compatibility.
    return scalar @gapLengths;
}

#============================================================================#
sub padToUnpad {
    my $self        = shift;
    my $padPosition = shift;

    my $refPadPositions = $self->{_padPositions} || [];

    # No pads at all, or the base lies before the first pad: nothing to
    # subtract.
    if ( !@{$refPadPositions} || $padPosition < $refPadPositions->[0] ) {
        return $padPosition;
    }

    # _binarySearch returns the index of the last pad located at or
    # before $padPosition (the last index when the base lies after the
    # final pad), so index + 1 is the number of pads to subtract.
    my $numberOfPadsAtBase =
      1 + _binarySearch( $refPadPositions, $padPosition );

    return $padPosition - $numberOfPadsAtBase;
}

#============================================================================#
sub unpadToPad {
    my $self          = shift;
    my $unpadPosition = shift;

    my $refGapPositions = $self->{_gapPositions} || [];
    my $refGapLengths   = $self->{_gapLengths}   || [];

    # No table yet, or the base lies before the first run of pads:
    # nothing to add.
    if ( !@{$refGapPositions} || $unpadPosition < $refGapPositions->[0] ) {
        return $unpadPosition;
    }

    # The last gap entry at or before $unpadPosition carries the
    # cumulative number of pads that precede this base.
    my $index = _binarySearch( $refGapPositions, $unpadPosition );
    my $numberOfPadsBeforeBase = $refGapLengths->[$index];

    return $unpadPosition + $numberOfPadsBeforeBase;
}

#============================================================================#
sub _binarySearch {
    my ( $A_padPositions, $padPosition ) = @_;

    # Binary search over an array ref of ascending positions.  Returns
    # the index of the element equal to $padPosition or, if the position
    # falls between two consecutive elements, the index of the lower
    # one.  Positions outside the covered range are clamped: 0 is
    # returned for positions before the first element (and for an empty
    # array), the last index for positions after the last element.
    return 0 if !$A_padPositions || !@{$A_padPositions};

    my ( $low, $high ) = ( 0, $#{$A_padPositions} );

    return $low  if $padPosition <= $A_padPositions->[$low];
    return $high if $padPosition >= $A_padPositions->[$high];

    # Invariant: $A_padPositions->[$low] < $padPosition
    #            < $A_padPositions->[$high].
    # $try always lies strictly between $low and $high, so the interval
    # shrinks on every iteration and the loop is guaranteed to end.
    while ( $high - $low > 1 ) {
        my $try = int( ( $high + $low ) / 2 );

        if ( $A_padPositions->[$try] == $padPosition ) {
            return $try;
        }
        elsif ( $A_padPositions->[$try] < $padPosition ) {
            $low = $try;
        }
        else {
            $high = $try;
        }
    }

    return $low;
}

#============================================================================#
1;