package PGF::Parsers::ParseAce;

=head1 NAME

ParseAce - Parse and read ace file

=head1 SYNOPSYS

  use PGF::Parsers::ParseAce;

  # Parse ace file and create ace object.
  #
  $ace = PGF::Parsers::ParseAce->parse( $aceFile, { %HASH });

    where,
    $aceFile is the name of the ace file.
    %HASH is an optional hash input for the following conditions:
    -verbose=>1   # set this to 1 if you want the module to print
                  # the progress of the parsing to STDOUT.

  EXAMPLE:

  $ace = ParseAce->parse( 'myAceFile.ace' );

  # Once you have an $ace object, you can get info from the ace file
  # using the following methods:
  #
  $ace->totalContigs
  $ace->allContigs
  $ace->getTags
  $ace->tag
  $ace->getReadTags
  $ace->contig(...)
  $ace->getreads(...)
  $ace->read(...)
  $ace->unpadded(...)
  $ace->readTagPosition(...)

  # Returns scalar of the total number of contigs:
  #
  $contigs = $ace->totalContigs;

  # Returns array of all contig numbers found in ace file (sorted in ascending order):
  #
  @contigs = $ace->allContigs;

  # Returns array containing block types found in the ace file.  blocks here refer to anything
  # that starts with a tag name and is enclosed in {} brackets.  example: 'CT', 'RT', ...
  #
  @blockTags = $ace->getTags;

  # Returns array containing the data within all blocks for a given block tag type:
  #
  @data = $ace->tag( $type );

    where $type is the tag name of the block (e.g., 'CT', 'WA', ...)

  EXAMPLE:

  @data = $ace->tag('CT');  # here, each element contains the string found within a CT block,
                            # (i.e,. consensus tag).

  # Returns array of read tag types found in RT{} block. example: 'matchElsewhereHighQual', 
  # 'chimera',...
  #
  @readTags = $ace->getReadTags;

  # To get info for a contig:
  #
  $stream = $ace->contig( $contigName );

    where $contigName is the contig number to retrieve. $contigName has to match
    the <contigName> token in the 'CO <contigName>' line within the ace file.

  EXAMPLE:

  $stream = $ace->contig('Contig100');      # getting info for contig 100.
  $contigName = $stream->{number};          # contig number
  $sequence = $stream->{sequence};          # contig sequence
  $seqUnpad = $stream->{sequence_unpadded}; # contig sequence unpadded (untrimmed). 
  $size = $stream->{size};                  # contig size padded (untrimmed)
  $sizeUnpadded = $stream->{size_unpadded}; # contig size unpadded (untrimmed). 
  $quality = $stream->{quality};            # contig quality scores
  $AF_tag = $stream->{AF_tag};              # string containing all AF tags for this contig. 
  $BS_tag = $stream->{BS_tag};              # string containing all BS tags for this contig. 
  $totalNumberOfReads = $stream->{totalReads}; # total number of reads in the contig

  # To get reads within the ace file: 
  #
  # There are two methods for doing this:
  #
  # 1st method:
  
  $ace->fetchreads( %HASH );
  
    where %HASH is a parameter used to limit the return list.
    The following key/value pairs are to be used and could be used in conjunction
    with one another:

    -contig=>$contigName    # returns only reads within this contig.
    -name=>$readName        # returns only reads with this name;  you may specify 
                            # a regular expression in $readName.
    -tag=>$readTag          # returns only reads containing this read tag type. 
                            # read tags correspond to RT{} tags found for each read 
                            # (e.g., 'matchElsewhereHighQual')

  # This prepares a data structure for fetching the attributes of each read
  # using the fetch method:
  
  $read = $ace->fetch( %HASH );
  
    where %HASH is a parameter used to set the following coditions:
    
    -unpadded=>0|1          # set this to true to return unpadded and alignment
                            # positions of the read. (default=1). Note that
                            # converting padded to unpadded positions increases
                            # the parsing time.
                            
  The fetch method returns a reference to a hash containing the read's
  attributes.
  
  EXAMPLE:
  
  $ace->fetchreads( -contig=>'Contig10' );
  
  while( $read = $ace->fetch() ) {
      $readName = $read->{name};      # name of read
      $contig = $read->{contig};      # contig read belongs in
      $startPos = $read->{start};     # starting location of read in contig
      $endPos = $read->{stop};        # ending location of read in contig
      $sequence = $read->{sequence};  # sequence of read
      $complement = $read->{complement}; # returns either "C" (complement) or "U" (uncomplement)
      $tag = $read->{tag};            # read tag lines
      $size = $read->{size};          # length of read padded
      $AF_tag = $read->{AF_tag};      # data of AF tag
      $QA_tag = $read->{QA_tag};      # data of QA tag
      $RD_tag = $read->{RD_tag};      # data of RD tag

      # the following is available only if -unpadded=>1 is specified:
      $sizeUnpad = $read->{size_unpadded};      # length of read unpadded
      $seqUnpad = $read->{sequence_unpadded};   # unpadded sequence of read
      $startPosUnpad = $read->{start_unpadded}; # unpadded starting location of read in contig
      $endPosUnpad = $read->{stop_unpadded};    # unpadded ending location of read in contig
      $startAlign = $read->{align_start};       # starting alignment position of read to contig.
      $endAlign = $read->{align_stop};          # ending alignment position of read to contig.
  }

  # 2nd method:
  #
  # This returns an array of pointers to a data structure containing the read 
  # information. Each element corresponds to one read. See example below on how to 
  # dereference the pointer to get values for the read.
  #
  # Caveat: because this method returns an array of read attributes, the memory
  # footprint may be large and could potentially cause problems if the ace
  # file is large.
  #
  
  @reads = $ace->getreads( %HASH );

    where %HASH is a parameter used to limit the return list.
    The following key/value pairs are to be used and could be used in conjunction
    with one another:

    -contig=>$contigName   # returns only reads within this contig.
    -name=>$readName        # returns only reads with this name;  you may specify 
                            # a regular expression in $readName.
    -tag=>$readTag          # returns only reads containing this read tag type. 
                            # read tags correspond to RT{} tags found for each read 
                            # (e.g., 'matchElsewhereHighQual')
    -unpadded=>1            # set this to true to return unpadded and alignment
                            # positions of the read. (takes longer to retrieve; default).
    -verbose=>1             # set this to 1 if you want the module to print
                            # the progress of the parsing to STDOUT.

  
  EXAMPLES:

  # Get all reads in contig 100 with .x extension.
  #
  # NOTE: this returns an array of pointers to a data structure containing the read 
  # information. Each element corresponds to one read. See example below on how to 
  # dereference the pointer to get values for the read.
  #
  @reads = $ace->getreads( -contig=>'Contig100', -name=>'\.x', -unpadded=>1 );

  # Get all reads with the read tag 'chimera'
  #
  @reads = $ace->getreads( -tag=>'chimera' );

  # To get info for each read once you have a list of reads from $ace->getreads:
  #
  foreach $read (@reads) {
      $readName = $read->{name};      # name of read
      $contig = $read->{contig};      # contig read belongs in
      $startPos = $read->{start};     # starting location of read in contig
      $endPos = $read->{stop};        # ending location of read in contig
      $sequence = $read->{sequence};  # sequence of read
      $complement = $read->{complement}; # returns either "C" (complement) or "U" (uncomplement)
      $tag = $read->{tag};            # read tag lines
      $size = $read->{size};          # length of read padded
      $AF_tag = $read->{AF_tag};      # data of AF tag
      $QA_tag = $read->{QA_tag};      # data of QA tag
      $RD_tag = $read->{RD_tag};      # data of RD tag

      # the following is available only if -unpadded=>1 is specified:
      $sizeUnpad = $read->{size_unpadded};      # length of read unpadded
      $seqUnpad = $read->{sequence_unpadded};   # unpadded sequence of read
      $startPosUnpad = $read->{start_unpadded}; # unpadded starting location of read in contig
      $endPosUnpad = $read->{stop_unpadded};    # unpadded ending location of read in contig
      $startAlign = $read->{align_start};       # starting alignment position of read to contig.
      $endAlign = $read->{align_stop};          # ending alignment position of read to contig.
  }

  # To get the info for a read based on the read name:
  #
  $read = $ace->read( $readName, { %HASH } );

    where $readName is the name of the read you want to get info for,
    %HASH is an optional hash input for the following conditions:
  
    -unpadded=>1   # set this to true to return unpadded and alignment
                   # positions of the read. (takes longer to retrieve).
    -contig_sequence=>sequence   # speeds up the unpadded calculations.

  EXAMPLE:

  $read = $ace->read( 'ABCD1234.x1' );

  $readName = $read->{name};
  $contig = $read->{contig};
  $startPos = $read->{start};
  $endPos = $read->{stop};
  $sequence = $read->{sequence};
  $complement = $read->{complement};
  $tag = $read->{tag};
  $size = $read->{size};
  $AF_tag = $read->{AF_tag};
  $QA_tag = $read->{QA_tag};
  $RD_tag = $read->{RD_tag};

  # The following is available only if -unpadded=>1 is specified:
  #
  $startPosUnpad = $read->{start_unpadded};
  $endPosUnpad = $read->{stop_unpadded};
  $seqUnpad = $read->{sequence_unpadded};
  $sizeUnpad = $read->{size_unpadded};
  $startAlign = $read->{align_start};
  $endAlign = $read->{align_stop};

  # To get unpadded positions of the read if the -unpadded parameter is not specified when
  # using the getreads or the read method, the attributes corresponding to the unpadded 
  # feature can be obtained usnig the unpadded method.
  #
  $read = $ace->unpadded( $read, \%args );

    where,
    $read = reference to hash containing read attributes obtained from the getreads or read
            method (required).
    %args = hash with the following optional key/values: (optional)
            -contig_sequence = padded sequence of contig (unpadded calculations run faster if this 
            is supplied).
   

  EXAMPLES: 

  # if used with the getreads method:
  #
  @reads = $ace->getreads( -contig=>'Contig400', -unpadded=>0 );

  foreach my $read (@reads) {
      $read = $ace->unpadded($read);
      #
      # this adds the following hash element to the reference:
      # size_unpadded
      # sequence_unpadded
      # start_unpadded
      # stop_unpadded
      # align_start
      # align_stop
  }

  # if used with the read method:
  #
  $readObj = $ace->read( 'ABC1234.x1' );
  $read = $ace->unpadded($readObj);

  # NOTE: The retrieval of unpadded positions is slightly faster and more efficient if the 
  # -unpadded parameter is initially specified when using the getreads or read method.

  # To get unpadded positions from a read tag, use the readTagPosition method.
  #
  %attribs = $ace->readTagPosition( $tag, \%args );

    where,
    $tag = string containing read tag. (required)
           (example: 'AAOZ9379.b1 matchElsewhereLowQual phrap 45 82 Thu Jul 29 10:29:54 2004')
    $H_args = reference to hash with the following optional key/values: (optional)
              -read = reference to hash containing read attributes (obtained from the getreads or 
                     read method. (speeds up processing by not having to look up these values for 
                     the read)
              -contig_sequence = sequence of contig the read is in (speeds up processing by not
                     having to look up contig sequence)
  

  EXAMPLE:
  
  $tags=$read->{tag};

  #
  # Here, $tags contain a scalar of lines present in the read tag. The lines are separated
  # by a newline character.
  #
  foreach $readTag ( split/\n/, $tags ) {
      %attribs = $ace->readTagPosition( $readTag );
      $unpaddedStart=$attribs{start_unpadded};  # unpadded start position of the read tag
      $unpaddedStop=$attribs{stop_unpadded};    # unpadded stop position of the read tag
      $paddedStart=$attribs{start};             # padded start position of the read tag
      $paddedStop=$attribs{stop};               # padded stop position of the read tag
      $type=$attribs{type};                     # type of read tag
      $read=$attribs{read};                     # name of read
  }

=head1 CAVEATS

  The unpadded and readTagPosition methods use a suboptimal algorithm to convert padded to unpadded read 
  positions.  This may result in a decrease in performance.  For best performance in obtaining 
  unpadded positions for reads, you should use the getreads method with the -contig=>$contig parameter.

  On rare occasions, the unpadded method returns incorrect unpadded positions.  This issue remains to
  be fixed.

=head1 AUTHOR

Stephan Trong
Lawrence Livermore National Laboratory
DOE Joint Genome Institute
2800 Mitchell Drive
Walnut Creek, Ca 94598
trong1@llnl.gov

=version comment

$Revision: 1.10 $

$Date: 2009-08-26 17:18:27 $

version 1.10: 06/2005 - S.Trong
Modified conversion of padded to unpadded read positions to increase speed. 
-created a hash conversion table for each read position that translated the
 pad location on the contig to unpad location.

version 1.11: 08/17/2005 - S.Trong
Fixed bug in getreads when calling without any arguments.
Fixed bug in getreads when calling argument with -contig and -tag combo.

version 1.12: 09/07/2006 - S.Trong
Improved pad to unpad conversion when calling getreads method.  This 
method now uses the PGF::Parsers::ParseAce::Unpad module to convert
padded to unpadded read positions.

=cut
  

use strict;
use warnings;
use vars qw( $VERSION $RealBin );
use Carp;
use FindBin qw($RealBin);
use lib "$RealBin/ParseAce";
use PGF::Parsers::ParseAce::Unpad;

$VERSION = 1.12;

sub parse {

=head
    Creates an object that contains a set of data retrieved from the ace file.

    ARGUMENTS:
    $file = ace file (required)
    $H_args = reference to hash with the following optional key/values: (optional)
      -verbose = prints processing info to STDOUT.

    RETURNS:
    ace object containing data structure of parsed values from the ace file.

=cut

    my ($class,$file,$args) = @_;

    my $self="";
    $self=_parse_ace($file,$args);
    $self->[5] = []; # holds read list if fetchreads is called.
    $self->[6] = []; # holds unpadded conversion table.
    bless $self, $class;
    return $self;

}

sub _parse_ace {

=head
    This subroutine parses a given ace file and stores the data in a data structure.

    ARGUMENTS:
    $file = ace file (required)
    $H_args = reference to hash with the following optional key/values: (optional)
      -verbose = prints processing info to STDOUT.

    RETURNS: 
    The referent that is returned to the object is an array containing the following 
    data structure:

    $self->[0]->ace file

    $self->[1]->[contigNumber-1] = 
        "startingFileAddr_of_CO,
         startingFilleAddr_of_sequence,endingFileAddr_of_sequence,
         startingFileeAddr_of_quality_score,endingFileAddr_of_quality_score,
         startingFileAddr_of_AF_tag,endingFileAddr_of_AF_tag"
         startingFileAddr_of_BS_tag,endingFileAddr_of_BS_tag"

    $self->[2]->{readName} = 
        "contig,startingFileAddr_of_AF_tag,endingFileAddr_of_AF_tag,
         startingFileAddr_of_RD_tag,endingFileAddr_RD_tag,ending_FileAddr_of_sequence
         FileAddr_of_QA_tag,
         FileAddr_of_DS_tag,
         startingFileAddr_of_RT_tag,readTagTypes"

    $self->[3]->[] = array holding all RT tag types. (e.g., 'matchElsewhereHighQual',...)
    
    $self->[4]->{} = reference to hash containing key=ace tag, value=description
=cut

    my ($file,$args) = @_;

    my %args=defined $args?%$args:();
    my $verbose=defined $args{-verbose}?$args{-verbose}:0;
    my $contig="";
    my $contigSeq="";
    my $cnum="";
    my %contigs=();
    my %readAttribs=();
    my @attribs=();
    my @readTagTypes=();
    my %aceTags=();
    my %foundThisReadTag=();
    my %foundAllReadTag=();
    my %foundAceTag=();
    my $readName;
    my $fileAddr=0;

    croak "No ace file found.\n" if !defined $file;

    open _IFILE, $file or croak "Problem opening ace file $file:$!";
    while ( my $line = <_IFILE> ) {
        if ( defined $line && $line =~ /^CO (\S+) \S+ (\S+)/ ) {
            $contig = $1;

            print "Parsing $2 reads from $contig ...\n" if $verbose;

            $contigs{$contig} = "$fileAddr";
            $fileAddr = tell(_IFILE);
            $contigs{$contig} .= ",$fileAddr";

            while( defined $line && $line =~ /\S/ ) {
                $fileAddr = tell(_IFILE);
                $line = <_IFILE>;
            }
            $contigs{$contig} .= ",$fileAddr";
            while ( $line !~ /^BQ\s*$/ ) {
                $line = <_IFILE>;
            }
            $fileAddr = tell(_IFILE);
            $contigs{$contig} .= ",$fileAddr";
            $line = <_IFILE>;
            while( defined $line && $line =~ /\d/ ) {
                $fileAddr = tell(_IFILE);
                $line = <_IFILE>;
            }
            $contigs{$contig} .= ",$fileAddr";
        }
        if ( defined $line && $line =~ /^AF (\S+) \S+ \S+/ ) {
            $contigs{$contig} .= ",$fileAddr";
            $readAttribs{$1} = $contig if !defined $readAttribs{$1};
            $readAttribs{$1} .= ",$fileAddr";
            $fileAddr = tell(_IFILE);
            $readAttribs{$1} .= ",$fileAddr";
            $line = <_IFILE>;
            while( defined $line && $line =~ /^AF (\S+) \S+ \S+/ ) {
                $readAttribs{$1} = $contig if !defined $readAttribs{$1};
                $readAttribs{$1} .= ",$fileAddr";
                $fileAddr = tell(_IFILE);
                $readAttribs{$1} .= ",$fileAddr";
                $line = <_IFILE>;
            }
            $contigs{$contig} .= ",$fileAddr";
        }
        if ( defined $line && $line =~ /^BS / ) {
            $contigs{$contig} .= ",$fileAddr";
            $line = <_IFILE>;
            if ( defined $line && $line =~ /^BS / ) {
                while( defined $line && $line =~ /^BS / ) {
                    $fileAddr = tell(_IFILE);
                    $line = <_IFILE>;
                }
            } else {
                $fileAddr = tell(_IFILE);
            }
            $contigs{$contig} .= ",$fileAddr";
        }
        if ( defined $line && $line =~ /^RD (\S+) \S+/ ) {
            $readName = $1;
            $readAttribs{$readName} .= ",$fileAddr";
            $fileAddr = tell(_IFILE);
            $readAttribs{$readName} .= ",$fileAddr";

            while( defined $line && $line =~ /\S/ ) {
                $fileAddr = tell(_IFILE);
                $line = <_IFILE>;
            }
            $readAttribs{$readName} .= ",$fileAddr";

            %foundThisReadTag = ();

            $fileAddr = tell(_IFILE);
            $line = <_IFILE>;
            if ( defined $line && $line =~ /^QA \-*\d+/ ) {
                $readAttribs{$readName} .= ",$fileAddr";
            } else {
                $readAttribs{$readName} .= ",NULL";
            }
            $fileAddr = tell(_IFILE);
            $line = <_IFILE>;
            if ( defined $line && $line =~ /^DS CHROMAT_FILE/ ) {
                $readAttribs{$readName} .= ",$fileAddr";
            } else {
                $readAttribs{$readName} .= ",NULL";
            }

        }
        if ( defined $line && $line =~ /^RT{/ ) {
            my $desc = "";
            my $read = "";
            my $tag = "";

            $readAttribs{$readName} .= ",$fileAddr";
            $line = <_IFILE>;
 
            if ( defined $line && $line !~ /^}/ ) {
                if ( $line =~ /\w/ ) {
                    ($read, $tag) = split /\s+/, $line;
                    if ( length $read && length $tag ) {
                        $readAttribs{$readName} .= ",$tag";
                        push @readTagTypes, $tag if !$foundAllReadTag{$tag}++;
                    }
                }
                $line = <_IFILE>;
                while ( defined $line && $line !~ /^}/ ) {
                    $line = <_IFILE>;
                }
            }
        } elsif ( defined $line && $line =~ /^([^\{]+)\{/ ) {
            my $tag = $1;
            my $desc = "";
            $line = <_IFILE>;
            while( defined $line && $line !~ /^}/ ) {
                $desc .= $line if $line =~ /\S/;
                $line = <_IFILE>;
            }
            chop $desc;
            push @{$aceTags{$tag}}, $desc;
        }
        $fileAddr = tell(_IFILE);
    }
    close _IFILE;

    return [ $file, \%contigs, \%readAttribs, \@readTagTypes, \%aceTags ];

}

sub contig {

=head
  This subroutine reassigns the contig attributes stored in an array to readable names 
  stored in hash.  

  ARGUMENTS:
  $contig = contig number. (required)

  RETURNS:
  The return value is a reference to a hash with the following keys:
 
  number = contig number
  size = size (untrimmed)
  size_unpadded = unpadded size (untrimmed)
  sequence = sequence
  sequence_unpadded = unpadded sequence
  totalReads = total number of reads in the contig
  quality = quality score
  CO_tag = line of CO tag
=cut

    my ($self, $contig) = @_;

    my $seq="";
    my $qual="";
    my $line="";
    my $ref="";
    my $bsTag="";
    my $afTag="";
    my %attrib=();
    $attrib{number}="";
    $attrib{size}="";
    $attrib{totalReads}="";
    $attrib{sequence}="";
    $attrib{sequence_unpadded}="";
    $attrib{quality}="";
    $attrib{size_unpadded}="";
    $attrib{CO_tag}="";
    $attrib{BS_tag}="";
    $attrib{AF_tag}="";

    return $ref if !defined $self->[1]->{$contig};

    my @addrs = split /,/, $self->[1]->{$contig};

    open _IFILE, $self->[0] or die "Cannot read ace file.\n";

    $line = _read_line_by_addr( *_IFILE, $addrs[0], $addrs[1] ) if @addrs>3;
    if ( defined $line && $line =~ /^CO (\S+) (\S+) (\S+)/ ) {
        $attrib{CO_tag}=$line;
        $attrib{number}=$1 if length $1;
        $attrib{size}=$2 if length $1;
        $attrib{totalReads}=$3 if length $1;
    }

    $seq = _read_line_by_addr( *_IFILE, $addrs[1], $addrs[2] ) if @addrs>2;
    $seq =~ s/\n//g;
    $qual = _read_line_by_addr( *_IFILE, $addrs[3], $addrs[4] ) if @addrs>4;
    $qual =~ s/\n//g;
    $afTag = _read_line_by_addr( *_IFILE, $addrs[5], $addrs[6] ) if @addrs>6;
    $bsTag = _read_line_by_addr( *_IFILE, $addrs[7], $addrs[8] ) if @addrs>8;

    close _IFILE;

    my $seqUnpad=$seq;
    $seqUnpad =~ s/\*//g;
    $attrib{sequence}=$seq if length $seq;
    $attrib{sequence_unpadded}=$seqUnpad if length $seqUnpad;
    $attrib{quality}=$qual if length $qual;
    $attrib{size_unpadded}=length($seqUnpad) if length $seqUnpad;
    $attrib{BS_tag}=$bsTag if length $bsTag;
    $attrib{AF_tag}=$afTag if length $afTag;
    $ref = {%attrib} if %attrib;

    return $ref;

}

sub getreads {

=head
  This subroutine returns either a scalar or array of references to a hash
  containing values of reads within the ace file.
  
  ARGUMENTS:
  %args = hash with the following keys/value pair to filter the read list (optional).
  If none passed, all reads will be returned.
 
  -contig = contig number
  -name = read name (regular expression accepted)
  -tag = ace tag type as defined in RT{} block.
            (regular expression accepted) 
            (e.g., 'matchElswhereHighQual', 'chimera', 'G_dropout', ...)
  -unpadded = set to 0 to skip conversion of padded to unpadded read positions.
  -verbose = set to 1 to print progress to STDOUT.

  RETURNS:
  Returns a reference to a hash containing the attributes of a given read name.

  The returned referent is a hash with the following key/value pair:
    name = read name
    complement = read complement (C||U)
    contig = contig number the read belongs in
    start = starting location of read
    stop = end location of read
    sequence = sequence of the read
    size = size of read
    tag = RT tag values 
    AF_tag = AF tag line
    RD_tag = RD tag line
    QA_tag = QA tag line
    DS_tag = DS tag line

    # additional key/value pair only if -unpadded is set to 1:
    start_unpadded = unpadded starting location of read
    stop_unpadded = unpadded end location of read
    sequence_unpadded = unpadded sequence of the read
    size_unpadded = unpadded size of read
    align_start = starting position of read aligned to consensus
    align_stop = ending position of read aligned to consensus
=cut

    my ($self, %args) = @_;

    my @reads=();
    my @wantReads=();
    my %contigSeqs=();
    my $ct=0;
    my $verbose=defined $args{-verbose}?$args{-verbose}:0;
    my $computeUnpadPos=defined $args{-unpadded}?$args{-unpadded}:1;

    delete $args{-verbose} if defined $args{-verbose};
    delete $args{-unpadded} if defined $args{-unpadded};

    # get reads by tag type.
    #
    if ( defined $args{-tag} ) {
        my @lookups = @wantReads ? @wantReads : keys %{$self->[2]};
        foreach my $read ( @lookups ) {
            my @tags = split /,/,$self->[2]->{$read};
            if ( @tags > 6 ) {
                @tags = @tags[0,6..$#tags];
                my $contig=shift @tags;
                if ( defined $args{-contig} ) {
                    next if $contig ne $args{-contig};
                    push @wantReads, $read if grep /^$args{-tag}$/, @tags;
                } else {
                    push @wantReads, $read if grep /^$args{-tag}$/, @tags;
                }
            }
        }
    }
    # get reads by contig.
    #
    if ( defined $args{-contig} && !@wantReads ) {
        my $contig = $args{-contig};

        # get reads for this contig by parsing the 'AF' tags
        #
        if ( defined $self->[1]->{$contig} ) {
            open _IFILE, $self->[0] or die "Cannot read ace file.\n";
            my @addrs = split/,/, $self->[1]->{$contig};
            if ( @addrs > 6 ) {
                @wantReads = map /^AF (\S+)/,
                            split /\n/, 
                            _read_line_by_addr( *_IFILE, $addrs[5], $addrs[6] );
            }
            close _IFILE;
        }
    }
    # get reads by name (regex accepted).
    #
    if ( defined $args{-name} ) {
        if ( @wantReads ) {
            @wantReads = grep /$args{-name}/, @wantReads;
        } elsif ( defined %{$self->[2]} ) {
            push @wantReads, grep /$args{-name}/, keys %{$self->[2]};
        }
    }
    # get all reads if no filters defined.
    #
    if ( !%args && defined %{$self->[2]} ) {
        @wantReads = keys %{$self->[2]};
    }

    print "Defining attributes for ",scalar(@wantReads)," reads ...\n" if $verbose;

    # If unpadded positions are specified, then we need to convert pad locs of each
    # read to unpad locs.  This process may take considerable time if the contig and/or
    # number of reads are large.
    #
    if ( $computeUnpadPos ) {
        @reads = _computeUnpadPositions($self,\@wantReads,{-verbose=>$verbose});
        $self->[6] = [];
    } else {
        foreach my $name ( @wantReads ) {
            push @reads, $self->read($name);
        }
    }

    if ( wantarray ) {
        return @reads;
    } elsif ( defined $reads[0] ) {
        return $reads[0];
    } else {
        return @reads;
    }

}

sub fetchreads {

=head
  This subroutine prepares a list of reads to be returned by the fetch method.
  
  ARGUMENTS:
  %args = hash with the following keys/value pair to filter the read list (optional).
  If none passed, all reads will be returned.
 
  -contig = contig number
  -name = read name (regular expression accepted)
  -tag = ace tag type as defined in RT{} block.
            (regular expression accepted) 
            (e.g., 'matchElswhereHighQual', 'chimera', 'G_dropout', ...)

=cut

    my ($self, %args) = @_;

    my @reads=();
    my @wantReads=();
    my %contigSeqs=();

    # get reads by tag type.
    #
    if ( defined $args{-tag} ) {
        my @lookups = @wantReads ? @wantReads : keys %{$self->[2]};
        foreach my $read ( @lookups ) {
            my @tags = split /,/,$self->[2]->{$read};
            if ( @tags > 6 ) {
                @tags = @tags[0,6..$#tags];
                my $contig=shift @tags;
                if ( defined $args{-contig} ) {
                    next if $contig ne $args{-contig};
                    push @wantReads, $read if grep /^$args{-tag}$/, @tags;
                } else {
                    push @wantReads, $read if grep /^$args{-tag}$/, @tags;
                }
            }
        }
    }
    # get reads by contig.
    #
    if ( defined $args{-contig} && !@wantReads ) {
        my $contig = $args{-contig};

        # get reads for this contig by parsing the 'AF' tags
        #
        if ( defined $self->[1]->{$contig} ) {
            open _IFILE, $self->[0] or die "Cannot read ace file.\n";
            my @addrs = split/,/, $self->[1]->{$contig};
            if ( @addrs > 6 ) {
                @wantReads = map /^AF (\S+)/,
                            split /\n/, 
                            _read_line_by_addr( *_IFILE, $addrs[5], $addrs[6] );
            }
            close _IFILE;
        }
    }
    # get reads by name (regex accepted).
    #
    if ( defined $args{-name} ) {
        if ( @wantReads ) {
            @wantReads = grep /$args{-name}/, @wantReads;
        } elsif ( defined %{$self->[2]} ) {
            push @wantReads, grep /$args{-name}/, keys %{$self->[2]};
        }
    }
    # get all reads if no filters defined.
    #
    if ( !%args && defined %{$self->[2]} ) {
        @wantReads = keys %{$self->[2]};
    }

    $self->[5] = \@wantReads;
    
}

sub fetch {

=head
    Returns the reference of the next read's attributes.  Must call fetchreads
    first.
=cut

    my $self = shift;
    my %args = @_;

    if ( !@{$self->[5]} ) {
        $self->[6] = [];
        return '';
    }
    
    my $computeUnpadPos=defined $args{-unpadded}?$args{-unpadded}:1;
    
    my $readName = shift @{$self->[5]};
    my $readData = {};
    
    if ( $computeUnpadPos ) {
        ($readData) = _computeUnpadPositions($self,[$readName]);
    } else {
        $readData = $self->read($readName);
    }
    
    return $readData;

}

sub _computeUnpadPositions {

=head
    Converts padded start and stop position of each read to unpadded position and
    returns array of read attributes including the unpadded positions as well as
    alignment positions.

    ARGUMENTS:
    $readlist = reference to array containing the read names to define attributes.
    $args = reference to hash for optional features.  The following features are
      available:
      -verbose=>1  display message showing progress of the computations.

    RETURNS:
    Hash containing the following key/value pairs:
      start = padded start position of read tag
      stop = padded stop position of read tag
      start_unpadded = unpadded start position of read tag
      stop_unpadded = unpadded stop position of read tag
      align_start = start of alignment position of read tag
      align_stop = end of alignment position of read tag
      read = name of read with this tag
      type = type of read tag (e.g., 'matchElsewhereHighQual', 'chimera', ...)
=cut

    my ($self,$readlist,$args) = @_;

    my @wantReads = @{$readlist};
    my %args = defined $args ? %{$args} : ();
    my $verbose = $args{-verbose} ? 1:0;

    my $ct=0;
    my %displayed=();
    my @retReads=();
    my $unpadObject = PGF::Parsers::ParseAce::Unpad->new();

    # first, we need to store reads by contig.
    #
    my %contigReads = ();
    foreach my $name ( @wantReads ) {
        if ( defined $self->[2]->{$name} ) {
            my $contig = (split /,/,$self->[2]->{$name})[0];
            push @{$contigReads{$contig}}, $self->read($name);
        }
    }

    # Now, loop thru each contig and compute the unpadded positions for
    # each read within the contig.
    #
    foreach my $contig ( keys %contigReads ) {
            
        next if !@{$contigReads{$contig}};
        
        if ( defined $self->[6][0] && $self->[6][0] eq $contig ) {
            $unpadObject = $self->[6][1];
        } else {
            # get contig sequence (padded).
            #
            my $contigSeq = "";
            my @addrs = split /,/,$self->[1]->{$contig};
            if ( @addrs > 2 ) {
                open _IFILE, $self->[0] or die "Cannot read ace file.\n";
                $contigSeq = _read_line_by_addr( *_IFILE, $addrs[1], $addrs[2] );
                $contigSeq =~ s/\n//g;
                close _IFILE;
            }

            $unpadObject->use($contigSeq);
            $self->[6] = [$contig,$unpadObject];
        }

        my @reads = @{$contigReads{$contig}};

        print "Creating unpadded hash table for $contig ...\n" if $verbose;
        print "done.\n" if $verbose;

        # now loop thru each read and calculate unpadded positions.
        #
        foreach my $read ( @reads ) {

            # Store unpadded position and sequence in hash reference.
            #
            my $readStart = $read->{start};
            my $readStop = $read->{stop};
            my $alignStart = $read->{align_clipping_start} + $readStart - 1;
            my $alignStop = $read->{align_clipping_stop} + $readStart - 1;
            my $unpadStart = $unpadObject->padToUnpad($readStart);
            my $unpadStop = $unpadObject->padToUnpad($readStop);
            my $unpadAlignStart = $unpadObject->padToUnpad($alignStart);
            my $unpadAlignStop = $unpadObject->padToUnpad($alignStop);
            my $readSeq = $read->{sequence};
            $readSeq =~ s/\*//g;

            $unpadAlignStart = 1 if $unpadAlignStart == -1;
            $unpadAlignStop = 1 if $unpadAlignStop == -1;

            # store attributes.
            $read->{sequence_unpadded}=$readSeq;
            $read->{size_unpadded}=length($readSeq);
            $read->{start_unpadded}=$unpadStart;
            $read->{stop_unpadded}=$unpadStop;
            $read->{align_start}=$unpadAlignStart;
            $read->{align_stop}=$unpadAlignStop;

            push @retReads, $read;
    
            if ( $verbose ) {
                my $pct=++$ct/scalar(@wantReads)*100;
                if ( $pct > 1 && int($pct)%10==0 && !$displayed{int($pct)}++) {
                    print sprintf(" %3d", int($pct)),"% complete ... $ct reads processed.\n";
                }
            }
        }
    }

    return @retReads;

}

sub readTagPosition {

=head
    Parses a read tag and returns the padded unpadded positions of the tag and the
    read and type of the tag.

    ARGUMENTS:
    $tag = string containing read tag. (required)
    $H_args = reference to hash with the following optional key/values: (optional)
              -read = reference to hash containing read attributes (obtained from the getreads or 
                     read method. (speeds up processing by not having to look up these values for 
                     the read)
              -contig_sequence = sequence of contig the read is in (speeds up processing by not
                     having to look up contig sequence)

    RETURNS:
    Hash containing the following key/value pairs:
      start = padded start position of read tag
      stop = padded stop position of read tag
      start_unpadded = unpadded start position of read tag
      stop_unpadded = unpadded stop position of read tag
      read = name of read with this tag
      type = type of read tag (e.g., 'matchElsewhereHighQual', 'chimera', ...)
=cut
    
    my ($self, $tag, $H_args) = @_;

    return $tag unless ( $tag =~ /^(\S+) (\S+) \S+ (\d+) (\d+)/ ); 

    my %attrib=();
    $attrib{start}="";
    $attrib{stop}="";
    $attrib{start_unpadded}="";
    $attrib{stop_unpadded}="";
    $attrib{type}="";
    $attrib{read}="";

    my $read=$1;
    my $type=$2;
    my $tagStart=$3;
    my $tagStop=$4;
    my %args = defined %$H_args ? %$H_args : ();
    my %readAttribs = ();
    my $unpadStart=0;
    my $padStart=0;
    my $contigSeq="";
    my $numStartPads=0;
    my $numStopPads=0;
    my $contig=0;

    if ( !defined $args{-read} ) {
        $args{-read}=$self->read($read,{-unpadded=>1});
        return %attrib if !length $args{-read}; 
    }

    # If reference to hash containing read attributes is passed using the -read 
    # parameter, then get contig, padded start and unpadded start positions 
    # from the attributes.
    #
    if ( defined $args{-read} && defined %{$args{-read}} ) {
        $padStart=$args{-read}->{start}; 
        $contig=$args{-read}->{contig}; 

        if ( length $args{-read}->{start_unpadded} ) {
            $unpadStart=$args{-read}->{start_unpadded};
        } elsif ( defined $args{-contig_sequence} ) {
            my $ref=$self->unpadded( $args{-read}, {-contig_sequence=>$args{-contig_sequence}} );
            $unpadStart=$ref->{start_unpadded};
        } else {
            my $ref=$self->unpadded( $args{-read} );
            $unpadStart=$ref->{start_unpadded};
        }
    }

    # If contig sequence is passed using the -contig_sequence parameter, use sequence
    # passed for counting pads.
    #
    if ( defined $args{-contig_sequence} ) {
        $contigSeq=$args{-contig_sequence};

    # Otherwise, need to look up contig sequence for counting pads.
    #
    } else {
        my $ref=$self->contig($contig);
        $contigSeq=$ref->{sequence};
    }

    $unpadStart=1 if $unpadStart<1;
    $padStart=1 if $padStart<1;

    # Calculate number of pads before start of tag position.
    #
    my $seq=substr($contigSeq,$padStart-1,$tagStart-1);
    $numStartPads=$seq =~ tr/\*/\*/ if defined $seq;

    # Calculate number of pads within the tag position.
    #
    if ( $padStart+$tagStart-1 > length($contigSeq) ) {
        $numStopPads=0;
    } elsif ( $padStart+$tagStart-1+($tagStop-$tagStart) > length($contigSeq)) {
        my $seq=substr($contigSeq,$padStart+$tagStart-2,length($contigSeq)-($padStart+$tagStart-1));
        $numStopPads=$seq =~ tr/\*/\*/ if defined $seq;
    } else {
        $seq=substr($contigSeq,$padStart+$tagStart-2,$tagStop-$tagStart+1);
        $numStopPads=$seq =~ tr/\*/\*/ if defined $seq;
    }

    # Compute padded start and stop positions of the tag.
    #
    my $startUnpadded=$unpadStart+$tagStart-$numStartPads-1;
    my $stopUnpadded=$unpadStart+$tagStop-$numStartPads-$numStopPads-1;

    $attrib{start}=$tagStart;
    $attrib{stop}=$tagStop;
    $attrib{type}=$type;
    $attrib{start_unpadded}=$startUnpadded;
    $attrib{stop_unpadded}=$stopUnpadded;
    $attrib{read}=$read;

    return %attrib;
}

sub read {

=head
  Parameters: 
    $name = read name
    $H_args = reference to hash with the following optional key/values:
              -unpadded  = returns additional unpadded and alignment values.
              -contig_sequence = sequence of contig (unpadded calculations 
                 run faster if this is supplied).

  Returns a reference to a hash containing the attributes of a given read name.

  The returned referent is a hash with the following key/value pair:
    name = read name
    complement = read complement (C||U)
    contig = contig number the read belongs in
    start = starting location of read
    stop = end location of read
    sequence = sequence of the read
    size = size of read
    tag = RT tag values 
    align_clipping_start = alignment clipping start (padded);
    align_clipping_stop = alignment clipping stop (padded);
    AF_tag = AF tag line
    RD_tag = RD tag lines
    QA_tag = QA tag line
    DS_tag = DS tag line

  Additional tags that are defined if using the -unpadded option.

    start_unpadded = unpadded starting location of read
    stop_unpadded = unpadded end location of read
    sequence_unpadded = unpadded sequence of the read
    size_unpadded = unpadded size of read
    align_start = starting position of read aligned to consensus
    align_stop = ending position of read aligned to consensus
=cut

    my ($self, $name, $H_args) = @_;

    my %args = %$H_args if defined $H_args;

    my %attrib=();
    $attrib{name}="";
    $attrib{complement}="";
    $attrib{sequence}="";
    $attrib{size}="";
    $attrib{start}="";
    $attrib{stop}="";
    $attrib{sequence_unpadded}="";
    $attrib{size_unpadded}="";
    $attrib{start_unpadded}="";
    $attrib{stop_unpadded}="";
    $attrib{align_start}="";
    $attrib{align_stop}="";
    $attrib{contig}="";
    $attrib{tag}="";
    $attrib{AF_tag}="";
    $attrib{RD_tag}="";
    $attrib{QA_tag}="";
    $attrib{DS_tag}="";
    $attrib{align_clipping_start}=0;
    $attrib{align_clipping_stop}=0;

    return \%attrib if !defined $self->[2]->{$name};

    # Array index of addrs:
    #
    # 0-startingFileAddr_of_AF_tag,
    # 1-endingFileAddr_of_AF_tag,
    # 2-startingFileAddr_of_RD_tag,
    # 3-endingFileAddr_RD_tag,
    # 4-ending_FileAddr_of_sequence
    # 5-FileAddr_of_QA_tag,
    # 6-FileAddr_of_DS_tag,
    # 7-startingFileAddr_of_RT_tag,
    # 8-readTagTypes"

    my $line;
    my @addrs = split /,/,$self->[2]->{$name};
    my $contig = shift @addrs;

    $attrib{contig}=$contig if defined $contig;

    open _IFILE, $self->[0] or die "Cannot read ace file.\n";

    # get AF tag.
    #
    if ( @addrs > 1 && $addrs[0] =~ /^\d+$/ && $addrs[1] =~ /^\d+$/ ) {
        $line = _read_line_by_addr( *_IFILE, $addrs[0], $addrs[1] );
        if ( defined $line && $line =~ /^AF (\S+) (\S+) (\S+)/ ) {
            chomp $line;
            $attrib{name}=$1;
            $attrib{complement}=$2;
            $attrib{start}=$3;
            $attrib{AF_tag}=$line;
        }
    }

    # get RD tag.
    #
    if ( @addrs > 3 && $addrs[2] =~ /^\d+$/ && $addrs[3] =~ /^\d+$/ ) {
        $line = _read_line_by_addr( *_IFILE, $addrs[2], $addrs[3] );
        if ( defined $line && $line =~ /^RD/ ) {
            chomp $line;
            $attrib{RD_tag}=$line;
        }
    }

    # get QA tag.
    #
    if ( @addrs > 5 && $addrs[5] =~ /^\d+$/ ) {
        seek(_IFILE,$addrs[5],0);
        $line = <_IFILE>;
        if ( defined $line && $line =~ /^QA/ ) {
            chomp $line;
            $attrib{QA_tag}=$line;
            if ( $line =~ /^QA \-*\d+ \-*\d+ (\-*\d+) (\-*\d+)/ ) {
                $attrib{align_clipping_start}=$1;
                $attrib{align_clipping_stop}=$2;
            } else {
            }
        }
    }

    # get DS tag.
    #
    if ( @addrs > 6 && $addrs[6] =~ /^\d+$/ ) {
        seek(_IFILE,$addrs[6],0);
        $line = <_IFILE>;
        if ( defined $line && $line =~ /^DS/ ) {
            chomp $line;
            $attrib{DS_tag}=$line;
        }
    }

    # get sequence and optionally, unpadded positions, alignment positions and unpadded 
    # sequence size.
    #
    if ( @addrs > 4 && $addrs[3] =~ /^\d+$/ && $addrs[4] =~ /^\d+$/ ) {
        $line = _read_line_by_addr( *_IFILE, $addrs[3], $addrs[4] );

        if ( defined $line ) {
            $line =~ s/\n//g;
            my $readSeq=$line;

            $attrib{sequence}=$readSeq;
            $attrib{size}=length($readSeq);
            $attrib{stop}=length($readSeq) + $attrib{start} - 1;
        }
    }

    # get lines in RT{} blocks.
    #
    if ( @addrs > 7 && $addrs[7] =~ /^\d+$/ ) {
        my $desc="";
        seek(_IFILE,$addrs[7],0);
        $line = <_IFILE>;
        while ( defined $line && $line !~ /^RD/ && $line !~ /^CO/ ) {
            if ( defined $line && $line =~ /^RT{/ ) {
                $line = <_IFILE>;
                while( defined $line && $line !~ /^}/ ) {
                    $desc .= $line if ( $line =~ /\w/ );
                    $line = <_IFILE>;
                }
                $attrib{tag}=$desc;
            }
            $line = <_IFILE>;
        }
    }

    close _IFILE;


    # determine unpadded sequence and positions if $args{-unpadded} is specified.
    #
    # Note: this may slow the execution time, especially if the contig is large due to the manual 
    # removal of the padded character '*' from the sequence of the contig based on location of the 
    # read and re-calculating the positions of the read. 
    #
    if ( defined $args{-unpadded} && $args{-unpadded} ) {
        my $ref = $self->unpadded( \%attrib, \%args );
        %attrib = %$ref;
    }
    
    return \%attrib;

}

sub unpadded {

=head
  Calculates unpadded start, stop, alignment start and alignment stop positions of a given
  read attribute.  The argument is a reference to a hash containing the read attributes 
  obtained by calling the getreads or read method.  The return value is a reference to a hash
  containing the original hash elements plus the following:
    
  ARGUMENTS: 
  $read = reference to hash containing read attributes obtained from the getreads or read
          method (required).
  $H_args = reference to hash with the following optional key/values: (optional)
              -contig_sequence = padded sequence of contig (unpadded calculations 
                 run faster if this is supplied).

  RETURNS:
  Reference to hash from $read plus the following key/value pairs:
    start_unpadded = unpadded starting location of read
    stop_unpadded = unpadded end location of read
    sequence_unpadded = unpadded sequence of the read
    size_unpadded = unpadded size of read
    align_start = starting position of read aligned to consensus
    align_stop = ending position of read aligned to consensus
=cut

    my ( $self, $read, $H_args ) = @_;

    return $read if !defined %{$read};

    my %args = defined %$H_args ? %$H_args:();

    my %attrib = %{$read};

    my $contigSeq = "";

    if ( defined $args{-contig_sequence} && length $args{-contig_sequence} ) {
        $contigSeq = $args{-contig_sequence};

    # get contig sequence from ace file if not present.
    # Need this to calculate unpadded positions of the read.
    #
    } else {
        my $contig = $attrib{contig};

        open _IFILE, $self->[0] or die "Cannot read ace file.\n";

        my ($addrContigSeq) = (split /,/,$self->[1]->{$contig})[1];
        seek(_IFILE,$addrContigSeq,0);
        my $line = <_IFILE>;
        while( defined $line && $line =~ /\S/ ) {
            chomp $line;
            $contigSeq .= $line;
            $line = <_IFILE>;
        }
        close _IFILE;
    }
    my $readStart=$attrib{start};
    my $readStop=$attrib{stop};
    my $paddedStart=0;
    my $paddedStop=0;
    my $unpadStart=$attrib{start};
    my $unpadStop=$readStop;
    my $size = $attrib{size};

    $size += $readStart if $readStart<1;
    $readStart=1 if $readStart<1;
    $readStop=1 if $readStop<1;

    # calculate unpadded starting and ending positions of the read.
    #
    my $cutContig = substr($contigSeq,0,$readStart);
    $paddedStart = $cutContig =~ tr/\*/\*/ if defined $cutContig;
    $unpadStart = $readStart-$paddedStart if $paddedStart;

    $size = length($contigSeq)-$readStart+1 if $size > length($contigSeq)-$readStart+1;
    if ( $size > 0 ) {
        $cutContig = substr($contigSeq,$readStart-1,$size);
        $paddedStop = $cutContig =~ tr/\*/\*/ if defined $cutContig;
        $unpadStop = $readStop-$paddedStart-$paddedStop if $paddedStop;
    } else {
        print "could not calculate unpadded positions for ",$read->{readName},"!\n";
    }

    my $readSeq = $attrib{sequence};
    $readSeq =~ s/\*//g;

    my $alignClippingStart=0;
    my $alignClippingStop=0;
    my $paddedReadLength=0;

    if ( $attrib{QA_tag} =~ /^QA \d+ \d+ (\d+) (\d+)/ ) {
        $alignClippingStart=$1;
        $alignClippingStop=$2;
    }
    if ( $attrib{RD_tag} =~ /^RD \S+ (\d+)/ ) {
        $paddedReadLength=$1;
    }

    my $alignStart = $readStart + $alignClippingStart - 1;
    my $alignStop = $readStart + $alignClippingStop - 1;

    if ( $unpadStart == 0 ) {
        $alignStart--;
        $alignStop--;
    } elsif ( $unpadStart < 0 ) {
        $alignStart += $unpadStart - 1;
        $alignStop += $unpadStart - 1;
    }

    $cutContig = substr($contigSeq,0,$alignStart);
    my $numOfPads = $cutContig =~ tr/\*/\*/ if defined $cutContig;
    $alignStart -= $numOfPads;

    $cutContig = substr($contigSeq,0,$alignStop);
    $numOfPads = $cutContig =~ tr/\*/\*/ if defined $cutContig;
    $alignStop -= $numOfPads;

    $attrib{sequence_unpadded}=$readSeq;
    $attrib{size_unpadded}=length($readSeq);
    $attrib{start_unpadded}=$unpadStart;
    $attrib{stop_unpadded}=$unpadStop;
    $attrib{align_start}=$alignStart;
    $attrib{align_stop}=$alignStop;

    return \%attrib;

}

sub getTags {

=head
    Returns all tag types that are found in the ace file.  Tag types correspond
    to blocks enclosed in {} brackets.  Examples: RT{}, CT{}, WA{}.  The tag names
    for this example would be RT, CT, WA. 

    ARGUMENTS:
    none

    RETURNS:
    Array containing all tag types.
=cut

    my $self = shift;

    if ( defined %{$self->[4]} ) {
        return keys %{$self->[4]};
    } else{
        return;
    }

}

sub tag {

=head
  This subroutine returns an array containing the data within the ace tags.
  Each element stores a string consisting of the data within the tag.

  ARGUMENTS:
  Array of tag types (optional).

  Pass in the tag type ("CT" and/or "RT") in list format to return specific ace tags 
  you would like to retrieve (required).  If no values are passed, all ace tags are
  returned.

  RETURNS:
  Array containig data within the ace tags.
=cut

    my ($self, @types) = @_;

    my @tagValues=();
    my %tags=();
    @tags{@types}=(1)x@types if @types;

    foreach my $type ( @types ) {
        if ( defined @{$self->[4]->{$type}} ) {
            push @tagValues, @{$self->[4]->{$type}};
        }
    }

    return @tagValues;
}

sub getReadTags {

=head
  This subroutine returns an array containing the read tag names that are found in
  the RT{} block in the ace file.  Examples of tag names include 'matchElswhereHighQual',
  'chimera', ...

   ARGUMENTS:
   none

   RETURNS:
   Array containing read tag names found in the read tag block.
=cut

    my $self = shift;

    if ( defined @{$self->[3]} ) {
        return @{$self->[3]};
    } else {
        return;
    }

}

sub totalContigs {

=head
  Returns the total number of contigs found in the ace file.

  ARGUMENTS:
  none

  RETURNS:
  scalar containing total number of contigs.
=cut

    my $self=shift;

    return scalar keys %{$self->[1]};

}

sub allContigs {

=head
  Returns an array containing all contig numbers found in the ace file.

  ARGUMENTS:
  none

  RETURNS;
  Array containing all contigs sorted in ascending numerical order.
=cut

    my $self=shift;

    return _sortalphanum( keys %{$self->[1]} );

}

sub _read_line_by_addr {

=head
  Returns data from a file handle for two given bytes of data.
  The first byte is the length up to the starting location to parse the data.
  The second byte is the length of the ending location to parse the data.

  ARGUMENT:
  $FH = file handle to opened ace file. (required)
  $addr1 = starting file address to extract data (required)
  $addr2 = endind file address to extract data (required)
 
=cut

    my ($FH,$addr1,$addr2) = @_;

    my $line="";

    return $line if $addr2<$addr1;

    seek($FH,$addr1,0);
    CORE::read($FH,$line,$addr2-$addr1);
    return $line;

}

sub _sortalphanum {

=head
  Returns a list sorted alphanumerically.
=cut

    my ( @datas ) = @_;

    my $formatted;
    my @tosort;

    foreach (@datas) {
        if ( /\d+/ ) {
            ($formatted = $_) =~ s/(\d+)/sprintf( "%010d", $1 )/eg;
        } else {
            $formatted = $_;
        }
        push @tosort, [ $_, $formatted ];
    }

    my @sorted = map { $_->[0] }
                 sort { $a->[1] cmp $b->[1] }
                 map { [ $_->[0], $_->[1] ] } @tosort;


    return @sorted;

}


1;