package PGF::Utilities::FastaSeq;

=head1 NAME

PGF::Utilities::FastaSeq - methods to manipulate fasta and quality sequences

=head1 VERSION

$Revision: 1.3 $

$Date: 2009-10-08 16:50:18 $

=head1 SYNOPSIS

    use PGF::Utilities::FastaSeq qw(formatSeq);

    my $seq  = "ACGTACGTACGTACGT";
    my $qual = "10 10 10 10 20 20 20 20 30 30 30 30 40 40 40 40";

    my $formatSeq = formatSeq($seq,4);   # first parameter=sequence,
                                         # second parameter=length of chopped
                                         #   fragment
                                         # optional third parameter=delimiter
                                         #   (default='\n')

    my $formatQual = formatSeq($qual,4); # same as above, replacing first
                                         #   parameter with quality

    print $formatSeq;
    # prints:
    # ACGT
    # ACGT
    # ACGT
    # ACGT

    print $formatQual;
    # prints:
    # 10 10 10 10
    # 20 20 20 20
    # 30 30 30 30
    # 40 40 40 40

Note that $formatSeq and $formatQual do not contain a newline at the end
of the string.

=head1 DESCRIPTION

This module exports (on request) a single function, C<formatSeq>, which
chops a sequence string into fragments of a fixed size and joins the
fragments with a delimiter.

A string that contains at least one digit is treated as a list of
whitespace-separated quality scores, and the fragment size counts scores.
Any other string is treated as a nucleotide sequence, and the fragment size
counts characters.

The delimiter is placed only between fragments, never after the last one.
Leading and trailing whitespace is removed from the result.  C<formatSeq>
croaks if the fragment length is not a positive integer.

=head1 AUTHOR(S)

Stephan Trong

=head1 COPYRIGHT

DOE Joint Genome Institute Microbial Genomics

Copyright (C) 2007 The Regents of the University of California
All rights reserved.

NOTICE: The Government is granted for itself and others acting on its
behalf a paid-up, nonexclusive irrevocable worldwide license in this
data to reproduce, prepare derivative works, and perform publicly and
display publicly. Beginning five (5) years after permission to assert
copyright is granted, subject to two possible five year renewals, the
Government is granted for itself and others acting on its behalf a
paid-up, non-exclusive, irrevocable worldwide license in this data to
reproduce, prepare derivative works, distribute copies to the public,
perform publicly and display publicly, and to permit others to do so.

NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY,
NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT, OR
PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
PRIVATELY OWNED RIGHTS.

=head1 HISTORY

=over

=item *

Stephan Trong 03/08/2007 Creation

=back

=cut

#============================================================================#
use strict;
use warnings;
use Carp;

require Exporter;

our @ISA         = qw(Exporter);
our @EXPORT_OK   = qw( formatSeq );
our %EXPORT_TAGS = ( all => [ qw( formatSeq ) ] );

#============================================================================#
# formatSeq( $seq, $length [, $delimiter] )
#
# Chops $seq into fragments and joins them with $delimiter.
#
#   $seq       - nucleotide string, or whitespace-separated quality scores.
#                Any string containing a digit is handled as quality scores.
#   $length    - fragment size: characters per fragment for nucleotides,
#                scores per fragment for qualities.  Must be a positive
#                integer, otherwise the function croaks.
#   $delimiter - optional separator placed between fragments; an undefined
#                or empty value selects the default "\n".
#
# Returns the formatted string with leading/trailing whitespace removed.
# An undefined or empty $seq is returned as-is.
#
sub formatSeq {
    my $seq       = shift;
    my $length    = shift;
    my $delimiter = shift;

    # Only a missing/empty delimiter falls back to newline; a plain
    # "|| default" would also discard the legitimate delimiter "0".
    $delimiter = "\n" if !defined $delimiter || $delimiter eq '';

    # $length is interpolated into regex quantifiers below, so it has to be
    # a plain positive integer.
    croak "formatSeq: length must be a positive integer"
        unless defined $length && $length =~ /\A\d+\z/ && $length > 0;

    return $seq if !defined $seq || $seq eq '';

    my @fragments;

    # if $seq are quality scores ...
    #
    if ( $seq =~ /\d/ ) {
        my $separators = $length - 1;
        my $row_regex  = qr/\G\s*((?:\d+\s+){$separators}\d+)/;

        # Walk the string with \G and /gc rather than $' so the remainder is
        # not copied on every pass; /c keeps pos() at the end of the last
        # complete row once the match finally fails.
        while ( $seq =~ /$row_regex/gc ) {
            push @fragments, $1;
        }

        # Whatever follows the last complete row (a short final row, or text
        # that is not a score) is kept as the final fragment.
        if (@fragments) {
            my $remainder = substr $seq, pos($seq);
            $remainder =~ s/^\s+//;
            push @fragments, $remainder if length $remainder;
        }

    # if $seq are nucleotides ...
    #
    }
    else {
        # "." does not match a newline, so an embedded line break acts as a
        # fragment boundary and is dropped.
        @fragments = $seq =~ /(.{1,$length})/g;
    }

    # Joining (instead of appending the delimiter to every fragment) keeps a
    # non-whitespace delimiter from being left dangling at the end.
    $seq = join $delimiter, @fragments if @fragments;

    $seq =~ s/^\s+|\s+$//g;

    return $seq;
}

#============================================================================#
1;