package PGF::Newbler::Scaffolds454; =head1 NAME Scaffolds454 - Parse and read 454Scaffolds.txt files =head1 SYNOPSYS use PGF::Newbler::Scaffolds454; # Parse Scaffolds.txt and create object # my $scaffObj = Scaffolds454->new("./data/454Scaffolds.txt"); # within the $scaff object, there is access to the following methods: $scaffObj->getContigs() $scaffObj->scaffoldLength(...) $scaffObj->allScaffolds() $scaffObj->totalScaffolds() $scaffObj->getGaps() $scaffObj->getContigScaffold(...); $scaffObj->getContigOrientation(...); # Returns an array of all contig names # @contigs = $scaffObj->getContigs() # When used with scaffold name returns an array of contig names in scaffold # @contigs = $scaffObj->getContigs("scaffold00008"); # Returns scaffold length given a scaffold # $length = $scaffObj->scaffoldLength("scaffold00008"); # Returns an array of all the scaffold names # @scaffoldNames = $scaffObj->allScaffolds(); # Returns a count value of the number of scaffolds # $scaffoldCount = $scaff->totalScaffolds(); #Returns array or scalar of gap data structure(s) Depending on how called # @gaps = $scaff->getGaps(); #gets array of all gaps @gaps = $scaff->getGaps('scaffold00008'); #gets array of all gaps in specified scaffold $gap = $scaff->getGaps(gap=>'s0001c00001c00002'); #gets single gap specified #gap data structure example: foreach my $gap (@gaps){ print "leftContig = ",$gap->{leftContigName} ,"\n"; print "leftContigLen = ",$gap->{leftContigLen} ,"\n"; print "gapSize = ",$gap->{gapSize} ,"\n"; print "rightContigName = ",$gap->{rightContigName},"\n"; print "rightContigLen = ",$gap->{rightContigLen} ,"\n"; print "scaffold = ",$gap->{scaffold} ,"\n"; print "gapName = ",$gap->{gapName} ,"\n\n"; } #Returns scaffold from which a contig belongs # my $scaffold = $scaffObj->getContigScaffold("contig000001"); #Returns contig length for a contig # my $length = $scaffObj->getContigLength("contig000001"); #Returns scaffold from which a contig belongs # my $scaffold = $scaffObj->getContigOrientation("contig000001"); =head1 AUTHOR Brian Foster DOE Joint Genome Institute 2800 Mitchell Drive Walnut Creek, Ca 94598 bfoster@lbl.gov =head1 VERSION $Revision: 1.8 $ $Date: 2009-08-26 17:18:24 $ =head1 HISTORY =over =item * B.Foster 2008/10/27 creation =back =cut use strict; use warnings; use Carp; #============================================================================# sub new { my ($class, $file) = @_; my $self=_parse_454Scaffolds($file); bless $self, $class; return $self; } #============================================================================# sub totalScaffolds{ my $self = shift; return scalar(@{$self->[1]}); } #============================================================================# sub allScaffolds{ my $self = shift; return @{$self->[1]}; } #============================================================================# sub scaffoldLength{ my $self = shift; my $scaffold = $_[0]; my $return; if (! defined $scaffold){ confess "scaffold not specified in scaffoldLength\n"; } else{ if (exists $self->[2]{$scaffold}){ $return = $self->[2]{$scaffold}[-1]{sEnd}; } else{ confess "scaffold $scaffold does not exist\n"; } } return $return; } #============================================================================# sub getContigs{ my $self = shift; my $scaffold = $_[0]; my %return; # if no args return all contigs if (! defined $scaffold){ foreach my $scaff (keys %{$self->[2]}){ for (my $i=1;$i<@{$self->[2]{$scaff}};$i++){ if ($self->[2]{$scaff}[$i]{contig} =~ /contig/){ $return{$self->[2]{$scaff}[$i]{contig}}++; } } } } # else scaffold filter specified else{ if (exists $self->[2]{$scaffold}){ for (my $i=1;$i<@{$self->[2]{$scaffold}};$i++){ if ($self->[2]{$scaffold}[$i]{contig} =~ /contig/){ $return{$self->[2]{$scaffold}[$i]{contig}}++; } } } else{ confess "scaffold $scaffold does not exist in assembly\n"; } } return (keys %return); } #============================================================================# sub getGaps{ my ($self, %hashArgs) = @_; my @returnArray; # if no args return all gaps if (! %hashArgs){ foreach my $gap (sort keys %{$self->[0]}){ push @returnArray,$self->[0]{$gap}; } return @returnArray; } # else filters specified else{ my $keys = join(" ",keys(%hashArgs)); #gap key found return single gap ref if($keys =~ /\bgap\b/){ return $self->[0]{$hashArgs{gap}}; } #scaffold key found return array of all gaps in scaffold elsif($keys =~ /\bscaffold\b/){ foreach my $gap (sort keys %{$self->[0]}){ push @returnArray,$self->[0]{$gap} if ($self->[0]{$gap}{scaffold} eq $hashArgs{scaffold}); } return @returnArray; } else{ confess "incorrectly keyed arguments to getGeps\n"; } } } #============================================================================# sub getContigScaffold{ my ($self, $contig) = @_; my $scaffold = ""; if (! defined $contig){ confess "contig not specified in getContigScaffold\n"; } if ($self->[3]{$contig}){ $scaffold = $self->[3]{$contig}{contig}; } return $scaffold; } #============================================================================# sub getContigLength{ my ($self, $contig) = @_; my $length = ""; if (! defined $contig){ confess "contig not specified in getContigScaffold\n"; } if ($self->[3]{$contig}){ $length = $self->[3]{$contig}{length}; } return $length; } #============================================================================# sub getContigOrientation{ my ($self, $contig) = @_; my $orient = ""; if (! defined $contig){ confess "contig not specified in getContigScaffold\n"; } if ($self->[3]{$contig}){ $orient = $self->[3]{$contig}{orient}; } return $orient; } #============================================================================# sub createContigOrientationFile { my $self = shift; my $outputFile = shift; my %contigOrientations = (); my @contigs = $self->getContigs(); foreach my $contig (@contigs) { my $orientation = $self->getContigOrientation($contig); $contigOrientations{$contig} = $orientation; } if ( %contigOrientations ) { unless( open FH, ">$outputFile" ) { confess "Failed to create contig orientation file $outputFile"; } foreach my $contig ( sort {$a cmp $b} keys %contigOrientations ) { print FH "$contig\t$contigOrientations{$contig}\n"; } close FH; } } #============================================================================# sub _parse_454Scaffolds{ my $file = $_[0]; my (%scaff,%contig2scaff); my ($scaff,$sStart,$sEnd,$index,$type,$contig,$cStart,$cEnd,$orient); confess "454Scaffolds file not specified\n" if (! defined $file); open (_IN,$file) or confess "could not open $file for reading\n"; while(<_IN>){ chomp; ($scaff,$sStart,$sEnd,$index,$type,$contig,$cStart,$cEnd,$orient) = split/\s+/,$_; next unless $contig; $scaff{$scaff}[$index]{'sStart'} = $sStart; $scaff{$scaff}[$index]{'sEnd'} = $sEnd; $scaff{$scaff}[$index]{'contig'} = $contig; $scaff{$scaff}[$index]{'cStart'} = $cStart; $scaff{$scaff}[$index]{'cEnd'} = $cEnd; $scaff{$scaff}[$index]{'orient'} = ($orient) ? $orient : ""; $contig2scaff{$contig}{contig} = $scaff if $contig =~ /contig/; $contig2scaff{$contig}{length} = $cEnd if $contig =~ /contig/; $contig2scaff{$contig}{orient} = ($orient && $contig =~ /contig/)? $orient : ""; } close(_IN); #form gap hash my (%gaps,$gapName); foreach my $scaff (sort keys %scaff){ for (my $i=1;$i$scaff{$scaff}[$i-1]{'contig'}, leftContigLen =>$scaff{$scaff}[$i-1]{'sEnd'}-$scaff{$scaff}[$i-1]{'sStart'}+1, gapSize=>$scaff{$scaff}[$i]{'sEnd'}-$scaff{$scaff}[$i]{'sStart'}+1, rightContigName=>$scaff{$scaff}[$i+1]{'contig'}, rightContigLen=>$scaff{$scaff}[$i+1]{'sEnd'}-$scaff{$scaff}[$i+1]{'sStart'}+1, scaffold=>$scaff, gapName=>$gapName }; } } my @scaffolds = keys(%scaff); return [\%gaps, \@scaffolds, \%scaff,\%contig2scaff]; } 1; # so the require or use succeeds