#!/usr/local/bin/perl package main; our $SEE; package CDNA::Splice_graph_assembler; use strict; use warnings; use Carp; use Overlap_piler; use CDNA::CDNA_alignment; use CDNA::Alignment_segment; use Data::Dumper; no warnings "recursion"; our $FUZZ_DIST = 20; # bp's not to trust at non-splice termini; untrustworthy alignment extensions. ## allowable connections between gene structure components: my %ACCEPTABLE_CONNECTIONS = ( "terminal_left_exon" => {"intron" => 1}, "internal_exon" => {"intron" => 1}, "intron" => {"internal_exon" => 1, "terminal_right_exon" => 1, }, ); #### sub new { my $packagename = shift; my $self = { _graph_nodes => [], # each and every graph node (main repository) _incoming_alignments => [], # alignments to assemble _assemblies => [], # resulting alignment assemblies _unincorporated_alignments => [], _valid_splice_paths => [], # Splice_graph_path objects _assembled_splice_paths => [], # compatible splice paths chained into splice path assemblies. ## other helpers _graph_node_hashkey_lookup => {}, # key is lend,rend,type,orient _graph_node_via_nodeID => {}, # node access by nodeID ## various node types: (all subsets of _graph_nodes and accessible via helpers above. _internal_exons => [], _introns => [], _terminal_left_exons => [], _terminal_right_exons => [], _singleton_exons => [], ## misc _terminal_exon_nodeID_to_nonsplice_position_list => {}, # track coords of non-splice site ends in terminal exon supports }; bless ($self, $packagename); return ($self); } #### sub assemble_alignments { my $self = shift; my @alignments = @_; ## build the splicing graph. $self->build_splicing_graph(@alignments); ## Chain together splice paths that are compatible: print "-Chaining compatible splice paths\n" if $SEE; $self->_chain_compatible_splice_paths(); ## Extend splice paths into maximally scoring complete paths with terminal exons print "-Extending splice paths into maximal structures.\n" if $SEE; $self->_extend_splice_paths_to_termini(); ## products are included in the assemblies list. ## Add the singleton assemblies $self->_append_singletons_to_assembly_list(); ## convert the singletons to cdna alignments and add to assembly list ## assign the incoming alignments to the assemblies that contain them. print "-Assigning transcript alignments to assemblies\n" if $SEE; my $have_unincorporated_alignments_flag = $self->_correlate_assemblies_with_incoming_alignments(); if ($have_unincorporated_alignments_flag) { ## find paths from unincorporated terminal segments. print "-Not all alignments were included. Exploring assemblies from uninorporated alignments\n" if $SEE; $self->_explore_assemblies_from_unincorporated_alignments(); print "-Assigning transcript alignments to assemblies, again...\n" if $SEE; $have_unincorporated_alignments_flag = $self->_correlate_assemblies_with_incoming_alignments(); if ($have_unincorporated_alignments_flag) { ## something horribly has gone wrong. confess "Error, not all incoming alignments are accounted for!\n" . $self->toString(); } } print $self->toString() if $SEE; } #### sub build_splicing_graph { my $self = shift; my @alignments = @_; $self->set_incoming_alignments(@alignments); ## going to process the alignments in several phases: # -decompose alignments into structural components # -identify valid paths to connect components # -extract assemblies using valid paths # -associate transcripts with assemblies ## first, examine the alignments with the most segments first: @alignments = reverse sort {$a->get_num_segments() <=> $b->get_num_segments()} @alignments; ## build components of the splice graph ## add internal exons and all introns to splice graph: (unambiguous structures) ## terminal structures are not as well defined. Apply these later. print "-Decomposing alignments into gene structure components.\n" if $SEE; foreach my $alignment (@alignments) { if ($alignment->get_num_segments() > 1) { $self->_decompose_unambiguous_alignment_structures_add_nodes($alignment); } } ## Add terminal exons: ## Only add terminal exons where it's clear that it's not just part of an existing internal exon print "-Adding terminal exon segments\n" if $SEE; my @single_segments; # capture those alignments w/o introns foreach my $alignment (@alignments) { my $segment_orient = $alignment->get_spliced_orientation(); if ($alignment->get_num_segments() > 1) { foreach my $segment ($alignment->get_alignment_segments()) { if ($segment->is_first() || $segment->is_last()) { my $exon_type = ($segment->is_first()) ? "terminal_left_exon" : "terminal_right_exon"; $self->_try_terminal_exon_addition($exon_type, $segment, $segment_orient); ## at this point, introns and internal exons are scored only by perfect support (exact boundaries). ## terminal exons, on the other hand, have evidence scored based on boundary match. ## Now, need to augment scores of internal exons that encompass terminal exons my ($segment_lend, $segment_rend) = $segment->get_coords(); $self->_augment_internal_exon_scores_using_terminal_alignment_segment($exon_type, $segment_lend, $segment_rend, $segment_orient); } } } else { push (@single_segments, $alignment); } } ## reconstruct some internal exons based on overlapping right/left terminal exons print "-Merging overlapping right-to-left terminal exons\n" if $SEE; $self->_merge_overlapping_right_to_left_terminal_exons(); ## apply intron-less segments as evidence to internal and terminal segments ## and instantiate single exons where they're not simply supporting existing structures. print "-Analyzing intronless alignments\n" if $SEE; $self->_analyze_intronless_alignments(@single_segments); print "-Building the splice graph\n" if $SEE; $self->_build_splice_graph(); # could/should have done this earlier during alignment parse for efficiency. return; } #### sub _find_internal_exons_encompassing_coords_and_share_boundary { my $self = shift; my ($exon_type, $segment_lend, $segment_rend, $segment_orient) = @_; unless ($exon_type =~ /left|right/) { confess "invalide terminal exon type: $exon_type\n"; } my @internal_exons_found; foreach my $internal_exon (@{$self->{_internal_exons}}) { my $internal_exon_orient = $internal_exon->get_orient(); my ($internal_exon_lend, $internal_exon_rend) = $internal_exon->get_coords(); ## if internal exon encompasses the terminal exon, add it to its evidence collection if ( ($segment_orient eq $internal_exon_orient) && ($internal_exon_lend <= ($segment_lend + $FUZZ_DIST) && ($segment_rend - $FUZZ_DIST) <= $internal_exon_rend) # internal encompasses it && ( ($exon_type eq "terminal_right_exon" && $internal_exon_lend == $segment_lend) || ($exon_type eq "terminal_left_exon" && $internal_exon_rend == $segment_rend) ) ## a splice boundary in common ) { push (@internal_exons_found, $internal_exon); } } return (@internal_exons_found); } sub _find_terminal_exons_encompassing_segment { my $self = shift; my ($lend, $rend, $orient) = @_; my @nodes_encompassing_segment; foreach my $node_obj (@{$self->{_terminal_left_exons}}, @{$self->{_terminal_right_exons}}) { my ($node_lend, $node_rend) = $node_obj->get_coords(); my $node_orient = $node_obj->get_orient(); if ( ($node_orient eq $orient || $orient eq '?') && ($lend + $FUZZ_DIST >= $node_lend) && ($rend - $FUZZ_DIST <= $node_rend) ) { push (@nodes_encompassing_segment, $node_obj); } } return (@nodes_encompassing_segment); } #### sub get_assemblies { my $self = shift; return (@{$self->{_assemblies}}); } #### sub get_incoming_alignments { my $self = shift; return (@{$self->{_incoming_alignments}}); } #### sub get_unincorporated_alignments { my $self = shift; return (@{$self->{_unincorporated_alignments}}); } #### sub _add_alignment_assembly { my $self = shift; my (@cdna_alignments) = @_; push (@{$self->{_assemblies}}, @cdna_alignments); return; } #### sub set_incoming_alignments { my $self = shift; my @alignments = @_; @{$self->{_incoming_alignments}} = @alignments; return; } #### sub _decompose_unambiguous_alignment_structures_add_nodes { my $self = shift; my ($alignment) = @_; my $spliced_orient = $alignment->get_spliced_orientation(); my @path_nodes; ## Add internal exons my @segments = $alignment->get_alignment_segments(); foreach my $segment (@segments) { #print "seg: " . $segment->toString() . "\n"; if ($segment->is_internal()) { my ($exon_lend, $exon_rend) = $segment->get_coords(); my $node = $self->_add_internal_exon($exon_lend, $exon_rend, $spliced_orient); push (@path_nodes, $node); } } ## Add introns my @intron_coords = $alignment->get_intron_coords(); foreach my $intron_coordset (@intron_coords) { my ($intron_lend, $intron_rend) = @$intron_coordset; #already sorted my $node = $self->_add_intron($intron_lend, $intron_rend, $spliced_orient); push (@path_nodes, $node); } ## add a path $self->_extract_and_add_path_from_node_list(@path_nodes); return; } #### sub _extract_and_add_path_from_node_list { my $self = shift; my @path_nodes = @_; ## sort by genome order: @path_nodes = sort {$a->{lend} <=> $b->{lend}} @path_nodes; my @ordered_nodeID_list; my @coords; foreach my $path_node (@path_nodes) { my $nodeID = $path_node->get_nodeID(); push (@ordered_nodeID_list, $nodeID); push (@coords, $path_node->get_coords()); } my $orient = $path_nodes[0]->get_orient(); @coords = sort {$a<=>$b} @coords; my $lend = shift @coords; my $rend = pop @coords; my $splice_graph_path = Splice_graph_path->new($lend, $rend, $orient, \@ordered_nodeID_list); $self->_add_splice_graph_path($splice_graph_path); return; } #### sub _try_terminal_exon_addition { my $self = shift; my ($exon_type, $segment, $orient) = @_; my ($segment_lend, $segment_rend) = $segment->get_coords(); print "Examining terminal exon: $exon_type, $segment_lend, $segment_rend\n" if $SEE; ## only consider this a genuine terminal exon if it doesn't appear to be part of an ## existing internal exon from a more complete alignment if (my @internal_segments = $self->_find_internal_exons_encompassing_coords_and_share_boundary($exon_type, $segment_lend, $segment_rend, $orient)) { if ($SEE) { print "$exon_type, $segment_lend-$segment_rend, $orient, found already represented by internal exons:\n"; foreach my $internal_segment (@internal_segments) { print "\t" . $internal_segment->toString() . "\n"; } } return; } my $exon = $self->_find_existing_terminal_exon ($exon_type, $segment_lend, $segment_rend, $orient); if ($exon) { print "-found existing terminal exon with shared boundary: " . $exon->toString() . "\n" if $SEE; my $nodeID = $exon->get_nodeID(); ## check for boundary adjustment: my ($exon_lend, $exon_rend) = $exon->get_coords(); my $nonsplice_coord = undef; if ($exon_type eq "terminal_left_exon") { $nonsplice_coord = $segment_lend; if ($segment_lend < $exon_lend) { print "-extending left boundary to $segment_lend\n" if $SEE; $exon->set_coords($segment_lend, $exon_rend); # extend left boundary } } elsif ($exon_type eq "terminal_right_exon") { $nonsplice_coord = $segment_rend; if ($segment_rend > $exon_rend) { print "-extending right boundary to $segment_rend\n" if $SEE; $exon->set_coords($exon_lend, $segment_rend); } } else { confess "Error, exon type $exon_type not accounted for. "; # should never get here anyway } $exon->increment_evidence_support(); ## account for extra evidence $self->_add_to_terminal_exon_nonsplice_position_list($nodeID, $nonsplice_coord); } else { ## add new terminal exon: $self->_add_graph_node($exon_type, $segment_lend, $segment_rend, $orient); } return; } #### sub _add_to_terminal_exon_nonsplice_position_list { my $self = shift; my ($nodeID, $nonsplice_coord) = @_; my $nodeID_to_pos_list_href = $self->{_terminal_exon_nodeID_to_nonsplice_position_list}; my $list_aref = $nodeID_to_pos_list_href->{$nodeID}; unless (ref $list_aref) { $list_aref = $nodeID_to_pos_list_href->{$nodeID} = []; } push (@$list_aref, $nonsplice_coord); return; } #### sub _find_existing_terminal_exon { my $self = shift; my ($exon_type, $segment_lend, $segment_rend, $orient) = @_; my $exon_list_aref = undef; if ($exon_type eq "terminal_left_exon") { $exon_list_aref = $self->{_terminal_left_exons}; } elsif ($exon_type eq "terminal_right_exon") { $exon_list_aref = $self->{_terminal_right_exons}; } else { confess "exon_type $exon_type not accepted for terminal exons"; } foreach my $exon (@$exon_list_aref) { my ($lend, $rend) = $exon->get_coords(); if ( ($exon->get_orient() eq $orient) && ( ($exon_type eq "terminal_left_exon" && $rend == $segment_rend) || ($exon_type eq "terminal_right_exon" && $lend == $segment_lend) ) ) { return ($exon); # found it! } } return (undef); # didn't find one. } #### sub _augment_internal_exon_scores_using_terminal_alignment_segment { my $self = shift; my ($exon_type, $segment_lend, $segment_rend, $segment_orient) = @_; my @relevant_internal_exons = $self->_find_internal_exons_encompassing_coords_and_share_boundary($exon_type, $segment_lend, $segment_rend, $segment_orient); foreach my $internal_exon (@relevant_internal_exons) { $internal_exon->increment_evidence_support(); } return; } #### sub _merge_overlapping_right_to_left_terminal_exons { my $self = shift; ## looking for this situation: # <----------- right terminal exon # ---------> left terminal exon # that can be merged into: # <--------------> an internal exon # my %nodeIDs_targeted_for_removal; # if they fully overlap, construct a nice internal exon w/o extensions beyond splice boundary foreach my $right_terminal_exon (@{$self->{_terminal_right_exons}}) { my $right_orient = $right_terminal_exon->get_orient(); my ($right_lend, $right_rend) = $right_terminal_exon->get_coords(); my $right_nodeID = $right_terminal_exon->get_nodeID(); foreach my $left_terminal_exon (@{$self->{_terminal_left_exons}}) { my $left_orient = $left_terminal_exon->get_orient(); my ($left_lend, $left_rend) = $left_terminal_exon->get_coords(); my $left_nodeID = $left_terminal_exon->get_nodeID(); unless ($right_orient eq $left_orient) { next; } # must be transcribed on same strand! ## check for overlap: unless ($left_lend <= $right_rend && $left_rend >= $right_lend) { next;} ## make sure right's left splice is before left's right splice (doh! should have better names). unless ($right_lend < $left_rend) { next; } ## check for extensions: my $left_overhang = $right_lend - $left_lend; my $right_overhang = $right_rend - $left_rend; my $merge_flag = 0; if ($left_overhang <= $FUZZ_DIST && $right_overhang <= $FUZZ_DIST) { ## nice merge, as in illustration $merge_flag = 1; ## also, target these for deletion now. $nodeIDs_targeted_for_removal{$right_nodeID} = 1; $nodeIDs_targeted_for_removal{$left_nodeID} = 1; } else { ## must check position lists to see if transcripts are contained that have ## nicely overlapping boundaries if ($self->_right_left_terminal_exons_overlap_via_position_lists($right_nodeID, $left_nodeID, $right_lend, $left_rend)) { $merge_flag = 1; if ($left_overhang <= $FUZZ_DIST) { ## check for insufficient overhang $nodeIDs_targeted_for_removal{$left_nodeID} = 1; } if ($right_overhang <= $FUZZ_DIST) { $nodeIDs_targeted_for_removal{$right_nodeID} = 1; } } } if ($merge_flag) { $self->_merge_terminal_exons($right_terminal_exon, $left_terminal_exon); } } } ## process deletions: if (%nodeIDs_targeted_for_removal) { $self->_purge_nodes(%nodeIDs_targeted_for_removal); } return; } #### sub _merge_terminal_exons { my $self = shift; my ($right_terminal_exon, $left_terminal_exon) = @_; my ($right_lend, $right_rend) = $right_terminal_exon->get_coords(); my ($left_lend, $left_rend) = $left_terminal_exon->get_coords(); my ($exon_lend, $exon_rend) = ($right_lend, $left_rend); ## splice junctions for new internal exon my $orient = $right_terminal_exon->get_orient(); if ($orient ne $left_terminal_exon->get_orient()) { confess "Error, trying to merge two terminal exons with opposite transcriptional orientations!"; } if ($self->_graph_node_exists("internal_exon", $exon_lend, $exon_rend, $orient)) { confess "Error, trying to merge two terminal exons into an internal exon that already exists!"; } my $node = $self->_add_graph_node("internal_exon", $exon_lend, $exon_rend, $orient); ## add to it the evidence from the other nodes being merged: my $evidence_support_to_add = $left_terminal_exon->get_num_evidence_support() + $right_terminal_exon->get_num_evidence_support(); $evidence_support_to_add -= 1; # node already has value of one. if ($evidence_support_to_add) { $node->increment_evidence_support($evidence_support_to_add); } return; } #### sub _add_splice_graph_path { my $self = shift; my ($splice_graph_path) = @_; ## add it as long as: # -it's not a subpath of an already existing path # -if an existing path is a subpath of this, remove it and replace it with this path my @current_valid_splice_paths = $self->_get_valid_splice_paths(); ## check to see if splice_graph_path is already represented in the current path list: foreach my $current_valid_splice_path (@current_valid_splice_paths) { if ($splice_graph_path->is_subpath_of($current_valid_splice_path)) { return; # nothing to do; path is already included as a subset of the current path set } } # if got this far, our new path is not already fully represented. # add it, and any other existing splice paths that are not a subpath of it. my @new_valid_splice_paths = ($splice_graph_path); foreach my $current_valid_splice_path (@current_valid_splice_paths) { if (! $current_valid_splice_path->is_subpath_of($splice_graph_path)) { push (@new_valid_splice_paths, $current_valid_splice_path); } } $self->_set_valid_splice_paths(@new_valid_splice_paths); return; } #### sub _get_valid_splice_paths { my $self = shift; return (@{$self->{_valid_splice_paths}}); } #### sub _set_valid_splice_paths { my $self = shift; my @valid_splice_paths = @_; ## completely stomps the existing contents!!! @{$self->{_valid_splice_paths}} = @valid_splice_paths; return; } sub _graph_node_exists { my $self = shift; my ($type, $lend, $rend, $orient) = @_; my $hashkey = $self->_get_hash_key($type, $lend, $rend, $orient); return (exists $self->{_graph_node_hashkey_lookup}->{$hashkey}); } sub _get_graph_node_via_coords_n_type { my $self = shift; my ($type, $lend, $rend, $orient) = @_; my $hashkey = $self->_get_hash_key($type, $lend, $rend, $orient); my $node = $self->{_graph_node_hashkey_lookup}->{$hashkey}; unless ($node) { confess "Error, no graph node retrieved based on data ($type, $lend, $rend, $orient)"; } return ($node); } #### sub _add_graph_node { my $self = shift; my ($type, $lend, $rend, $orient) = @_; my $hash_key = $self->_get_hash_key($type, $lend, $rend, $orient); ## only add the node if it doesn't already exist! my $graph_node_hashkey_lookup_href = $self->{_graph_node_hashkey_lookup}; if (my $existing_node = $graph_node_hashkey_lookup_href->{$hash_key}) { ## increment evidence for existing node: $existing_node->increment_evidence_support(); return ($existing_node); } else { # add it my $node = Splice_graph_node->new($type, $lend, $rend, $orient); push (@{$self->{_graph_nodes}}, $node); # add to complete node list ## helpers for node access: $graph_node_hashkey_lookup_href->{$hash_key} = $node; # store in lookup table my $nodeID = $node->get_nodeID(); $self->{_graph_node_via_nodeID}->{$nodeID} = $node; ## splay based on type: my $type = $node->get_type(); if ($type eq "internal_exon") { push (@{$self->{_internal_exons}}, $node); } elsif ($type eq "intron") { push (@{$self->{_introns}}, $node); } elsif ($type eq "terminal_left_exon") { push (@{$self->{_terminal_left_exons}}, $node); } elsif ($type eq "terminal_right_exon") { push (@{$self->{_terminal_right_exons}}, $node); } elsif ($type eq "singleton_exon") { push (@{$self->{_singleton_exons}}, $node); } else { confess "Error, do not recognize type: $type for node"; } return ($node); } } #### sub _add_intron { my $self = shift; my ($intron_lend, $intron_rend, $spliced_orient) = @_; my $node = $self->_add_graph_node("intron", $intron_lend, $intron_rend, $spliced_orient); return ($node); } #### sub _add_internal_exon { my $self = shift; my ($exon_lend, $exon_rend, $spliced_orient) = @_; my $node = $self->_add_graph_node("internal_exon", $exon_lend, $exon_rend, $spliced_orient); return ($node); } #### sub get_graph_nodes { my $self = shift; return (sort {$a->{lend}<=>$b->{lend}} @{$self->{_graph_nodes}}); } #### sub get_graph_node_via_nodeID { my $self = shift; my $nodeID = shift; my $node_obj = $self->{_graph_node_via_nodeID}->{$nodeID} or confess "Error, no node found based on nodeID: $nodeID\n" . $self->toString(); return ($node_obj); } #### sub toString { my $self = shift; my @graph_nodes = $self->get_graph_nodes(); my $num_graph_nodes = scalar (@graph_nodes); my $text = "Splice_graph_assembler instance with $num_graph_nodes graph nodes:\n"; foreach my $graph_node (@graph_nodes) { $text .= $graph_node->toString() . "\n"; } $text .= "\tvalid paths thru nodes:\n"; foreach my $splice_path ($self->_get_valid_splice_paths()) { $text .= "\t" . $splice_path->toString() . "\n"; } $text .= "\tassembled splice paths:\n"; foreach my $splice_path (@{$self->{_assembled_splice_paths}}) { $text .= "\t" . $splice_path->toString() . "\n"; } $text .= "\tFinal assemblies with termini\n"; foreach my $assembly ($self->get_assemblies()) { my @node_list = @{$assembly->{__Splice_graph_assembler_nodeID_list}}; $text .= "\t" . join (",", @node_list) . "\n"; } $text .= $self->toAlignIllustration(60); return ($text); } =item toAlignIllustration() =over 4 B illustrates the individual cDNAs to be assembled along with the final products. B $max_line_chars(optional) $max_line_chars is an integer representing the maximum number of characters in a single line of output to the terminal. The default is 100. B $alignment_illustration_text $alignment_illustration_text is a string containing a paragraph of text which illustrates the alignments and assemblies. An example is below: ---> <--> <-----> <---> <---------------- (+)gi|1199466 ---> <--> <-----> <---> <------------ (+)gi|1209702 ----> <--> <---- (+)AV827070 ----> <--> <--- (+)AV828861 ----> <--> <--- (+)AV830936 ---> <--> <- (+)H36350 ASSEMBLIES: (1) ----> <--> <-----> <---> <---------------- (+) gi|1199466, gi|1209702, AV827070, AV828861, AV830936, H36350 =back =cut ; sub toAlignIllustration () { my $self = shift; my $max_line_chars = shift; $max_line_chars = ($max_line_chars) ? $max_line_chars : 100; #if not specified, 100 chars / line is default. ## Get minimum coord for relative positioning. my @coords; my @alignments = @{$self->{_incoming_alignments}}; foreach my $alignment (@alignments) { my @c = $alignment->get_coords(); push (@coords, @c); } @coords = sort {$a<=>$b} @coords; print "coords: @coords\n" if $::SEE; my $min_coord = shift @coords; my $max_coord = pop @coords; my $rel_max = $max_coord - $min_coord; my $alignment_text = ""; ## print each alignment followed by assemblies: my $num_alignments = $#alignments + 1; $alignment_text .= "Individual Alignments: ($num_alignments)\n"; my $i = 0; foreach my $alignment (@alignments) { $alignment_text .= (sprintf ("%3d ", $i)) . $alignment->toAlignIllustration($min_coord, $rel_max, $max_line_chars) . "\n"; $i++; } my @assemblies = @{$self->{_assemblies}}; my $num_assemblies = $#assemblies + 1; $alignment_text .= "\n\nASSEMBLIES: ($num_assemblies)\n"; foreach my $assembly (@assemblies) { $alignment_text .= " " . $assembly->toAlignIllustration($min_coord, $rel_max, $max_line_chars) . "\n"; } if (my @unincorporated_alignments = $self->get_unincorporated_alignments()) { my $num_unincorporated = scalar @unincorporated_alignments; $alignment_text .= "\n\nUNINCORPORATED_ALIGNMENTS($num_unincorporated)\n"; foreach my $alignment (@unincorporated_alignments) { $alignment_text .= " " . $alignment->toAlignIllustration($min_coord, $rel_max, $max_line_chars) . "\n"; } } return ($alignment_text); } #### sub _get_hash_key { my $self = shift; my ($type, $lend, $rend, $orient) = @_; return ("$type,$lend,$rend,$orient"); } #### sub _purge_nodes { my $self = shift; my (%nodeIDs) = @_; ## perform deletions based on hashkeys foreach my $nodeID (keys %nodeIDs) { print "PURGING node: $nodeID\n" if $SEE; my $node = $self->{_graph_node_via_nodeID}->{$nodeID}; my $type = $node->get_type(); my ($lend, $rend) = $node->get_coords(); my $orient = $node->get_orient(); my $hashkey = $self->_get_hash_key($type, $lend, $rend, $orient); delete $self->{_graph_node_hashkey_lookup}->{$hashkey}; delete $self->{_graph_node_via_nodeID}->{$nodeID}; } ## now, do array replacements: foreach my $node_list_aref ( $self->{_graph_nodes}, ## only deleting the terminal exons, even though this could be more generic $self->{_terminal_left_exons}, $self->{_terminal_right_exons}, ) { my @replacments; my $need_replacement_flag = 0; foreach my $node (@$node_list_aref) { my $nodeID = $node->get_nodeID(); if ($nodeIDs{$nodeID}) { ## must delete! $need_replacement_flag = 1; } else { push (@replacments, $node); } } if ($need_replacement_flag) { @$node_list_aref = @replacments; ## Doing Replacment } } return; } #### sub _right_left_terminal_exons_overlap_via_position_lists { my $self = shift; my ($right_nodeID, $left_nodeID, $left_splice_coord, $right_splice_coord) = @_; ## looking for the following: # # ## termini of transcripts in right terminal # <---------------------X---------------------- ## right terminal exon # -------------------X---------------------------> ## left terminal exon # ## termini of transcripts in left terminal # # # The X termini show the endpoints of other transcripts incorporated that would define a proper merging situation. # look for boundary pair such that both are included within the splice junctions # and right boundary >= left boundary my $right_pos_list_aref = $self->{_terminal_exon_nodeID_to_nonsplice_position_list}->{$right_nodeID}; my $left_pos_list_aref = $self->{_terminal_exon_nodeID_to_nonsplice_position_list}->{$left_nodeID}; foreach my $right_pos (@$right_pos_list_aref) { unless ($right_pos > $left_splice_coord && $right_pos < $right_splice_coord) { next; } foreach my $left_pos (@$left_pos_list_aref) { unless ($left_pos > $left_splice_coord && $left_pos < $right_splice_coord) { next; } if ($right_pos >= $left_pos) { return (1); # found suitable case } } } return (0); # no such example found. } #### sub _analyze_intronless_alignments { my $self = shift; my @intronless_segments = @_; ## actually alignment objects. print "method: _analyze_intronless_alignments()\n" if $SEE; my %applied_segment_indices; { ## hack in a hidden attribute that uniquely identifies each of these segments. ## perl allows this, but I'm not so happy with doing it. for now, it'll be fine. ## This __intronless_segment_index will be used to identify those segments that are ## applied to existing structures (internal/terminal exons), and those left over ## and still need to be accounted for. my $id = 0; foreach my $intronless_segment (@intronless_segments) { $id++; $intronless_segment->{__intronless_segment_ID} = $id; } } ## increment evidence for internal exons containing intronless segment ## and extend terminal exons overlapping intronless segments ## first, examine the internal_segments: foreach my $internal_exon (@{$self->{_internal_exons}}) { my $internal_exon_orient = $internal_exon->get_orient(); my ($internal_exon_lend, $internal_exon_rend) = $internal_exon->get_coords(); foreach my $intronless_segment (@intronless_segments) { my $intronless_segment_orient = $intronless_segment->get_spliced_orientation(); my ($intronless_lend, $intronless_rend) = $intronless_segment->get_coords(); if ($intronless_segment_orient eq '?' || $intronless_segment_orient eq $internal_exon_orient) { ## look for encapsulation if ( ($intronless_lend + $FUZZ_DIST) >= $internal_exon_lend && ($intronless_rend - $FUZZ_DIST) <= $internal_exon_rend) { $internal_exon->increment_evidence_support(); $applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} } = 1; # incorporated already print "-intronless segment $intronless_lend, $intronless_rend, $intronless_segment_orient found incorporated in internal exon: " . $internal_exon->toString() . "\n" if $SEE; } } } } ## examine left terminal exons ## first sort so we examine the right boundaries in order from right to left to faciliate extensions @intronless_segments = reverse sort {$a->{rend}<=>$b->{rend}} @intronless_segments; foreach my $left_terminal_exon (@{$self->{_terminal_left_exons}}) { my $left_terminal_exon_orient = $left_terminal_exon->get_orient(); # -----------------> # left terminal exon foreach my $intronless_segment (@intronless_segments) { my ($left_terminal_lend, $left_terminal_rend) = $left_terminal_exon->get_coords(); # do it here because it might change below my $intronless_segment_orient = $intronless_segment->get_spliced_orientation(); my ($intronless_lend, $intronless_rend) = $intronless_segment->get_coords(); unless ($intronless_segment_orient eq '?' || $intronless_segment_orient eq $left_terminal_exon_orient) { next; } ## must overlap unless ($left_terminal_lend <= $intronless_rend && $left_terminal_rend >= $intronless_lend) { next; } unless ($intronless_rend - $FUZZ_DIST <= $left_terminal_rend) { next; } ## must be : # -----------------------> # left terminal exon # ---------------- # intronless segment, overlaps but not passed the splice boundary # extend lend if intronless segment passes it if ($intronless_lend < $left_terminal_lend) { # update coords: $left_terminal_exon->set_coords($intronless_lend, $left_terminal_rend); } ## if got this far, evidence is incorporated. $left_terminal_exon->increment_evidence_support(); $applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} } = 1; # incorporated print "-intronless segment $intronless_lend, $intronless_rend, $intronless_segment_orient incorporated in left terminal exon: " . $left_terminal_exon->toString() . "\n" if $SEE; } } ## examine right terminal exons ## first sort so we examine the left boundaries in order from left to right to faciliate extensions @intronless_segments = sort {$a->{lend}<=>$b->{lend}} @intronless_segments; foreach my $right_terminal_exon (@{$self->{_terminal_right_exons}}) { my $right_terminal_exon_orient = $right_terminal_exon->get_orient(); # <----------------- # right terminal exon foreach my $intronless_segment (@intronless_segments) { my ($right_terminal_lend, $right_terminal_rend) = $right_terminal_exon->get_coords(); # do it here because it might change below my $intronless_segment_orient = $intronless_segment->get_spliced_orientation(); my ($intronless_lend, $intronless_rend) = $intronless_segment->get_coords(); unless ($intronless_segment_orient eq '?' || $intronless_segment_orient eq $right_terminal_exon_orient) { next; } ## must overlap unless ($right_terminal_lend <= $intronless_rend && $right_terminal_rend >= $intronless_lend) { next; } ## must be : # <---------------- # right terminal exon # ---------------- # intronless segment, overlaps but not passed the splice boundary unless ($intronless_lend + $FUZZ_DIST >= $right_terminal_lend) { next; } # extend lend if intronless segment passes it if ($intronless_rend > $right_terminal_rend) { # update coords: $right_terminal_exon->set_coords($right_terminal_lend, $intronless_rend); } ## if got this far, evidence is incorporated. $right_terminal_exon->increment_evidence_support(); $applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} } = 1; # incorporated print "-intronless segment $intronless_lend, $intronless_rend, $intronless_segment_orient incorporated in right terminal exon: " . $right_terminal_exon->toString() . "\n" if $SEE; } } ## check to see if there are any unincorporated single segments, and if so, instantiate maximal single segments that contain them. my $have_leftover_single_segments_flag = 0; foreach my $intronless_segment (@intronless_segments) { unless ($applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} }) { $have_leftover_single_segments_flag = 1; last; } } unless ($have_leftover_single_segments_flag) { ## all done; they've all been accounted for. print "-All intronless segments are accounted for.\n" if $SEE; return; } ## if got here, we have some single segments that haven't been accounted for. print "-some intronless segments are as of yet unincorporated. Need to instantiate singleton exons\n" if $SEE; $self->_instantiate_maximal_single_exons(\@intronless_segments, \%applied_segment_indices); return; } #### sub _instantiate_maximal_single_exons { my $self = shift; my ($intronless_alignments_aref, $applied_segment_indices_href) = @_; print "\nmethod: _instantiate_maximal_single_exons\n" if $SEE; ## get all coords for segments: my %segment_id_to_coords; my %segment_id_to_alignment; my $plus_strand_overlap_assembler = new Overlap_piler(); my $minus_strand_overlap_assembler = new Overlap_piler(); my $got_plus_strand_flag = 0; my $got_minus_strand_flag = 0; foreach my $alignment (@$intronless_alignments_aref) { print "Got intronless alignment: " . $alignment->toToken() . "\n" if $SEE; my ($align_lend, $align_rend) = $alignment->get_coords(); my $align_id = $alignment->{__intronless_segment_ID}; $segment_id_to_coords{$align_id} = [$align_lend, $align_rend]; $segment_id_to_alignment{$align_id} = $alignment; my $spliced_orient = $alignment->get_spliced_orientation(); if ($spliced_orient eq '+' || $spliced_orient eq '?') { $plus_strand_overlap_assembler->add_coordSet($align_id, $align_lend, $align_rend); print "Adding $align_lend-$align_rend [s$spliced_orient] to plus strand assembler\n" if $SEE; if ($spliced_orient eq '+') { $got_plus_strand_flag = 1; } } if ($spliced_orient eq '-' || $spliced_orient eq '?') { $minus_strand_overlap_assembler->add_coordSet($align_id, $align_lend, $align_rend); print "Adding $align_lend-$align_rend [s$spliced_orient] to minus strand assembler\n" if $SEE; if ($spliced_orient eq '-') { $got_minus_strand_flag = 1; } } } ## unless we have some known spliced orientation, then just assemble everything using the plus strand assembler; either could be used w/ no difference. unless ($got_plus_strand_flag || $got_minus_strand_flag) { $got_plus_strand_flag = 1; } my @singleton_clusters; if ($got_plus_strand_flag) { print "\nBuilding Plus strand clusters of singletons:\n" if $SEE; my @plus_strand_clusters = $plus_strand_overlap_assembler->build_clusters(); if ($SEE) { print "\nSingleton assemblies, PLUS strand assembler:\n"; @plus_strand_clusters = sort {$a->[0]<=>$b->[0]} @plus_strand_clusters; foreach my $cluster (@plus_strand_clusters) { print "clustered IDs [+]: " . join ("-", @$cluster) . "\n"; } } push (@singleton_clusters, @plus_strand_clusters); } if ($got_minus_strand_flag) { print "\nBuilding Minus strand clusters of singletons:\n" if $SEE; my @minus_strand_clusters = $minus_strand_overlap_assembler->build_clusters(); if ($SEE) { print "\nSingleton assemblies, MINUS strand assembler:\n"; @minus_strand_clusters = sort {$a->[0]<=>$b->[0]} @minus_strand_clusters; foreach my $cluster (@minus_strand_clusters) { print "clustered IDs [-]: " . join ("-", @$cluster) . "\n"; } } push (@singleton_clusters, @minus_strand_clusters); } ## convert to singleton assemblies: my @singleton_assemblies; foreach my $singleton_cluster (@singleton_clusters) { my @ids = @$singleton_cluster; my ($min_lend, $max_rend); my %aligned_orient_counts; my %spliced_orient_counts; foreach my $id (@ids) { my ($lend, $rend) = @{$segment_id_to_coords{$id}}; if (! defined ($min_lend)) { ($min_lend, $max_rend) = ($lend, $rend); } else { if ($lend < $min_lend) { $min_lend = $lend; } if ($rend > $max_rend) { $max_rend = $rend; } } my $alignment = $segment_id_to_alignment{$id}; my $spliced_orient = $alignment->get_spliced_orientation(); my $aligned_orient = $alignment->get_aligned_orientation(); if ($spliced_orient ne '?') { $spliced_orient_counts{$spliced_orient}++; } $aligned_orient_counts{$aligned_orient}++; } ## get orientation value with most support: my @spliced_orients = reverse sort {$spliced_orient_counts{$a}<=>$spliced_orient_counts{$b}} keys %spliced_orient_counts; if (scalar @spliced_orients > 1) { # should only be one spliced orient, or no spliced orient if everything was '?' confess "Error, captured a singleton assembly with multiple spliced orientations."; } my $assembly_spliced_orient = shift @spliced_orients; unless ($assembly_spliced_orient) { ## no evidence for assembly spliced orientation. ## take the aligned orientation that has the greatest support. my @aligned_orients = reverse sort {$aligned_orient_counts{$a}<=>$aligned_orient_counts{$b}} keys %aligned_orient_counts; $assembly_spliced_orient = shift @aligned_orients; } unless ($assembly_spliced_orient =~ /^[\+\-]$/) { confess "Error, couldn't decide upon assembly orientation for singleton assembly"; } push (@singleton_assemblies, { lend => $min_lend, rend => $max_rend, ids => [@ids], length => $max_rend - $min_lend + 1, num_alignments_included => scalar @ids, orient => $assembly_spliced_orient, } ); } ## sort so that we maximize for number of alignments included and alignment span @singleton_assemblies = reverse sort {$a->{num_alignments_included}<=>$b->{num_alignments_included} || $a->{length}<=>$b->{length}} @singleton_assemblies; my %ids_accounted_for; SINGLETON_ASSEMBLY_ANALYSIS: foreach my $singleton_assembly (@singleton_assemblies) { my ($lend, $rend, $orient, $ids_aref) = ($singleton_assembly->{lend}, $singleton_assembly->{rend}, $singleton_assembly->{orient}, $singleton_assembly->{ids}); ## make sure singleton contains an alignment that wasn't already applied elsewhere: my $has_alignment_not_applied_elsewhere_flag = 0; foreach my $id (@$ids_aref) { unless ($applied_segment_indices_href->{$id}) { $has_alignment_not_applied_elsewhere_flag = 1; last; } } unless ($has_alignment_not_applied_elsewhere_flag) { ## no reason to bother pursuing it. Everythings accounted for in other already instantiated exons. next SINGLETON_ASSEMBLY_ANALYSIS; } my $found_id_not_accounted_for_flag = 0; ## track those IDs that are built into other already instantiated singleton assemblies foreach my $id (@$ids_aref) { unless ($ids_accounted_for{$id}) { $found_id_not_accounted_for_flag = 1; last; } } if ($found_id_not_accounted_for_flag) { ## add a singleton assembly: $self->_add_graph_node("singleton_exon", $lend, $rend, $orient); foreach my $id (@$ids_aref) { $ids_accounted_for{$id} = 1; } } } ## Ensure that all are accounted for now: foreach my $intronless_alignment (@$intronless_alignments_aref) { my $id = $intronless_alignment->{__intronless_segment_ID}; if (! $applied_segment_indices_href) { ## if not applied before entering this method, then should be applied now if (! $ids_accounted_for{$id}) { my $unaccounted_for_intronless_alignment = $segment_id_to_alignment{$id}; confess "Error, intronless alignment " . $unaccounted_for_intronless_alignment->toToken() . "\n" . "is not accounted for after instantiating maximal single exons.\n"; } } } print "-all intronless alignments should now be accounted for.\n" if $SEE; return; } #### sub _chain_compatible_splice_paths { my $self = shift; ## wrap splice path objs into structs for scoring and dynamic programming to build chains: my @path_structs; foreach my $splice_path ($self->_get_valid_splice_paths()) { my $pathID = $splice_path->get_pathID(); my @nodeIDs = $splice_path->get_ordered_nodeIDs(); my $evidence_support_count = $self->_get_evidence_support_from_nodeIDs(@nodeIDs); my $orient = $splice_path->get_orient(); my ($lend, $rend) = $splice_path->get_coords(); my $struct = { splice_path_obj => $splice_path, path_score => $evidence_support_count, struct_score => $evidence_support_count, nodeIDs => [@nodeIDs], lend => $lend, rend => $rend, orient => $orient, pathID => $pathID, prev => undef, # previous link in a chain of compatible structs }; push (@path_structs, $struct); } @path_structs = sort {$a->{lend}<=>$b->{lend}} @path_structs; ## score them: for (my $i = 0; $i < $#path_structs; $i++) { my $i_path_struct = $path_structs[$i]; my ($i_lend, $i_rend, $i_orient, $i_splice_path, $i_nodeIDs_aref) = ($i_path_struct->{lend}, $i_path_struct->{rend}, $i_path_struct->{orient}, $i_path_struct->{splice_path_obj}, $i_path_struct->{nodeIDs}); for (my $j = $i+1; $j <= $#path_structs; $j++) { ## No self comparisons: if ($i == $j) { next; } ## struct j comes after struct i my $j_path_struct = $path_structs[$j]; my ($j_lend, $j_rend, $j_orient, $j_splice_path, $j_nodeIDs_aref) = ($j_path_struct->{lend}, $j_path_struct->{rend}, $j_path_struct->{orient}, $j_path_struct->{splice_path_obj}, $j_path_struct->{nodeIDs}); ## must have same orient: unless ($i_orient eq $j_orient) { next; } ## check for overlap: unless ($i_lend <= $j_rend && $i_rend >= $j_lend) { next; } ## must be compatible: unless ($i_splice_path->is_compatible($j_splice_path)) { next; } ## double check that neither is a subset of the other: if ($i_splice_path->is_subpath_of($j_splice_path) || $j_splice_path->is_subpath_of($i_splice_path)) { confess "Error, trying to chain two splice paths where one is a subpath of the other:\n" . $self->toString() . "\nOffending paths:\n" . $i_splice_path->toString() . "\n" . $j_splice_path->toString() . "\n"; } ## calculate path score ending at struct j ## don't count the same nodes twice, though: my @nodes_found_in_both_paths = $self->_intersection_of_lists($i_nodeIDs_aref, $j_nodeIDs_aref); my $score_decrement = $self->_get_evidence_support_from_nodeIDs(@nodes_found_in_both_paths); my $candidate_path_score_j = $i_path_struct->{path_score} + $j_path_struct->{struct_score} - $score_decrement; if ($candidate_path_score_j > $j_path_struct->{path_score}) { ## found best path: $j_path_struct->{path_score} = $candidate_path_score_j; $j_path_struct->{prev} = $i_path_struct->{pathID}; } } } ## get the highest scoring paths of compatible subpaths (structs) my %pathIDs_left; my %pathID_to_struct; foreach my $struct (@path_structs) { my $pathID = $struct->{pathID}; $pathIDs_left{$pathID} = 1; $pathID_to_struct{$pathID} = $struct; } my @final_assembled_path_node_lists; while (%pathIDs_left) { ## find the highest scoring chain of paths: my $highest_path_score = 0; my $highest_scoring_pathID = undef; foreach my $pathID (keys %pathIDs_left) { my $struct = $pathID_to_struct{$pathID}; my $path_score = $struct->{path_score}; if ($path_score > $highest_path_score) { $highest_path_score = $path_score; $highest_scoring_pathID = $pathID; } } my %nodes_along_chain; ## walk along chain links and collect the nodes: my $pathID = $highest_scoring_pathID; while (defined $pathID) { delete $pathIDs_left{$pathID}; # remove it so we know it's accounted for as a traversed chain link. my $struct = $pathID_to_struct{$pathID}; my $nodes_aref = $struct->{nodeIDs}; foreach my $node (@$nodes_aref) { $nodes_along_chain{$node} = 1; } $pathID = $struct->{prev}; } my @nodes_along_chain_list = keys %nodes_along_chain; push (@final_assembled_path_node_lists, [@nodes_along_chain_list]); } ## Create new path objects for these chains of subpaths: foreach my $node_list (@final_assembled_path_node_lists) { my @nodes = @$node_list; ## sort them according to position: @nodes = sort { $self->get_graph_node_via_nodeID($a)->{lend} <=> $self->get_graph_node_via_nodeID($b)->{lend}} @nodes; my $path_lend = $self->get_graph_node_via_nodeID($nodes[0])->{lend}; my $path_rend = $self->get_graph_node_via_nodeID($nodes[$#nodes])->{rend}; my $orient = $self->get_graph_node_via_nodeID($nodes[0])->get_orient(); my $splice_path = Splice_graph_path->new($path_lend, $path_rend, $orient, [@nodes]); push (@{$self->{_assembled_splice_paths}}, $splice_path); } return; } #### sub _intersection_of_lists { my $self = shift; my ($list_A_aref, $list_B_aref) = @_; my %eles_in_A; foreach my $ele (@$list_A_aref) { $eles_in_A{$ele} = 1; } my @intersection; foreach my $ele (@$list_B_aref) { if ($eles_in_A{$ele}) { push (@intersection, $ele); } } return (@intersection); } #### sub _get_evidence_support_from_nodeIDs { my $self = shift; my @nodeIDs = @_; my $evidence_sum = 0; foreach my $nodeID (@nodeIDs) { my $node = $self->get_graph_node_via_nodeID($nodeID); my $ev_support = $node->get_num_evidence_support(); $evidence_sum += $ev_support; } return ($evidence_sum); } #### sub _build_splice_graph { my $self = shift; print "## building splice graph:\n" if $SEE; ## do n^2 comparison of nodes: ## number of nodes is relatively small, so this isn't too much of a bottleneck my @nodes = $self->get_graph_nodes(); # already sorted by lend ## init base scores for nodes foreach my $node (@nodes) { my $ev_support = $node->get_num_evidence_support(); # init base score $node->{forward_base_score} = $ev_support; $node->{reverse_base_score} = $ev_support; # init path score $node->{forward_path_score} = $ev_support; $node->{reverse_path_score} = $ev_support; } ## build the splice graph ## at the same time, do the forward dynamic programming calculation so we can backtrack to the best scoring path from any node ## nodeB must come after nodeA, be of same orientation, adjacent, and an acceptable linkage (intron to exon) ## perform calculation from left to right with backtracking from right to left. for (my $i = 1; $i <= $#nodes; $i++) { my $nodeB = $nodes[$i]; my ($nodeB_lend, $nodeB_rend) = $nodeB->get_coords(); my $nodeB_orient = $nodeB->get_orient(); my $nodeB_type = $nodeB->get_type(); my $nodeB_ID = $nodeB->get_nodeID(); for (my $j = $i - 1; $j >= 0; $j--) { my $nodeA = $nodes[$j]; my ($nodeA_lend, $nodeA_rend) = $nodeA->get_coords(); my $nodeA_orient = $nodeA->get_orient(); my $nodeA_type = $nodeA->get_type(); my $nodeA_ID = $nodeA->get_nodeID(); print "-comparing " . $nodeA->toString() . " <=> " . $nodeB->toString() if $SEE; unless ($nodeA_orient eq $nodeB_orient) { print "-opposite orients, cannot link.\n" if $SEE; next; } unless ($nodeB_lend == $nodeA_rend + 1) { ## next base coordinate for next feature print "-not adjacent, cannot link.\n" if $SEE; next; } ## ensure proper connection: unless ($ACCEPTABLE_CONNECTIONS{$nodeA_type}->{$nodeB_type}) { print "-unacceptable linkage types, cannot link ($nodeA_type,$nodeB_type).\n" if $SEE; next; } print "* linking nodes.\n" if $SEE; ## if got this far, connections are perfect! $nodeA->connect_this_next_nodeID($nodeB_ID); $nodeB->connect_this_prev_nodeID($nodeA_ID); ## check scores for best scoring path solution my $path_score = $nodeA->{forward_path_score} + $nodeB->{forward_base_score}; if ($path_score > $nodeB->{forward_path_score}) { ## link the nodes as current best path linkage $nodeB->{forward_path_score} = $path_score; $nodeB->{forward_backtrack_nodeID} = $nodeA_ID; } } } ## Do the reverse dynamic programming calculation so we can find the best scoring path in a left to right traversal from any node ## calculation from right to left with backtracking from left to right for (my $i = ($#nodes - 1); $i >= 0; $i--) { my $nodeA = $nodes[$i]; my $nodeA_ID = $nodeA->get_nodeID(); for (my $j = $i+1; $j <= $#nodes; $j++) { my $nodeB = $nodes[$j]; my $nodeB_ID = $nodeB->get_nodeID(); print "reverse comparison " . $nodeA->toString() . " <=> " . $nodeB->toString() . "\t" if $SEE; ## already stored the prev/next links, so rely on that to know if an acceptable linkage exists: if ($nodeA->has_next_nodeID($nodeB_ID)) { print "connectable.\n" if $SEE; ## check scores for DP my $base_score = $nodeA->{reverse_base_score}; my $path_score = $nodeB->{reverse_path_score} + $base_score; if ($path_score > $nodeA->{reverse_path_score}) { ## make it the best path and link the node $nodeA->{reverse_path_score} = $path_score; $nodeA->{reverse_backtrack_nodeID} = $nodeB_ID; } } else { print "non-connectable nodes.\n" if $SEE; } } } ## defensive programming: foreach my $node (@nodes) { ## ignore singletons if ($node->get_type() eq "singleton_exon") { next; } ## ensure that only terminal left exons have forward_backtrack_nodeID's as undef if ( (! defined $node->{forward_backtrack_nodeID}) && $node->get_type() ne "terminal_left_exon") { print $self->toString(); print Dumper ($node); confess "Error, node has undefined forward backtrack ID but is not a terinal left exon: " . $node->toString() . "\n"; } ## ensure that only terminal right exons have the reverse_backtrack_nodeID's as undef if ( (! defined $node->{reverse_backtrack_nodeID}) && $node->get_type() ne "terminal_right_exon") { print $self->toString(); print Dumper ($node); confess "Error, node has undefined reverse backtrack ID but is not a terminal right exon: " . $node->toString() . "\n"; } } return; } #### sub _extend_splice_paths_to_termini { my $self = shift; my @splice_paths = $self->_get_valid_splice_paths(); foreach my $splice_path (@splice_paths) { my @ordered_nodeIDs = $splice_path->get_ordered_nodeIDs(); my $left_nodeID = $ordered_nodeIDs[0]; my $right_nodeID = $ordered_nodeIDs[$#ordered_nodeIDs]; print "-extending splice_path [" . join (",", @ordered_nodeIDs) . "] maximally to the left and right:\n" if $SEE; my @left_extension_nodes = $self->_extend_node_maximally_left($left_nodeID); @left_extension_nodes = grep { $_ != $left_nodeID } @left_extension_nodes; # remove the nucleating node my @right_extension_nodes = $self->_extend_node_maximally_right($right_nodeID); @right_extension_nodes = grep { $_ != $right_nodeID } @right_extension_nodes; unless (@left_extension_nodes && @right_extension_nodes) { confess "Error, couldn't extend splice path to the left or right to include termini\n" . "left: @left_extension_nodes\n" . "right: @right_extension_nodes\n " . $splice_path->toString(); } ## create alignment assembly: my @all_nodes_in_maximal_path = (@left_extension_nodes, @ordered_nodeIDs, @right_extension_nodes); my $cdna_alignment = $self->_instantiate_cdna_alignment_from_nodeIDs(@all_nodes_in_maximal_path); $self->_add_alignment_assembly($cdna_alignment); } return; } #### sub _extend_node_maximally_left { my $self = shift; my (@nodeIDs) = @_; my $highest_scoring_path = $self->_perform_traceback("forward_backtrack", \@nodeIDs); my $highest_scoring_nodelist_aref = $highest_scoring_path->{path_nodeIDs_aref}; return (@$highest_scoring_nodelist_aref); } #### sub _extend_node_maximally_right { my $self = shift; my (@nodeIDs) = @_; my $highest_scoring_path = $self->_perform_traceback("reverse_backtrack", \@nodeIDs); my $highest_scoring_nodelist_aref = $highest_scoring_path->{path_nodeIDs_aref}; return (@$highest_scoring_nodelist_aref); } #### sub _perform_traceback { my $self = shift; my ($traceback_direction, $nodeIDs_aref) = @_; my @paths_and_score_structs; foreach my $nodeID (@$nodeIDs_aref) { ## do backtracking from forward DP calculation: my @nodes_in_path; my $next_path_node = $nodeID; my $path_score = 0; while (defined $next_path_node) { my $node_obj = $self->get_graph_node_via_nodeID($next_path_node); my $ev_support = $node_obj->get_num_evidence_support(); $path_score += $ev_support; push (@nodes_in_path, $next_path_node); if ($traceback_direction eq "forward_backtrack") { $next_path_node = $node_obj->{forward_backtrack_nodeID}; } elsif ($traceback_direction eq "reverse_backtrack") { $next_path_node = $node_obj->{reverse_backtrack_nodeID}; } else { confess "Don't understand traceback direction: $traceback_direction "; } } my $path_and_score_struct = { score => $path_score, path_nodeIDs_aref => [@nodes_in_path], }; push (@paths_and_score_structs, $path_and_score_struct); } @paths_and_score_structs = reverse sort {$a->{score}<=>$b->{score}} @paths_and_score_structs; my $highest_scoring_path = shift @paths_and_score_structs; return ($highest_scoring_path); } =tree_traversal_replaced_by_DP_method #### my $path_no = 0; sub _tree_traversal { my $self = shift; my ($node_obj, $traversal_direction, $current_path_score, $current_path_list_aref, $max_score_sref, $max_path_href) = @_; #print $self->toString(); #print $self->toAlignIllustration(60); # print "_tree_traversal() current_path: ". join (",", @$current_path_list_aref) . "\n" if $SEE; $self->_ensure_unique_path_nodes(@$current_path_list_aref); # defensive programming my $edge_key = "_" . $traversal_direction . "_nodeID_href"; my %edges = %{$node_obj->{$edge_key}}; ## add current node evidence score to the sum score if (%edges) { ## not a terminal node ## explore each of the linked nodes: my @linked_nodes = keys %edges; foreach my $linked_node (@linked_nodes) { my $linked_node_obj = $self->get_graph_node_via_nodeID($linked_node); ## add linked node to path list: my @current_path_list = @$current_path_list_aref; push (@current_path_list, $linked_node); my $num_evidence_support = $linked_node_obj->get_num_evidence_support(); my $linked_node_path_score = $current_path_score + $num_evidence_support; $self->_tree_traversal($linked_node_obj, $traversal_direction, $linked_node_path_score, [@current_path_list], $max_score_sref, $max_path_href); } } else { ## a terminal node ## base case! if ($node_obj->get_type() !~ /terminal/) { ## supposed to be a terminal node. confess "Reached base case but not a terminal node! " . $node_obj->toString() . "\n" . $self->toString(); } $path_no++; print "$path_no _tree_traversal($traversal_direction) terminal_reached. PATH: " . join (",", @$current_path_list_aref) . "\n" if $SEE; if ($current_path_score >= $$max_score_sref) { #includes case where incoming (starting) node is in fact a terminal node. ## make this maximum scoring path: $max_path_href->{score} = $current_path_score; $max_path_href->{path_nodeIDs_aref} = [@$current_path_list_aref]; $$max_score_sref = $current_path_score; } } return; } =cut #### sub _ensure_unique_path_nodes { my $self = shift; my @nodes = @_; my %check; foreach my $node (@nodes) { if ($check{$node}) { confess $self->toString() . "Error, path list: " . join (",", @nodes) . " has redundant node: $node\n"; } $check{$node} = 1; # log it! } return; # all good! } #### sub _append_singletons_to_assembly_list { my $self = shift; foreach my $node_obj (@{$self->{_singleton_exons}}) { my $nodeID = $node_obj->get_nodeID(); my $cdna_alignment = $self->_instantiate_cdna_alignment_from_nodeIDs($nodeID); $self->_add_alignment_assembly($cdna_alignment); } return; } #### sub _instantiate_cdna_alignment_from_nodeIDs { my $self = shift; my @nodeIDs = @_; my $spliced_orientation = undef; ## add alignment segments for all non-intron nodes: my @coordsets; foreach my $nodeID (@nodeIDs) { my $node_obj = $self->get_graph_node_via_nodeID($nodeID); my $node_type = $node_obj->get_type(); if ($node_type eq "intron") { next; } ## no introns! my ($lend, $rend) = $node_obj->get_coords(); my $orient = $node_obj->get_orient(); my ($end5, $end3) = ($orient eq '+') ? ($lend, $rend) : ($rend, $lend); push (@coordsets, [$end5, $end3]); unless (defined $spliced_orientation) { $spliced_orientation = $orient; } if ($spliced_orientation ne $orient) { confess "Error, alignment segments of opposite orientation are in the same extended splice path! @nodeIDs\n" . $self->toString(); } } @coordsets = sort {$a->[0]<=>$b->[0]} @coordsets; my $match_coord_sum = 0; my @alignment_segments; foreach my $coordset (@coordsets) { my ($end5, $end3) = @$coordset; my $seg_length = abs ($end3 - $end5) + 1; my $match_lend = $match_coord_sum + 1; my $match_rend = $match_coord_sum + $seg_length; my $alignment_segment = new CDNA::Alignment_segment($end5, $end3, $match_lend, $match_rend, 100); # 100% identity push (@alignment_segments, $alignment_segment); $match_coord_sum += $seg_length; } my $cdna_alignment = new CDNA::CDNA_alignment($match_coord_sum, \@alignment_segments); $cdna_alignment->set_spliced_orientation($spliced_orientation); $cdna_alignment->force_spliced_validation($spliced_orientation); ## hack in the node list as a hidden object attribute. $cdna_alignment->{__Splice_graph_assembler_nodeID_list} = [@nodeIDs]; return ($cdna_alignment); } #### sub _correlate_assemblies_with_incoming_alignments { my $self = shift; my @assemblies = $self->get_assemblies(); my %alignment_accs; # keys to alignment objects my %incorporated_alignments; foreach my $assembly (@assemblies) { my $num_segments_in_assembly = $assembly->get_num_segments(); my @incorporated_alignments; foreach my $alignment (@{$self->{_incoming_alignments}}) { my $alignment_acc = $alignment->get_acc(); ## store alignment object in lookup table for later retrieval. unless (exists $alignment_accs{$alignment_acc}) { $alignment_accs{$alignment_acc} = $alignment; } ## check for alignment incorporation: if ($assembly->encapsulates($alignment, $FUZZ_DIST) && ( ## since assemblies have fixed spliced orient, compat will fail when encapsulation ## of single exon alignments of ambiguous orientation exists ($num_segments_in_assembly == 1 && $alignment->get_spliced_orientation() eq '?') || ## do rigorous compatibility check: ($assembly->is_compatible($alignment, $FUZZ_DIST)) ) ) { push (@incorporated_alignments, $alignment_acc); $incorporated_alignments{$alignment_acc} = 1; } } $assembly->set_acc( join ("/", @incorporated_alignments)); # use / as DELIMETER for cdna accessions } my @unincorporated_alignments; foreach my $alignment_acc (keys %alignment_accs) { unless ($incorporated_alignments{$alignment_acc}) { push (@unincorporated_alignments, $alignment_accs{$alignment_acc}); } } @{$self->{_unincorporated_alignments}} = (); #clear if (@unincorporated_alignments) { @{$self->{_unincorporated_alignments}} = @unincorporated_alignments; return (1); # still have unincorporated alignments! } return(0); ## all alignments are accounted for. } #### sub _explore_assemblies_from_unincorporated_alignments { my $self = shift; print "method _explore_assemblies_from_unincorporated_alignments()\n" if $SEE; ## decompose unincorporated alignments and create maximal paths that contain them. my @added_alignment_assemblies; my @unincorporated_alignments = $self->get_unincorporated_alignments(); ## examine in order of descending numbers of segments and length: @unincorporated_alignments = reverse sort { $a->{num_segments} <=> $b->{num_segments} || $a->{cdna_length} <=> $b->{cdna_length} } @unincorporated_alignments; foreach my $unincorporated_alignment (@unincorporated_alignments) { print "\nUnincorporated alignment under scrutiny: " . $unincorporated_alignment->toToken() . "\n" if $SEE; my $orient = $unincorporated_alignment->get_spliced_orientation(); my ($alignment_lend, $alignment_rend) = $unincorporated_alignment->get_coords(); my $found_in_new_assembly_flag = 0; foreach my $new_assembly (@added_alignment_assemblies) { if ($new_assembly->is_compatible($unincorporated_alignment, $FUZZ_DIST) && $new_assembly->encapsulates($unincorporated_alignment, $FUZZ_DIST)) { $found_in_new_assembly_flag = 1; last; } } if ($found_in_new_assembly_flag) { print "-found in a newly created assembly based on an earlier unincorporated alignment. Now accounted for.\n" if $SEE; next; } my $num_segments = $unincorporated_alignment->get_num_segments(); my @all_nodes; if ($num_segments == 1) { ## must be included in some terminal exon not already represented by maximal splice paths (only explanation) ## find a maximal path that includes this segment stemming from a terminal exon my @terminal_nodes = $self->_find_terminal_exons_encompassing_segment($alignment_lend, $alignment_rend, $orient); unless (@terminal_nodes) { confess "Error, searching for terminal nodes that encompass unincorproated singleton segment and none found.\n" . $self->toString(); } ## find maximal path: my @paths_from_terminal_node; foreach my $terminal_node (@terminal_nodes) { my $nodeID = $terminal_node->get_nodeID(); my $type = $terminal_node->get_type(); my $max_path_href; if ($type eq "terminal_left_exon") { ## must extend to the right $max_path_href = $self->_perform_traceback("forward_backtrack", [$nodeID]); #_extend_node_maximally_right($nodeID); } elsif ($type eq "terminal_right_exon") { $max_path_href = $self->_perform_traceback("reverse_backtrack", [$nodeID]); #_extend_node_maximally_left($nodeID); } else { confess "Error, terminal node is supposed to be left or right, but type($type) is not recognized."; } push (@paths_from_terminal_node, $max_path_href); } ## get highest scoring path: @paths_from_terminal_node = reverse sort { $a->{score}<=>$b->{score} } @paths_from_terminal_node; my $highest_scoring_path = shift @paths_from_terminal_node; my $node_IDs_in_path_aref = $highest_scoring_path->{path_nodeIDs_aref}; @all_nodes = @$node_IDs_in_path_aref; } else { ## has an intron! Some alternate termini needed that were not found in a maximal splice path my @segments = $unincorporated_alignment->get_alignment_segments(); my $first_segment = shift @segments; my $last_segment = pop @segments; ## create node list for internal segments and introns: my @nodes_in_central_path; my @introns = $unincorporated_alignment->get_intron_coords(); foreach my $intron (@introns) { my ($lend, $rend) = @$intron; my $node_obj = $self->_get_graph_node_via_coords_n_type("intron", $lend, $rend, $orient); my $nodeID = $node_obj->get_nodeID(); push (@nodes_in_central_path, $nodeID); } foreach my $internal_segment (@segments) { my ($lend, $rend) = $internal_segment->get_coords(); my $node_obj = $self->_get_graph_node_via_coords_n_type("internal_exon", $lend, $rend, $orient); my $nodeID = $node_obj->get_nodeID(); push (@nodes_in_central_path, $nodeID); } my ($first_segment_lend, $first_segment_rend) = $first_segment->get_coords(); my @candidate_preceding_nodes = $self->_find_candidate_preceding_exons($first_segment_lend, $first_segment_rend, $orient); unless (@candidate_preceding_nodes) { confess "Error, no candidate preceding nodes for ($first_segment_lend, $first_segment_rend, $orient)"; } my ($last_segment_lend, $last_segment_rend) = $last_segment->get_coords(); my @candidate_following_nodes = $self->_find_candidate_following_exons($last_segment_lend, $last_segment_rend, $orient); unless (@candidate_following_nodes) { confess "Error, no candidate following nodes for ($last_segment_lend, $last_segment_rend, $orient)"; } ## find maximal paths left and right: my @nodeIDs_left; foreach my $candidate_preceding_node (@candidate_preceding_nodes) { print "Candidate preceding node: " . $candidate_preceding_node->toString() . "\n" if $SEE; my $nodeID = $candidate_preceding_node->get_nodeID(); push (@nodeIDs_left, $nodeID); } my @path_left = $self->_extend_node_maximally_left(@nodeIDs_left); my @nodeIDs_right; foreach my $candidate_following_node (@candidate_following_nodes) { print "Candidate following node: " . $candidate_following_node->toString() . "\n" if $SEE; my $nodeID = $candidate_following_node->get_nodeID(); push (@nodeIDs_right, $nodeID); } my @path_right = $self->_extend_node_maximally_right(@nodeIDs_right); @all_nodes = (@path_left, @nodes_in_central_path, @path_right); } my $assembly = $self->_instantiate_cdna_alignment_from_nodeIDs(@all_nodes); ## verify that this assembly accounts for the unincorporated alignment: unless ( (my $compatible_result = $assembly->is_compatible($unincorporated_alignment, $FUZZ_DIST)) && (my $encapsulated_result = $assembly->encapsulates($unincorporated_alignment, $FUZZ_DIST)) ) { confess "Error, created new assembly to house missing alignment, but it doesn't afterall!\n" . "unincorp: " . $unincorporated_alignment->toToken() . "\n" . "newAsmb: " . $assembly->toToken() . "\n" . "compatible: $compatible_result\n" . "encapsulated: $encapsulated_result\n"; } print "-verified incorporation in new assembly.\n" if $SEE; push (@added_alignment_assemblies, $assembly); } ## add to assembly list $self->_add_alignment_assembly(@added_alignment_assemblies); return; } #### sub _find_candidate_preceding_exons { my $self = shift; my ($lend, $rend, $orient) = @_; ## must have same orient, share rend boundary, and be internal or terminal exons that contain it my @candidate_exons; foreach my $exon (@{$self->{_internal_exons}}, @{$self->{_terminal_left_exons}}) { my ($exon_lend, $exon_rend) = $exon->get_coords(); my $exon_orient = $exon->get_orient(); if ($exon_orient eq $orient && $lend + $FUZZ_DIST >= $exon_lend && $exon_rend == $rend) { push (@candidate_exons, $exon); } } return (@candidate_exons); } #### #### sub _find_candidate_following_exons { my $self = shift; my ($lend, $rend, $orient) = @_; ## must have same orient, share rend boundary, and be internal or terminal exons that contain it my @candidate_exons; foreach my $exon (@{$self->{_internal_exons}}, @{$self->{_terminal_right_exons}}) { my ($exon_lend, $exon_rend) = $exon->get_coords(); my $exon_orient = $exon->get_orient(); if ($exon_orient eq $orient && $rend - $FUZZ_DIST <= $exon_rend && $exon_lend == $lend) { push (@candidate_exons, $exon); } } return (@candidate_exons); } ###################################################### ###################################################### package Splice_graph_node; use strict; use warnings; use Carp; my $nodeID = 0; ## class attribute #### sub new { my $packagename = shift; my ($type, $lend, $rend, $orient) = @_; ## type can be: intron | internal_exon | terminal_left_exon | terminal_right_exon | singleton_exon $nodeID++; # object atts: my $self = { nodeID => $nodeID, type => undef, lend => undef, rend => undef, orient => undef, length => undef, num_evidence_support => 1, # indicate number of transcripts supporting this feature ## graph connections: _prev_nodeID_href => {}, ## keys nodeIDs connected to before this node _next_nodeID_href => {}, ## keys nodeIDs connected to after this node ## attributes for dynamic programming calculations to find longest path forward_base_score => 0, ## calculation performed from left to right, with right to left backtracking forward_path_score => 0, forward_backtrack_nodeID => undef, reverse_base_score => 0, ## calculation performed from right to left with backtracking from left to right reverse_path_score => 0, reverse_backtrack_nodeID => undef, }; bless ($self, $packagename); ## use set methods to set attribute values and validate settings: $self->set_coords($lend, $rend); $self->set_type($type); $self->set_orient($orient); return ($self); } #### sub set_num_evidence_support { my $self = shift; my ($evidence_support) = @_; $self->{num_evidence_support} = $evidence_support; return; } #### sub increment_evidence_support { my $self = shift; my ($inc_val) = @_; if (defined $inc_val) { $self->{num_evidence_support} += $inc_val; } else { ## just add one. $self->{num_evidence_support}++; } return; } #### sub get_num_evidence_support { my $self = shift; return ($self->{num_evidence_support}); } #### sub toString { my $self = shift; my $text = "node: " . $self->get_nodeID() . "\t" . join ("-", $self->get_coords()) . "\torient(" . $self->get_orient() . ")" . "\t" . $self->get_type() . "\tEvSupport: " . $self->get_num_evidence_support(); return ($text); } #### sub get_nodeID { my $self = shift; return ($self->{nodeID}); } #### sub set_coords { my $self = shift; my ($lend, $rend) = @_; unless ($lend =~ /^\d+$/ && $rend =~ /^\d+$/) { confess "Error, coordinates [$lend] or [$rend] are not integers"; } unless ($lend <= $rend) { confess "Error, coordinates out of order"; } $self->{lend} = $lend; $self->{rend} = $rend; $self->{length} = $rend - $lend + 1; return; } #### sub get_coords { my $self = shift; return ($self->{lend}, $self->{rend}); } #### sub set_type { my $self = shift; my ($type) = @_; unless ($type =~ /^(intron|internal_exon|terminal_left_exon|terminal_right_exon|singleton_exon)$/) { confess "type $type is not acceptible"; } $self->{type} = $type; return; } #### sub get_type { my $self = shift; return ($self->{type}); } #### sub set_orient { my $self = shift; my ($orient) = @_; unless ($orient =~ /^[\+\-]$/) { confess "Error, orient($orient) is not allowed here"; } $self->{orient} = $orient; return; } #### sub get_orient { my $self = shift; return ($self->{orient}); } #### sub connect_this_prev_nodeID { my $self = shift; my ($nodeID) = @_; $self->{_prev_nodeID_href}->{$nodeID} = 1; return; } #### sub connect_this_next_nodeID { my $self = shift; my ($nodeID) = @_; $self->{_next_nodeID_href}->{$nodeID} = 1; return; } #### sub get_connected_prev_nodeIDs { my $self = shift; my @prev_nodeIDs = keys %{$self->{_prev_nodeID_href}}; return (@prev_nodeIDs); } #### sub get_connected_next_nodeIDs { my $self = shift; my @next_nodeIDs = keys %{$self->{_next_nodeID_href}}; return (@next_nodeIDs); } #### sub has_next_nodeID { my $self = shift; my ($nodeID) = @_; if ($self->{_next_nodeID_href}->{$nodeID}) { return (1); #yes } else { return (0); # no } } #### sub has_prev_nodeID { my $self = shift; my ($nodeID) = @_; if ($self->{_prev_nodeID_href}->{$nodeID}) { return (1); #yes } else { return (0); #no } } ############################################# ############################################# package Splice_graph_path; use strict; use warnings; use Carp; my $PATH_ID = 0; ## class variable #### sub new { my $packagename = shift; my ($lend, $rend, $orient, $ordered_nodeIDs_aref) = @_; unless ($lend =~ /^\d+$/ && $rend =~ /^\d+$/) { confess "Error, lend $lend and rend $rend must be integers"; } unless ($orient =~ /^[\+\-]$/) { confess "Error, orient($orient) is invalid"; } unless (@$ordered_nodeIDs_aref) { confess "Error, need list of ordered nodeIDs as constructor param"; } $PATH_ID++; my $self = { _lend => $lend, _rend => $rend, _orient => $orient, _ordered_nodeIDs => [@$ordered_nodeIDs_aref], # a path is an ordered list of intron and exon nodeIDs traversed _pathID => $PATH_ID, }; bless ($self, $packagename); return ($self); } #### sub get_ordered_nodeIDs { my $self = shift; return (@{$self->{_ordered_nodeIDs}}); } #### sub toString { my $self = shift; my @ordered_nodeIDs = $self->get_ordered_nodeIDs(); return (join (",", @ordered_nodeIDs)); } #### sub is_subpath_of { my $self = shift; my ($other_path) = @_; my $self_path_string = "," . $self->toString() . ","; ## add comma terminal delimiters my $other_path_string = "," . $other_path->toString() . ","; if ($other_path_string =~ /$self_path_string/) { return (1); #true } else { return (0); #false } } #### sub is_compatible () { my $self = shift; my ($other_splice_path_obj) = @_; ## make sure they have at least one node in common: my @node_IDs_A = $self->get_ordered_nodeIDs(); my @node_IDs_B = $other_splice_path_obj->get_ordered_nodeIDs(); my $found_nodeID_in_common = 0; my $common_nodeID = undef; { # do this in a more restricted scope my %nodeIDs; foreach my $nodeID (@node_IDs_A) { $nodeIDs{$nodeID} = 1; } foreach my $nodeID (@node_IDs_B) { if ($nodeIDs{$nodeID}) { $found_nodeID_in_common = 1; $common_nodeID = $nodeID; last; } } } unless ($found_nodeID_in_common) { return (0); # false; no common node so definitely incompatible } ## examine all nodes adjacent to common node; they should be completely identical my $common_node_pos_A = $self->_find_ele_index_pos_via_value(\@node_IDs_A, $common_nodeID); my $common_node_pos_B = $self->_find_ele_index_pos_via_value(\@node_IDs_B, $common_nodeID); ## search left: my ($i, $j); for ($i=$common_node_pos_A, $j=$common_node_pos_B; $i >= 0 && $j >= 0; $i--, $j--) { my $A_node = $node_IDs_A[$i]; my $B_node = $node_IDs_B[$j]; if ($A_node ne $B_node) { # incompatible return (0); } } ## search right: for ($i = $common_node_pos_A, $j = $common_node_pos_B; $i <= $#node_IDs_A && $j <= $#node_IDs_B; $i++,$j++) { my $A_node = $node_IDs_A[$i]; my $B_node = $node_IDs_B[$j]; if ($A_node ne $B_node) { # incompatible return (0); } } ## if got here, then passed compatibility tests. return (1); # Yes, compatible } sub _find_ele_index_pos_via_value { my $self = shift; my ($list_aref, $value) = @_; for (my $i = 0; $i <= $#$list_aref; $i++) { if ($list_aref->[$i] eq $value) { return ($i); } } confess "Error finding index position of $value in list"; } #### sub get_pathID { my $self = shift; return ($self->{_pathID}); } #### sub get_coords { my $self = shift; return ($self->{_lend}, $self->{_rend}); } #### sub get_orient { my $self = shift; return ($self->{_orient}); } 1; #EOM