// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// /// note coding convention for all ranges '_pos fields' is: /// XXX_begin_pos is zero-indexed position at the beginning of the range /// XXX_end_pos is zero-indexed position 1 step after the end of the range /// #include "starling_pos_processor_indel_util.hh" #include "blt_util/blt_exception.hh" #include #include #include static void finish_indel_sppr(indel& in, starling_pos_processor& sppr, const INDEL_ALIGN_TYPE::index_t iat, const align_id_t id){ using namespace INDEL_ALIGN_TYPE; if (iat == CONTIG) { in.data.contig_ids.insert(id); } else if(iat == CONTIG_READ) { in.data.all_read_ids.insert(id); } else if(iat == GENOME_READ) { in.data.map_read_ids.insert(id); in.data.all_read_ids.insert(id); } else if(iat == GENOME_SUBMAP_READ) { in.data.submap_read_ids.insert(id); } else { assert(0); } const bool is_novel_indel(sppr.insert_indel(in)); if((iat == CONTIG_READ) and is_novel_indel){ std::ostringstream oss; oss << "ERROR: contig read contains novel insertion: " << in << "\n"; throw blt_exception(oss.str().c_str()); } } static void bam_seq_to_str(const bam_seq_base& bs, const unsigned start, const unsigned end, std::string& s) { s.clear(); for(unsigned i(start);i ends(get_nonclip_end_segments(path)); unsigned read_offset(0); unsigned ref_offset(0); const unsigned aps(path.size()); for(unsigned i(0);ibegin()), j_end(edge_indel_ptr->end()); for(;j!=j_end;++j){ if(((i!=0) or (current_pos!=j->right_pos())) and 
//
// NOTE(review): this file appears to have lost its line breaks. The whole of
// the first line above -- license header, #include directives,
// finish_indel_sppr(), bam_seq_to_str() and the opening of the
// alignment-walking routine -- is therefore swallowed by its leading "//",
// and the code below starts in the middle of an if-condition.
// Text that sat between '<' and '>' also looks to have been dropped:
//   - the three trailing #include directives have no header names;
//   - the bam_seq_to_str() loop stops right after "i";
//   - the name and parameter list of the routine that owns the code below,
//     plus the start of its segment loop, are missing.
// Restore the file from version control before building -- TODO confirm
// against the original.
//
// What the surviving text shows:
//
// finish_indel_sppr(in, sppr, iat, id)
//   Records `id` on the indel according to the alignment type `iat`:
//     CONTIG             -> in.data.contig_ids
//     CONTIG_READ        -> in.data.all_read_ids
//     GENOME_READ        -> in.data.map_read_ids and in.data.all_read_ids
//     GENOME_SUBMAP_READ -> in.data.submap_read_ids
//   Any other value hits assert(0). The indel is then handed to
//   sppr.insert_indel(); when that call returns true (stored as
//   is_novel_indel) and `iat` is CONTIG_READ, a blt_exception carrying the
//   "contig read contains novel insertion" message is thrown.
//
// bam_seq_to_str(bs, start, end, s)
//   Clears `s` and begins a loop at index `start`; the rest of the body is
//   lost. The callers below pass a [start,end) range of bseq and
//   in.data.seq as `s` -- presumably the bases in that range are copied into
//   `s`; verify against the original.
//
// Alignment-walking routine (name and signature not visible here)
//   Iterates over the segments of `path` while tracking read_offset and
//   ref_offset (both start at 0):
//   - Edge handling (the guarding conditions are partly lost): walks the
//     iterator j up to edge_indel_ptr->end() and picks the first entry whose
//     right_pos() (when i==0) or pos (when i!=0) equals current_pos, copies
//     it into a new indel key, records it via finish_indel_sppr() and stops
//     scanning.
//   - Non-edge INSERT/DELETE with length <= max_indel_size: records one indel
//     at pos+ref_offset with the segment length; for INSERT the inserted
//     sequence is taken from bseq at [read_offset, read_offset+length).
//   - Longer non-edge INSERT/DELETE: records two breakpoint entries instead.
//     SV_BP_LEFT sits at pos+ref_offset and carries at most max_indel_size
//     bases starting at read_offset. SV_BP_RIGHT sits at pos+ref_offset
//     (plus the segment length for DELETE) and carries at most
//     max_indel_size bases ending at the read offset just past the segment.
//   - Offsets then advance: MATCH moves both, DELETE moves ref_offset only,
//     INSERT and SOFT_CLIP move read_offset only, HARD_CLIP and PAD move
//     neither; any other segment type hits assert(0).
//
((i==0) or (current_pos!=j->pos))) continue; indel in; in.key = *j; finish_indel_sppr(in,sppr,iat,id); break; } } } } if((not is_edge_segment) and ((ps.type == INSERT) or (ps.type == DELETE))){ if(ps.length <= max_indel_size) { indel in; in.key.pos=pos+ref_offset; in.key.length = ps.length; if(ps.type == INSERT){ in.key.type=INDEL::INSERT; bam_seq_to_str(bseq,read_offset,read_offset+ps.length,in.data.seq); } else { in.key.type=INDEL::DELETE; } finish_indel_sppr(in,sppr,iat,id); } else { // left side BP: { indel in; in.key.pos=pos+ref_offset; in.key.length=ps.length; in.key.type=INDEL::SV_BP_LEFT; const unsigned start(read_offset); const unsigned size(bseq.size()-read_offset); const unsigned end(start+std::min(size,max_indel_size)); bam_seq_to_str(bseq,start,end,in.data.seq); finish_indel_sppr(in,sppr,iat,id); } // right side BP: { indel in; in.key.pos=pos+ref_offset; if(ps.type == DELETE) in.key.pos+=ps.length; in.key.length=ps.length; in.key.type=INDEL::SV_BP_RIGHT; const unsigned next_read_offset(read_offset+((ps.type==INSERT) ? ps.length : 0)); const unsigned start_offset(next_read_offset-std::min(next_read_offset,max_indel_size)); bam_seq_to_str(bseq,start_offset,next_read_offset,in.data.seq); finish_indel_sppr(in,sppr,iat,id); } } } if (ps.type == MATCH) { read_offset += ps.length; ref_offset += ps.length; } else if(ps.type == DELETE){ ref_offset += ps.length; } else if(ps.type == INSERT or ps.type == SOFT_CLIP){ read_offset += ps.length; } else if(ps.type == HARD_CLIP or ps.type == PAD){ // do nothing } else { assert(0); // can't handle other CIGAR types yet } } }