// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// #include "alignment_util.hh" #include #include // Provides the bounds of an alignment // // note that for normal indel intersection test leading/trailing // indels will be missed if using this range // known_pos_range get_strict_alignment_range(const alignment& al){ const pos_t asize(apath_ref_length(al.path)); return known_pos_range(al.pos,al.pos+asize); } // Provides the bounds of an alignment when edge indels // are converted to match. // known_pos_range get_soft_clip_alignment_range(const alignment& al){ const pos_t lead(apath_insert_lead_size(al.path)); const pos_t trail(apath_insert_trail_size(al.path)); const pos_t asize(apath_ref_length(al.path)); const pos_t begin_pos(al.pos-lead); pos_t end_pos(al.pos+asize+trail); return known_pos_range(begin_pos,end_pos); } // Provides the bounds of an alignment when edge indels and soft clip // are converted to match. // known_pos_range get_alignment_range(const alignment& al){ const pos_t lead(apath_read_lead_size(al.path)); const pos_t trail(apath_read_trail_size(al.path)); const pos_t asize(apath_ref_length(al.path)); const pos_t begin_pos(al.pos-lead); pos_t end_pos(al.pos+asize+trail); return known_pos_range(begin_pos,end_pos); } // Provides the largest reasonable bounds of an alignment by including // any leading and trailing edge sequence and requiring that the end // of the alignment range is equal to at least the // orig_start+read_length and the start of the alignment range is // equal to at least orig_end-read_length: // known_pos_range get_alignment_zone(const alignment& al, const unsigned seq_length){ const known_pos_range ps(get_alignment_range(al)); known_pos_range ps2(ps); ps2.begin_pos=std::max(0,std::min(ps.begin_pos,ps.end_pos-static_cast(seq_length))); ps2.end_pos=std::max(ps.end_pos,ps.begin_pos+static_cast(seq_length)); return ps2; } #if 0 bool normalize_alignment(alignment& al, const std::string& read_seq, const std::string& ref_seq) { assert(0); } // original version of code --very efficient for single indels but not designed to handle // potential complexities of multiple indels interacting during normalization // // shift indels to occur as far "to the left" as possible // // return two pieces of info: // 1) bool indicating if the indel is invalid (i.e. it 'fell off the left end of the read') // 2) distance to shift the indel to the left // // Note that this functions purpose is not to validate indels -- that // will happen in a subsequent step, if an indel 'falls off' the edge // of a read during normalization, it's found to be invalid by // happenstance, but there's no reason to check for the same evidence // of a non-informative indel by trying to push it off the right side // of the read // std::pair normalize_indel(const char* base_seq, const pos_t base_seq_start, const char* insert_seq, const pos_t insert_seq_start, const unsigned insert_size){ assert(NULL != base_seq); assert(NULL != insert_seq); assert(0 != insert_size); unsigned bs(base_seq_start); unsigned is(insert_seq_start); while(true) { // read deletion fell off edge: if(bs==0) return std::make_pair(true,0); // attempt to shift back one base if(base_seq[bs-1] != insert_seq[is+insert_size-1]){ return std::make_pair(false,base_seq_start-bs); } // read insertion fell off edge: if(is==0) return std::make_pair(true,0); bs--; is--; } } // this function is held up on normalization of multiple, potentially colliding // indels -- really this is becoming just a bad way of doing a realignment. Punt // this whole procedure for now. // // Shift all indels as far "to the left" as possible -- note that // some indels may be lost. Returns true if the alignment was changed. // bool normalize_alignment(alignment& al, const std::string& read_seq, const std::string& ref_seq) { bool is_norm(false); std::pair norm_res; unsigned read_offset(0); unsigned ref_offset(0); pos_t last_event_end(ps.pos); const unsigned aps(al.apath.size()); for(unsigned i(0);i