// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// #include "starling_read_align_score.hh" #include "blt_util/log.hh" #include "blt_util/qscore.hh" #include "blt_util/seq_util.hh" #include //#define DEBUG_SCORE #ifdef DEBUG_SCORE #include #include struct align_position { align_position(const char r, const char f, const char i) : read(r), ref(f), insert(i) {} char read; char ref; char insert; }; struct align_printer { void push(const char r, const char f, const char i) { _seq.push_back(align_position(r,f,i)); } void dump(std::ostream& os) const { os << "scoring alignment:\n"; const unsigned ss(_seq.size()); os << "read: "; for(unsigned i(0);i _seq; }; #endif // score a contiguous matching alignment segment // // note that running the lnp value through as a reference creates more // floating point stability for ambiguous alignments which have the // same score by definition. // static void score_segment(const starling_options&, const unsigned seg_length, const bam_seq_base& seq, const uint8_t* qual, const pos_t read_head_pos, const bam_seq_base& ref, const pos_t ref_head_pos, double& lnp) { static const double lnthird(-std::log(3.)); for(unsigned i(0);i(i)); const uint8_t sbase(seq.get_code(readi)); if(sbase == BAM_BASE::ANY) continue; const uint8_t qscore(qual[readi]); bool is_ref(sbase == BAM_BASE::REF); if(not is_ref) { const pos_t refi(ref_head_pos+static_cast(i)); is_ref=(sbase == ref.get_code(refi)); } lnp += ( is_ref ? qphred_to_lncompe(qscore) : qphred_to_lne(qscore)+lnthird ); } } double score_candidate_alignment(const starling_options& client_opt, const indel_buffer& ibuff, const read_segment& rseg, const candidate_alignment& cal, const std::string& ref_seq){ using namespace ALIGNPATH; #ifdef DEBUG_SCORE static const char GAP('-'); align_printer ap; #endif double al_lnp(0.); const string_bam_seq ref_bseq(ref_seq); const bam_seq read_bseq(rseg.get_bam_read()); const uint8_t* qual(rseg.qual()); pos_t read_head_pos(0); pos_t ref_head_pos(cal.al.pos); const std::pair ends(get_nonclip_end_segments(cal.al.path)); const unsigned as(cal.al.path.size()); for(unsigned i(0);i(ii)), ref_bseq.get_char(ref_head_pos+static_cast(ii)), GAP); } #endif read_head_pos+=ps.length; ref_head_pos+=ps.length; } else if(ps.type==INSERT) { indel_key ik(ref_head_pos,INDEL::INSERT,ps.length); // check if this is an edge insertion: if((i==ends.first) or (i==ends.second)){ if(i==ends.first) ik=cal.leading_indel_key; else ik=cal.trailing_indel_key; assert(ik.type!=INDEL::NONE); } const indel_data* id_ptr(ibuff.get_indel_data(ik)); if(NULL == id_ptr){ log_os << "ERROR: candidate alignment does not contain expected insertion: " << ik << "\n" << "\tcandidate alignment: " << cal << "\n"; exit(EXIT_FAILURE); } const string_bam_seq insert_bseq(id_ptr->seq); // if this is a leading edge-insertion we need to set // insert_seq_head_pos accordingly: // pos_t insert_seq_head_pos(0); if(i==0) { insert_seq_head_pos=static_cast(insert_bseq.size())-static_cast(ps.length); } score_segment(client_opt, ps.length, read_bseq, qual, read_head_pos, insert_bseq, insert_seq_head_pos, al_lnp); #ifdef DEBUG_SCORE for(unsigned ii(0);ii(ii)), GAP, insert_bseq.get_char(insert_seq_head_pos+static_cast(ii))); } #endif read_head_pos+=ps.length; } else if((ps.type==DELETE) or (ps.type==SKIP)) { // no read segment to worry about in this case // #ifdef DEBUG_SCORE for(unsigned ii(0);ii(ii)), GAP); } #endif ref_head_pos+=ps.length; } else if(ps.type==SOFT_CLIP) { // we rely on candidate alignment generator to supress // soft-clipping so this routine does not penalizing // soft-clip states for now... the complication is that a // soft-clip alignment will always do better than its // unclipped equivilent. The rationalle right now is that // if a user has soft-clipping on their input reads, they // want it to stay there. // // static const double lnrandom(std::log(0.25)); // al_lnp += (ps.length*lnrandom); read_head_pos+=ps.length; } else { // haven't figured out other clipping policy yet: assert(0); } } #ifdef DEBUG_SCORE ap.dump(std::cerr); #endif return al_lnp; }