#include "variance/AlignCandIndelReadsImpl.hh" #include #include #include using namespace std; /** * Project : CASAVA * Module : $RCSfile: AlignCandIndelReadsImpl.cpp,v $ * @author : Bret D. Barnes * Copyright : Copyright (c) Illumina 2008, 2009. All rights reserved. * ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). * */ //using namespace casava::common; void AlignCandIndelReadsImpl::operator()(Export& record, const std::string& refSeq, const unsigned refPos) { // Get reference alignment tag sequence const string& read = record.getRead(); // const char strand = record.getMatchStrand(); // Get ref info //const unsigned int refLen = refSeq.length(); // Initialize local alignment object #ifdef CHANGED cerr << "HERE!!!!\n\n" << endl; exit(1); TagAligner::TagAligner align(this->_matchCost, this->_misMatchCost, this->_gapExtendCost, this->_gapOpenCost, refSeq.c_str(), read.c_str(),""); #endif align.setSequences(refSeq.c_str(), read.c_str(),"", record.getMatchStrand()); // Perform local alignment //const double laScore = align.align(); align.align(); //cerr << align.toString() << endl; // Here we explicetly define which score we want to use // This can be changed later to use any of the scores const double score = align.getAlnScore(); // Raw local alignment score from DP matrix //double score = align.getReadAlnScore(); // Extended read alignment score for whole read //doubel score = align.getReadNormAlnScore(); // ReadAlnScore() / length (nomralized) // Check alignment score if (score >= this->_alignScoreThresh) { // Check to see if we've aligned the tag // out of bounds of the genome seq frag // NOTE: We could decide to extend the alignment region // if the tag aligns out of bounds. This would take a // little extra time, but might improve the accuracy // of the indel caller (See below: ALIGNED_OUT_OF_BOUND READ) if (align.isInsideRef()) { // AIGNED READ _aCnt++; _aSum += score; // These checks should be removed now, leaving for // temp testing... if (align.getTagAlnStart() != 0 || align.getTagAlnEnd() != static_cast(read.length())) { cerr << "[ERROR]: Tag extnsion failed!" << endl; cerr << align.toString() << endl; exit(EXIT_FAILURE); } // Update new math position, we reverse comp the tag read // so no need to do anything special if the sequence is // on the reverse strand. int newPos = refPos + align.getRefAlnStart(); /* if (record.getMatchStrand() == 'R') { newPos = refPos + align.getRefAlnEnd() - 1; } */ // Update match descriptor string desc = align.getMatchDesc(); // Set new position and match descriptor fields record.setMatchDesc(desc); record.setMatchStart(newPos); // Update range start and end record.setRangeStart(newPos); // rangeEnd should be the position of the end of the ref alignment // i.e. this could be < newPos + read.length(); int rangeEnd = newPos + align.getRefAlnEnd() - align.getRefAlnStart(); record.setRangeEnd(rangeEnd); //record.setRangeEnd(newPos + read.length() - 1); // Update single alignment score // NOTE: Not sure if we should do this??? //record.setSingleAlnScore(align.getReadAlnScore()); } else { // ALIGN_OUT_OF_BOUND_READ => SHADOW READ _oCnt++; _oSum += score; // NOTE: This is where we could try to extend the refSeq in the // direction of where the tag alignment drops off to get the // correct alignment in the case where the refSeq fragement was // too short or shifted left or right too much. // i.e. // // refSeq: RRRRRRRRRRRRRRRRRRRRRRRRRRRRRR // tagSeq: TTTTTTTTTTTTTTT // // Updated: // refSeq: RRRRRRRRRRRRRRRRRRRRRRRRRRRRRR // tagSeq: TTTTTTTTTTTTTTT // // Right now we don't do this... // Set new position and match descriptor fields and range start/end // In this case we did not align well so we'll just put // the shadow +/- mean, if shadow read, and orginal start // if not. // NOTE: We should also update each shadow read's partner offset, // since the parterners match position has now been lost (replaced // by the estimated position. --DONE-- in Export class!!! // // NOTE: Its debatable if we should actually update the match // position since its an estimated match position and if the read // was a match read coming in then the previous match postion // (the position of its partner) holds 'potentially' more information // than the new estimated (from insert size) match position. // Thoughts??? // FOR NOW WE ARE NOT UPADATING THIS... /* if (record.isShadow()) { if (record.getMatchStrand() == 'R') { record.setMatchStart(record.getMatchStart() + mean); } else { record.setMatchStart(record.getMatchStart() - mean); } } */ #ifdef DONT_MESS_WITH_RANGE_UNLESS_WEVE_FOUND_AN_ALIGNMENT // Update match descriptor record.setMatchDesc("-"); // Update range start and end record.setRangeStart(refPos); record.setRangeEnd(refPos + refLen - 1); #endif } } else { // SHADOW READ _uCnt++; _uSum += score; // Set new position and match descriptor fields and range start/end // In this case we did not align well so we'll just put // the shadow +/- mean, if shadow read, and orginal start // if not. // NOTE: We should also update each shadow read's partner offset, // since the parterners match position has now been lost (replaced // by the estimated position. --DONE-- in Export class!!! // // NOTE: Its debatable if we should actually update the match // position since its an estimated match position and if the read // was a match read coming in then the previous match postion // (the position of its partner) holds 'potentially' more information // than the new estimated (from insert size) match position. // Thoughts??? // // FOR NOW WE ARE NOT UPADATING THIS... /* if (record.isShadow()) { if (record.getMatchStrand() == 'R') { record.setMatchStart(record.getMatchStart() + mean); } else { record.setMatchStart(record.getMatchStart() - mean); } } */ #ifdef DONT_MESS_WITH_RANGE_UNLESS_WEVE_FOUND_AN_ALIGNMENT // Update match descriptor record.setMatchDesc("-"); // Update range start and end record.setRangeStart(refPos); record.setRangeEnd(refPos + refLen - 1); #endif // Update single alignment score // NOTE: Not sure if we should do this??? //record.setSingleAlnScore(align.getReadAlnScore()); } // Record perfect alignment data if (align.isPerfectAlignment()) { _pCnt++; } /* cerr << "REF POS\t" << refPos << ", " << refSeq.length() << endl; cerr << align.toString() << endl; cerr << record.toString() << endl; */ } // ~void AlignCandIndelReadsImpl::operator() std::string AlignCandIndelReadsImpl:: toString() const { stringstream con; const double total(static_cast(_aCnt + _oCnt + _uCnt)); const double aCntPer = (double)_aCnt / total; const double oCntPer = (double)_oCnt / total; const double uCntPer = (double)_uCnt / total; const double aAvg = _aSum / static_cast(_aCnt); const double oAvg = _oSum / static_cast(_oCnt); const double uAvg = _uSum / static_cast(_uCnt); const double pAvg = (double)_pCnt / total; con << "# Metric\ttotalCount\tpercentCount\tavgScore\n" << "# aligned\t" << _aCnt << "\t" << aCntPer << "\t" << aAvg << "\n" << "# outOfBound\t" << _oCnt << "\t" << oCntPer << "\t" << oAvg << "\n" << "# unAligned\t" << _uCnt << "\t" << uCntPer << "\t" << uAvg << "\n" << "#\n" << "# Number of perfect alignments: " << _pCnt << " (" << pAvg << "%)" << endl; return(con.str()); } // End of file