/** * Project : CASAVA * Module : $RCSfile: AnomalousRead.cpp,v $ * @author : Richard Shaw * Copyright : Copyright (c) Illumina 2010. All rights reserved. * ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). * */ /*****************************************************************************/ #include #include #include "common/Exceptions.hh" #include "common/SequenceUtils.hh" #include "variance/AnomalousRead.hh" /*****************************************************************************/ using namespace casava::common; namespace ca { namespace variance_detection { /*****************************************************************************/ // AnomalousRead /*****************************************************************************/ AnomalousRead::AnomalousRead(const Alignment& alignment, bool noReverse) : Singleton(alignment.getData().c_str(), alignment.getQuality().c_str(), alignment), myMostLikelyBreakPt(0) { if ((alignment.getMatch().getStrand() == Match::Reverse) && !noReverse) { SequenceUtils::reverseComp(read_); quality_ = std::string(quality_.rbegin(), quality_.rend()); } } /*****************************************************************************/ std::ostream& operator<<(std::ostream& ostrm, const AnomalousRead::Type& anomalousReadType) { switch (anomalousReadType) { case AnomalousRead::ShadowOrSemiAligned: ostrm << "Shadow/SemiAligned"; break; case AnomalousRead::InsertionPair: ostrm << "InsertionPair"; break; case AnomalousRead::DeletionPair: ostrm << "DeletionPair"; break; case AnomalousRead::DuplicationPair: ostrm << "DuplicationPair"; break; case AnomalousRead::InversionPair: ostrm << "InversionPair"; break; case AnomalousRead::ChimericPair: ostrm << "ChimericPair"; break; default: ostrm << "N/A"; } return ostrm; } /*****************************************************************************/ std::istream& operator>>(std::istream& istrm, AnomalousRead::Type& anomalousReadType) { std::string typeStr; istrm >> typeStr; if (typeStr == "Shadow/SemiAligned") { anomalousReadType = AnomalousRead::ShadowOrSemiAligned; } else if (typeStr == "InsertionPair") { anomalousReadType = AnomalousRead::InsertionPair; } else if (typeStr == "DeletionPair") { anomalousReadType = AnomalousRead::DeletionPair; } else if (typeStr == "DuplicationPair") { anomalousReadType = AnomalousRead::DuplicationPair; } else if (typeStr == "InversionPair") { anomalousReadType = AnomalousRead::InversionPair; } else if (typeStr == "ChimericPair") { anomalousReadType = AnomalousRead::ChimericPair; } else { BOOST_THROW_EXCEPTION(CasavaException(EINVAL, "Unrecognised anomalous " "read type " + typeStr)); } return istrm; } /*****************************************************************************/ // SingleChromPairAnomRead /*****************************************************************************/ SingleChromPairAnomRead::SingleChromPairAnomRead(const Alignment& alignment, long leftPos, unsigned int insertSize) : AnomalousRead(alignment) { minPos_ = boost::lexical_cast(leftPos); maxPos_ = minPos_ + boost::lexical_cast(insertSize); // Not leftPos - 1 because integer division rounds down. myMostLikelyBreakPt = leftPos + (insertSize / 2); } /*****************************************************************************/ void SingleChromPairAnomRead::populateBounds(vector& eventStarts, vector& eventEnds) { eventStarts.push_back(SingletonEvent(minPos_, this)); eventEnds.push_back(SingletonEvent(maxPos_, this)); } /*****************************************************************************/ void SingleChromPairAnomRead::populateMetric(IndelMetricStore& indelMetric) { // Uninformative probability density function - flat across insert size. // FIXME : Breakpoint more likely to be towards midpoint? float increment((minPos_ == maxPos_) ? 0.0 : 1.0 / (maxPos_ - minPos_ + 1)); for (int pos(minPos_); pos <= maxPos_; ++pos) indelMetric[pos] += increment; } /*****************************************************************************/ // ChimericPairAnomRead /*****************************************************************************/ ChimericPairAnomRead::ChimericPairAnomRead(const Alignment& alignment, const PairStats& pairStats, unsigned int readLen) : AnomalousRead(alignment) { const long pos(alignment.getPosition()); // double lowSd(0.0); // double highSd(0.0); // pairStats.getSds(laneKey, lowSd, highSd); // Assuming the low and high SD values are similar. // myStandardDeviation = (lowSd + highSd) / 2.0; RelOrient expectedRelOrient; double minInsertSize(0.0); double maxInsertSize(0.0); pairStats.getBounds(alignment.getMachineName(), alignment.getRunNumber(), alignment.getLaneNumber(), alignment.getIndex(), expectedRelOrient, minInsertSize, maxInsertSize); const double mostLikelyInsertSize(pairStats.mostLikelyInsertSize(alignment.getMachineName(), alignment.getRunNumber(), alignment.getLaneNumber(), alignment.getIndex())); const bool isForwardAlign(alignment.getMatch().getStrand() == Match::Forward); bool partnerShouldHaveBeenToRight(false); if (expectedRelOrient == RelOrient("Rp")) { // Innie partnerShouldHaveBeenToRight = isForwardAlign; } else if (expectedRelOrient == RelOrient("Rm")) { // Outie partnerShouldHaveBeenToRight = !isForwardAlign; } else { std::ostringstream orientStrm; orientStrm << expectedRelOrient; BOOST_THROW_EXCEPTION(CasavaException(EINVAL, "Unhandled sample prep : " + orientStrm.str())); } // FIXME : using flat distribution across max expected insert size. // : Should take SDs into account. if (partnerShouldHaveBeenToRight) { minPos_ = boost::lexical_cast(pos); maxPos_ = boost::lexical_cast(pos) + boost::lexical_cast(maxInsertSize) - 1; myMostLikelyBreakPt = pos + (boost::lexical_cast(mostLikelyInsertSize) / 2); } else { // Partner should have been to left. maxPos_ = boost::lexical_cast(pos) + readLen - 1; minPos_ = maxPos_ - boost::lexical_cast(maxInsertSize) + 1; myMostLikelyBreakPt = pos + boost::lexical_cast(readLen) - 1 - (boost::lexical_cast(mostLikelyInsertSize) / 2); } } /*****************************************************************************/ void ChimericPairAnomRead::populateBounds(vector& eventStarts, vector& eventEnds) { eventStarts.push_back(SingletonEvent(minPos_, this)); eventEnds.push_back(SingletonEvent(maxPos_, this)); } /*****************************************************************************/ void ChimericPairAnomRead::populateMetric(IndelMetricStore& indelMetric) { // Uninformative probability density function - flat across insert size. // FIXME : Breakpoint more likely to be towards midpoint? float increment((minPos_ == maxPos_) ? 0.0 : 1.0 / (maxPos_ - minPos_ + 1)); for (int pos(minPos_); pos <= maxPos_; ++pos) { indelMetric[pos] += increment; } } /*****************************************************************************/ } // namespace variance_detection } // namespace ca /*****************************************************************************/