#ifndef INDELFINDERIMPL_HH_ #define INDELFINDERIMPL_HH_ /** * Project : CASAVA * Module : $RCSfile: IndelFinderImpl.hh,v $ * @author : Tony Cox * Copyright : Copyright (c) Illumina 2008, 2009. All rights reserved. * ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). * */ #include #include #include #include #include #include #include #include "variance/StatWidget.hh" #include "variance/SingletonEvent.hh" #include "variance/AnomalousRead.hh" #include "variance/SampleStats.hh" #include "common/Alignment.hh" #include "common/SequenceUtils.hh" #include "common/PairStats.hh" #include "variance/BamAlignmentReader.hh" using namespace casava::common; namespace ca { namespace variance_detection { /** * @class IndelFinderImpl * * @brief This is primary implementation of the IndelFinder (shadow/singleton read clustering). * * Collects summary statistics on the base quality, alignment score and insert size distribution of the reads and finds * singleton/shadow read pairs with scores above threshold determined by these summary statistics. * * The algorithm then searches for clusters of shadow reads on the reference genome and passes these * reads on to the SmallAssembler to perform a localized de-novo assembly. * * */ // un-comment this to get verbose output //#define DEBUG_IDF_IMPL class IndelFinderImpl { public: // Constructor takes thresholds for alignment scores and spanning read thresholds as arguments IndelFinderImpl(int minPairAlScore, int minSingleAlScore, double spanReadThreshold, const std::string& outputFileName, const std::string& statsFileName, const bool isQphred); virtual ~IndelFinderImpl(); /** * numSpanReads getter * @return numSpanReads */ int numSpanReads() { return _numSpanReads; } /** * numStandardDevs getter * @return numStandardDevs */ int numStandardDevs() { return _numStandardDevs; } // find singleton reads void findGoodSingletons ( BamAlignmentReader& alignmentReader, const PairStats& pairStats, std::map& sampleStats, std::vector& singletons ); // std::vector& eventStarts, // std::vector& eventEnds); // collects summary statistics on base qualities and alignment scores void meanBaseQualityGoodPairs ( BamAlignmentReader& alignmentReader, std::map& sampleStats); void generateOutputFile ( vector& singletons ); void generateStatsFile ( const std::map& sampleStats ); private: void addIfAnomalousPairRead(std::vector& singletons, const PairStats& pairStats, const Alignment& alignment, bool alreadyAdded); // minimum paired alignment score for reads considered by meanBaseQualityGoodPairs() int _minPairedReadAlignmentScore; // minimum single read alignment score for reads considered by meanBaseQualityGoodPairs() int _minSingleReadAlignmentScore; // threshold for spanning reads, // log10(best possible align. score / actual single read align. score) must be larger than this double _spanningReadThreshold; // used to compute alignment score thresholds for singleton/shadow pairs int _numStandardDevs; // FIXME separate object to store counts // number of semi-aligning reads collected int _numSpanReads; // number of singleton reads found (!) int _numSingletons; // number of singleton reads actually kept. int _numGoodSingletons; // number of shadow reads kept int _numGoodShadows; // number of insertion candidate pair reads int _numInsCandPairReads; // number of insertion candidate pair reads that are not also semi-aligned int _numInsCandPairReadsNotSemiAligned; // number of deletion candidate pair reads int _numDelCandPairReads; // number of deletion candidate pair reads that are not also semi-aligned int _numDelCandPairReadsNotSemiAligned; // number of duplication candidate pair reads int _numDuplCandPairReads; // number of duplication candidate pair reads not also semi-aligned int _numDuplCandPairReadsNotSemiAligned; // number of inversion candidate pair reads int _numInvCandPairReads; // number of inversion candidate pair reads that are not also semi-aligned int _numInvCandPairReadsNotSemiAligned; // number of chimeric pair reads int _numChimericPairReads; // number of chimeric pair reads that are not also semi-aligned int _numChimericPairReadsNotSemiAligned; // end FIXME // name of file to send reads to std::string _outputFileName; // name of file to send paired end stats to std::string _statsFileName; bool _isQphred; }; } } // end namespace casava{ namespace { applications #endif /*INDELFINDERIMPL_HH_*/