/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file DiversityCalculator.hh ** ** \brief Calculates the diversity in a population of matched fragments. ** ** DiversityCalculator is the top-level component used to estimate ** the diversity in the population of fragment sizes obtained during ** alignment analysis. ** ** \author Come Raczy **/ #ifndef CASAVA_ALIGNMENT_DIVERSITY_CALCULATOR_HH #define CASAVA_ALIGNMENT_DIVERSITY_CALCULATOR_HH #include #include #include #include #include #include "common/Alignment.hh" namespace casava { namespace alignment { namespace fs = boost::filesystem; namespace cc = casava::common; class DiversityCalculator { public: DiversityCalculator(unsigned int maximumFragmentSize, const fs::path &inputDirectory, const fs::path &outputFile, unsigned int processLane); void run(); private: DiversityCalculator(); DiversityCalculator(const DiversityCalculator &); DiversityCalculator &operator=(const DiversityCalculator &); const unsigned int maximumFragmentSize_; const fs::path inputDirectory_; const fs::path outputFile_; const unsigned int processLane_; }; class Fragment { public: enum FragmentType {IN, OUT, FFRR, EQUAL, UNKNOWN /*, ANOMALOUS */}; Fragment() : position_(0), length_(0), type_(IN) {} Fragment(const cc::Alignment &read1, const cc::Alignment &read2); Fragment(const Fragment &fragment); Fragment &operator=(const Fragment &rhs); long getPosition() const {return position_;} unsigned int getLength() const {return length_;} FragmentType getType() const {return type_;} unsigned short getTile() const {return tile_;} short getX() const {return x_;} short getY() const {return y_;} private: long position_; unsigned int length_; FragmentType type_; unsigned short tile_; short x_; short y_; long computePosition(const cc::Alignment &read1, const cc::Alignment &read2) const; unsigned int computeLength(const cc::Alignment &read1, const cc::Alignment &read2, long position) const; FragmentType computeType(const cc::Alignment &read1, const cc::Alignment &read2, long position, unsigned int length) const; }; class LaneDiversityCalculator { public: LaneDiversityCalculator() {} void run(unsigned int maximumFragmentSize, const fs::path &inputDirectory, unsigned int laneId); unsigned int getLaneId() const {return laneId_;} /** ** \brief Number of pairs that were succesfully aligned. ** ** Discards all the pairs that don't pass filter, and all the ** pairs where at least one of the strands is neither 'F' nor ** 'R'. **/ unsigned int getAlignedPairCount() const {return alignedPairCount_;} unsigned int getConventionalPairCount() const {return conventionalPairCount_;} unsigned int getOutwardPairCount() const {return outwardPairCount_;} unsigned int getAnomalousPairCount() const {return anomalousPairCount_;} /** ** \brief Number of pairs that are duplicates of any of the good pairs. ** ** Good pairs, and their unicity, are as defined in getUniqueCount(). **/ unsigned int getDuplicateCount() const {return duplicateCount_;} /** ** \brief Number of unique good pairs. ** ** Good pairs are those that were succesfully aligned (as ** specified in getAlignedPairCount) where both reads align to ** the same chromosome and to the same contig. In addition ** aligned pairs are discarder if: ** ** - they are anomalous with a minimum "single read score" lower ** or equal to 10; or ** - they have a strictly positive minimum "paired read score" ** lower or equal to 5. **/ unsigned int getUniqueCount() const {return uniqueCount_;} size_t getMaximumSize() const {return sizeCount_.size();} /** ** \brief Number of fragments with a size in the decade of the indicated size. **/ unsigned int getSizeCount(const unsigned int size) const {const size_t i = size/10; return i < sizeCount_.size() ? sizeCount_[i] : 0;} private: typedef std::map > FragmentMap; FragmentMap getFragmentMap(boost::iostreams::filtering_istream &exportStream1, boost::iostreams::filtering_istream &exportStream2); void openExportFile(const fs::path& inputDirectory, const unsigned int laneId, const unsigned int readNum, boost::iostreams::filtering_istream &exportFilterStream, std::ifstream &exportStream); void countFragments(const std::vector &fragmentList); void addFragment(const Fragment &fragment); unsigned int alignedPairCount_; unsigned int conventionalPairCount_; unsigned int outwardPairCount_; unsigned int anomalousPairCount_; unsigned int equalCount_; unsigned int duplicateCount_; unsigned int doubleDetection_; unsigned int uniqueCount_; unsigned int unknownCount_; std::vector sizeCount_; unsigned int laneId_; }; } // namespace alignment } // namespace casava #endif // #ifndef CASAVA_ALIGNMENT_DIVERSITY_CALCULATOR_HH