#ifndef CLUSTERFINDERIMPL_HH_ #define CLUSTERFINDERIMPL_HH_ /** * Project : CASAVA * Module : $RCSfile: ClusterFinderImpl.hh,v $ * @author : Tony Cox * Copyright : Copyright (c) Illumina 2008, 2009. All rights reserved. * ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). * */ #include #include #include #include #include #include #include "variance/StatWidget.hh" #include "variance/SingletonEvent.hh" #include "variance/SampleStats.hh" #include "variance/AnomReadGroup.hh" #include "variance/AnomalousReadMgr.hh" #include "common/Alignment.hh" #include "common/PairStats.hh" #include "common/SequenceUtils.hh" using namespace casava::common; namespace ca { namespace variance_detection { /** * @class ClusterFinderImpl * * @brief This implements stage 2 of the indel finder * * Takes the set of shadow (non-aligning) and badly-aligning reads * output by stage 1 and groups them into clusters, ready for each * cluster to be de novo assembled by stage 3 * * */ // un-comment this to get verbose output //#define DEBUG_IDF_IMPL struct OffsetInfo { OffsetInfo() {} int min_; int max_; double mean_; double sd_; }; // ~OffsetInfo class ClusterFinderImpl { public: ClusterFinderImpl ( double maxDist, unsigned int minGroupSize, double spanningReadThreshold, const std::string& inputFileName, const std::string& statsFileName, const std::string& outputFileName, const bool isQphred); void printGroup( vector& group, int& numGroups, ostream& outputFile ); void importSampleStats( map& sampleInfo ); void importReads ( AnomalousReadMgr& anomalousReadMgr, map& sampleInfo, PairStats& pairStats ); bool wantAlignment( const Alignment& alignment ); // TBD should be const void operator()( AnomalousReadMgr& anomalousReadMgr ); void clusterAlignedReads ( vector& matched, vector& matchedClusters ); void clusterShadowsForward ( vector& matchedForward, vector& matchedClustersForward ); void clusterShadowsReverse ( vector& matchedReverse, vector& matchedClustersReverse ); void clusterStrands ( vector& matchedClustersForward, vector& matchedClustersReverse ); void addAnomPairTypeClusters ( AnomalousReadMgr& anomalousReadMgr, AnomalousRead::Type anomReadType, vector& clustersSoFar); void addAnomPairClusters ( AnomalousReadMgr& anomalousReadMgr, vector& clustersSoFar); int getMostProbableBreakPoint(Alignment& alignment); private: unsigned int addIfAnomalousPairRead(AnomalousReadMgr& anomalousReadMgr, const PairStats& pairStats, const Alignment& alignment); const double _maxDist; const unsigned int _minGroupSize; const double _spanningReadThreshold; // used to compute alignment score thresholds for singleton/shadow pairs const std::string _inputFileName; const std::string _statsFileName; const std::string _outputFileName; const bool _isQphred; const double _maxDistAligned; const double _maxDistShadow; const double _maxDistInterStrand; }; // ~class ClusterFinderImpl } } // end namespace casava{ namespace { applications #endif /*INDELFINDERIMPL_HH_*/