/** * Project : CASAVA * Module : $RCSfile: ClusterMergerImpl.hh,v $ * @author : Tony Cox * Copyright : Copyright (c) Illumina 2010. All rights reserved. * ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). * */ /*****************************************************************************/ #include #include #include #include "variance/AnomReadVec.hh" #include "common/PairStats.hh" /*****************************************************************************/ namespace ca { namespace variance_detection { /*****************************************************************************/ /** * @class ClusterMergerImpl * * @brief This implements stage 2b of the indel finder * * Takes the set of clusters output by the ClusterFinder stage and merges * anomalous pair read clusters with compatible shadow/semi-aligned read * clusters - removing any read duplicates. Any anomalous pair read clusters * that cannot be so merged are not output; they are still available in the * output of the previous stage but it is pointless to put them through * assembly. 
*/
// NOTE(review): in this copy of the header the template argument lists of the
// std::vector / std::set / std::map typedefs below are absent, as are the
// names of the system headers included before the class. Restore them from
// the checked-in file before compiling - they cannot be recovered from this
// text alone.
class ClusterMergerImpl
{
public:
    /**
     * @brief Construct a merger for one input file / output file pair.
     *
     * The definition is not part of this header. Each argument has a
     * correspondingly named my* data member below and presumably
     * initialises it - confirm against the implementation file.
     *
     * @param inputFileName   presumably the cluster file produced by the
     *                        ClusterFinder stage (see the class description)
     * @param outputFileName  presumably the file written by
     *                        generateOutputFile()
     * @param doAdjMerge      presumably enables adjacency-based merging in
     *                        addition to shared-read merging - confirm
     * @param maxAdjMergeInterBreakPtDist
     *                        presumably the largest break point separation
     *                        for which an adjacency merge is considered;
     *                        units are not visible here
     * @param maxLinksPerCluster
     *                        presumably the link-count limit applied by
     *                        killSuperHubClusters() - confirm
     */
    ClusterMergerImpl(const std::string& inputFileName,
                      const std::string& outputFileName,
                      bool doAdjMerge,
                      int maxAdjMergeInterBreakPtDist,
                      unsigned int maxLinksPerCluster);

    // The three public entry points below are declared in what looks like
    // their intended calling order (import, merge, write) - TODO confirm
    // against the caller; nothing in this header enforces an order.

    /** @brief Presumably loads the input clusters; takes the pair statistics
     *  by non-const reference, so it may update them - confirm. */
    void importClusters(PairStats& pairStats);

    /** @brief Presumably finds and applies the cluster merges described in
     *  the class comment - implementation not visible here. */
    void mergeClusters();

    /** @brief Presumably writes the merged clusters to myOutputFileName. */
    void generateOutputFile();

private:
    /** Identifier of a cluster (plain unsigned integer). */
    typedef unsigned int ClusterId;

    // Containers of cluster ids and their iterator shorthands.
    // NOTE(review): element types are missing in this text (see note above
    // the class); ClusterId is the likely element type - confirm.
    typedef std::vector ClusterIdVec;
    typedef ClusterIdVec::iterator ClusterIdVecIter;
    typedef ClusterIdVec::const_iterator ClusterIdVecCIter;

    typedef std::set ClusterIdSet;
    typedef ClusterIdSet::const_iterator ClusterIdSetCIter;

    // Map presumably keyed by read, giving the clusters that contain it;
    // key and mapped types are missing in this text - confirm.
    typedef std::map ReadClusterIdsMap;
    typedef ReadClusterIdsMap::const_iterator ReadClusterIdsMapCIter;

    // Map of merge candidates; presumably cluster id -> candidate partner
    // cluster ids. Key and mapped types are missing in this text - confirm.
    typedef std::map MergeCandMap;
    typedef MergeCandMap::iterator MergeCandMapIter;
    typedef MergeCandMap::const_iterator MergeCandMapCIter;

    // Map presumably recording which cluster absorbed which (see the
    // absorbedByMap parameter of absorbOneMerge); types missing - confirm.
    typedef std::map AbsorptionMap;
    typedef AbsorptionMap::const_iterator AbsorptionMapCIter;

    // Private helpers. Only the declarations are visible in this header, so
    // the one-line descriptions below are inferred from names and signatures
    // and must be confirmed against the implementation file.

    /** Presumably fills readClusterIdsMap from the imported clusters. */
    void buildReadClusterIdsMap(ReadClusterIdsMap& readClusterIdsMap);

    /** Presumably reports summary statistics for readClusterIdsMap
     *  (diagnostic output; destination not visible here). */
    void dumpReadClusterIdsMapStats(ReadClusterIdsMap& readClusterIdsMap);

    /** Presumably records in mergeCandMap the clusters that share reads
     *  according to readClusterIdsMap. */
    void findSharedReadMergeCandidates(ReadClusterIdsMap& readClusterIdsMap,
                                       MergeCandMap& mergeCandMap);

    /** Presumably removes clusters with too many links from mergeCandMap;
     *  likely related to myMaxLinksPerCluster and myKilledClusterIdSet -
     *  confirm. */
    void killSuperHubClusters(MergeCandMap& mergeCandMap);

    /** Presumably adds merge candidates based on cluster adjacency;
     *  leftNotRight selects one of two directions (exact meaning not
     *  visible here). */
    void findAdjacencyMergeCandidates(bool leftNotRight,
                                      MergeCandMap& mergeCandMap);

    /** Presumably builds in reverseMergeMap the inverse of mergeCandMap. */
    void makeReverseMergeMap(MergeCandMap& mergeCandMap,
                             MergeCandMap& reverseMergeMap);

    /** Presumably folds the absorbee cluster into the absorber cluster and
     *  updates the three maps accordingly - confirm. */
    void absorbOneMerge(MergeCandMap& mergeCandMap,
                        MergeCandMap& reverseMergeMap,
                        AbsorptionMap& absorbedByMap,
                        ClusterId absorberClusterId,
                        ClusterId absorbeeClusterId);

    /** Presumably collapses chains of merges in mergeCandMap, in place. */
    void consolidateMerges(MergeCandMap& mergeCandMap);

    /** Presumably prints the contents of mergeCandMap, tagged with label. */
    void dumpMergeCandidates(const std::string& label,
                             const MergeCandMap& mergeCandMap);

    /** Presumably prints summary statistics of mergeCandMap, tagged with
     *  label. */
    void dumpMergeCandidateStats(const std::string& label,
                                 const MergeCandMap& mergeCandMap);

    /** Presumably applies the merges listed in mergeCandMap to the stored
     *  clusters. */
    void doMerges(MergeCandMap& mergeCandMap);

    // Configuration; names mirror the constructor parameters.
    const std::string myInputFileName;
    const std::string myOutputFileName;
    bool myDoAdjMerge;
    int myMaxAdjMergeInterBreakPtDist;
    unsigned int myMaxLinksPerCluster;

    // Container of per-cluster read vectors and its iterator shorthands.
    // NOTE(review): element type is missing in this text; AnomReadVec (from
    // variance/AnomReadVec.hh) is the likely element type - confirm.
    typedef std::vector AnomReadVecVec;
    typedef AnomReadVecVec::iterator AnomReadVecVecIter;
    typedef AnomReadVecVec::const_iterator AnomReadVecVecCIter;

    // Working state. How each member is populated is not visible in this
    // header; the names suggest the imported clusters, the read -> clusters
    // index, the two kinds of merge candidates, and the ids of clusters
    // dropped from merging - confirm against the implementation file.
    AnomReadVecVec myAnomReadVecVec;
    ReadClusterIdsMap myReadClusterIdsMap;
    MergeCandMap mySharedReadMergeCandMap;
    MergeCandMap myAdjacencyMergeCandMap;
    ClusterIdSet myKilledClusterIdSet;
};

/*****************************************************************************/
// NOTE(review): as laid out in this copy, the closing brace of namespace ca
// on the next line falls inside the preceding line comment. Confirm that the
// checked-in file has each namespace closer on its own line.
} // end namespace variance_detection } // end namespace ca /*****************************************************************************/