/**
 * Project     : CASAVA
 * Module      : $RCSfile: ClusterMergerImpl.hh,v $
 * @author     : Tony Cox
 * Copyright   : Copyright (c) Illumina 2010. All rights reserved.
 *
 ** This software is covered by the "Illumina Genome Analyzer Software
 ** License Agreement" and the "Illumina Source Code License Agreement",
 ** and certain third party copyright/licenses, and any user of this
 ** source file is bound by the terms therein (see accompanying files
 ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
 ** Illumina_Source_Code_License_Agreement.pdf and third party
 ** copyright/license notices).
 *
 */

/*****************************************************************************/

#include <map>
#include <set>
#include <string>
#include <vector>

#include "variance/AnomReadVec.hh"
#include "common/PairStats.hh"

/*****************************************************************************/

namespace ca {
namespace variance_detection {

/*****************************************************************************/

/**
 * @class ClusterMergerImpl
 *
 * @brief This implements stage 2b of the indel finder
 *
 * Takes the set of clusters output by the ClusterFinder stage and merges
 * anomalous pair read clusters with compatible shadow/semi-aligned read
 * clusters - removing any read duplicates. Any anomalous pair read clusters
 * that cannot be so merged are not output; they are still available in the
 * output of the previous stage but it is pointless to put them through
 * assembly.
 */

class ClusterMergerImpl
{
public:
    /**
     * @brief Construct a merger from its file names and merge settings.
     *
     * The definition is not part of this header. Judging by the data
     * members declared further down, each argument presumably initialises
     * the my* member of the same name (TODO confirm in the implementation).
     *
     * @param inputFileName   name of the input file; presumably the cluster
     *                        file produced by the ClusterFinder stage
     * @param outputFileName  name of the output file
     * @param doAdjMerge      flag; presumably enables adjacency-based merging
     *                        in addition to shared-read merging (verify)
     * @param maxAdjMergeInterBreakPtDist
     *                        presumably the largest distance between break
     *                        points for which an adjacency merge is still
     *                        considered; units are not visible here
     * @param maxLinksPerCluster
     *                        presumably a per-cluster limit on merge links
     *                        (see killSuperHubClusters) - verify
     */
    ClusterMergerImpl(const std::string& inputFileName,
                      const std::string& outputFileName,
                      bool doAdjMerge,
                      int maxAdjMergeInterBreakPtDist,
                      unsigned int maxLinksPerCluster);

    // The three public entry points below are declared in the order
    // import / merge / output. NOTE(review): they are presumably meant to be
    // called in that order; nothing in this header enforces it.

    /**
     * @brief Import the clusters to be merged.
     * @param pairStats  passed by non-const reference; whether it is read,
     *                   modified or both is not visible from this header
     */
    void importClusters(PairStats& pairStats);

    /** @brief Merge the imported clusters (see the class description). */
    void mergeClusters();

    /** @brief Produce the output file; presumably written to myOutputFileName. */
    void generateOutputFile();

private:
    /**
     * Identifier of a cluster.
     * NOTE(review): possibly an index into myAnomReadVecVec - confirm.
     */
    typedef unsigned int ClusterId;

    /** Sequence of cluster identifiers, plus its iterator types. */
    typedef std::vector<ClusterId> ClusterIdVec;
    typedef ClusterIdVec::iterator ClusterIdVecIter;
    typedef ClusterIdVec::const_iterator ClusterIdVecCIter;

    /** Ordered set of unique cluster identifiers, plus its const iterator. */
    typedef std::set<ClusterId> ClusterIdSet;
    typedef ClusterIdSet::const_iterator ClusterIdSetCIter;

    /**
     * Maps a string key to a list of cluster identifiers.
     * The key is presumably a read name/identifier and the value the
     * clusters that contain that read (verify against the implementation).
     */
    typedef std::map<std::string, ClusterIdVec> ReadClusterIdsMap;
    typedef ReadClusterIdsMap::const_iterator ReadClusterIdsMapCIter;

    /**
     * Maps a cluster identifier to a set of cluster identifiers;
     * presumably the clusters it is a candidate to be merged with.
     */
    typedef std::map<ClusterId, ClusterIdSet> MergeCandMap;
    typedef MergeCandMap::iterator MergeCandMapIter;
    typedef MergeCandMap::const_iterator MergeCandMapCIter;

    /**
     * Maps one cluster identifier to another. Going by the parameter name
     * absorbedByMap used in absorbOneMerge, presumably
     * absorbee -> absorber (TODO confirm direction).
     */
    typedef std::map<ClusterId, ClusterId> AbsorptionMap;
    typedef AbsorptionMap::const_iterator AbsorptionMapCIter;

    // Private helpers. Their definitions live outside this header, so the
    // descriptions below are inferred from names and signatures only and
    // should be checked against the implementation.

    /** Presumably fills readClusterIdsMap from the imported clusters. */
    void buildReadClusterIdsMap(ReadClusterIdsMap& readClusterIdsMap);

    /**
     * Presumably reports statistics about readClusterIdsMap. Note that the
     * map is taken by non-const reference even though the name suggests a
     * read-only operation.
     */
    void dumpReadClusterIdsMapStats(ReadClusterIdsMap& readClusterIdsMap);

    /**
     * Presumably derives merge candidates from reads that appear in more
     * than one cluster, recording them in mergeCandMap.
     */
    void findSharedReadMergeCandidates(ReadClusterIdsMap& readClusterIdsMap,
                                       MergeCandMap& mergeCandMap);

    /**
     * Presumably removes clusters having too many merge links from
     * mergeCandMap; likely related to myMaxLinksPerCluster and
     * myKilledClusterIdSet (verify).
     */
    void killSuperHubClusters(MergeCandMap& mergeCandMap);

    /**
     * Presumably derives merge candidates from cluster adjacency.
     * @param leftNotRight  selects one of two directions/sides; the exact
     *                      meaning is not visible here
     * @param mergeCandMap  map receiving the candidates (presumably)
     */
    void findAdjacencyMergeCandidates(bool leftNotRight,
                                      MergeCandMap& mergeCandMap);

    /**
     * Presumably builds in reverseMergeMap the inverse relation of
     * mergeCandMap (value cluster -> key clusters).
     */
    void makeReverseMergeMap(MergeCandMap& mergeCandMap,
                             MergeCandMap& reverseMergeMap);

    /**
     * Presumably records that absorbeeClusterId is absorbed by
     * absorberClusterId, updating all three maps accordingly.
     */
    void absorbOneMerge(MergeCandMap& mergeCandMap,
                        MergeCandMap& reverseMergeMap,
                        AbsorptionMap& absorbedByMap,
                        ClusterId absorberClusterId,
                        ClusterId absorbeeClusterId);

    /** Presumably collapses chained/overlapping merges in mergeCandMap. */
    void consolidateMerges(MergeCandMap& mergeCandMap);

    /**
     * Presumably dumps the contents of mergeCandMap for diagnostics.
     * @param label  text presumably used to tag the dump
     */
    void dumpMergeCandidates(const std::string& label,
                             const MergeCandMap& mergeCandMap);

    /**
     * Presumably dumps summary statistics of mergeCandMap for diagnostics.
     * @param label  text presumably used to tag the dump
     */
    void dumpMergeCandidateStats(const std::string& label,
                                 const MergeCandMap& mergeCandMap);

    /** Presumably performs the merges described by mergeCandMap. */
    void doMerges(MergeCandMap& mergeCandMap);

    // Configuration; names mirror the constructor parameters. The two file
    // names are const members, so the compiler cannot generate a copy
    // assignment operator for this class.
    const std::string myInputFileName;
    const std::string myOutputFileName;
    bool myDoAdjMerge;
    int myMaxAdjMergeInterBreakPtDist;
    unsigned int myMaxLinksPerCluster;

    /**
     * Collection of AnomReadVec objects, plus its iterator types.
     * Each element presumably holds the reads of one cluster (verify).
     */
    typedef std::vector<AnomReadVec> AnomReadVecVec;
    typedef AnomReadVecVec::iterator AnomReadVecVecIter;
    typedef AnomReadVecVec::const_iterator AnomReadVecVecCIter;

    /** Presumably the clusters loaded by importClusters(). */
    AnomReadVecVec myAnomReadVecVec;

    /** Member instance of the string -> cluster-id-list map. */
    ReadClusterIdsMap myReadClusterIdsMap;

    /** Merge candidates, presumably those found via shared reads. */
    MergeCandMap mySharedReadMergeCandMap;
    /** Merge candidates, presumably those found via adjacency. */
    MergeCandMap myAdjacencyMergeCandMap;
    /** Presumably the ids of clusters discarded by killSuperHubClusters(). */
    ClusterIdSet myKilledClusterIdSet;
};

/*****************************************************************************/

} // end namespace variance_detection
} // end namespace ca

/*****************************************************************************/