#include "variance/ClusterFinderImpl.hh"

/**
 * Project     : CASAVA
 * Module      : $RCSfile: ClusterFinderImpl.cpp,v $
 * @author     : Tony Cox
 * Copyright   : Copyright (c) Illumina 2008, 2009. All rights reserved.
 *
 ** This software is covered by the "Illumina Genome Analyzer Software
 ** License Agreement" and the "Illumina Source Code License Agreement",
 ** and certain third party copyright/licenses, and any user of this
 ** source file is bound by the terms therein (see accompanying files
 ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
 ** Illumina_Source_Code_License_Agreement.pdf and third party
 ** copyright/license notices).
 *
 */


#define NEAREST_NEIGHBOUR_PEAK_FINDER 1
//#define DEBUG_CLF 1

#include "variance/AnomalousRead.hh"

using namespace casava::common;

namespace ca {
namespace variance_detection {


ClusterFinderImpl::ClusterFinderImpl
  ( double maxDist,
    unsigned int minGroupSize,
    double spanningReadThreshold,
    const std::string& inputFileName,
    const std::string& statsFileName,
    const std::string& outputFileName,
    const bool isQphred) :
    _maxDist(maxDist),
    _minGroupSize(minGroupSize),
    _spanningReadThreshold(spanningReadThreshold),
    _inputFileName(inputFileName),
    _statsFileName(statsFileName),
    _outputFileName(outputFileName),
    _isQphred(isQphred),
    _maxDistAligned(0.0),
    _maxDistShadow(20.0),
    _maxDistInterStrand(12.0)
{

  if (_inputFileName.empty())
  {
    cerr << "No value given for input file" << endl; // TBD exception?
  }
  if (_statsFileName.empty())
  {
    cerr << "No value given for stats file" << endl; // TBD exception?
  }
  if (_outputFileName.empty())
  {
    cerr << "No value given for output file" << endl; // TBD exception?
  }
        // Tony's default values:
        //_minPairedReadAlignmentScore=6;
        //_minSingleReadAlignmentScore=10;
        //_numStandardDevs=3;

        // need to verify if this threshold is applicable to different data sets
        //_spanningReadThreshold=25;
        //_numGroups=0;
        //_numSpanReads=0;
        //_numSingletons=0;
        //_numGoodSingletons=0;
} // ~c'tor

void ClusterFinderImpl::importSampleStats
( map<string, OffsetInfo>& sampleInfo )
{
#ifdef DEBUG_CLF
  cerr << "importSampleStats" << endl;
#endif

  std::string line, sampleName, field;
  double mean, sd, se;
  ifstream statsFile( _statsFileName.c_str() );
  if (!statsFile)
  {
    cerr << "Problem opening sample file " << _statsFileName << endl;
  } // ~if

  while (1)
  {
    getline( statsFile, line );
    if (statsFile.eof()) break;
    //    cerr << "Line : " << line << endl;
    stringstream ss(line);
    ss >>sampleName;
    sampleName+=" ";
    ss >>field;
    sampleName+=field;
    sampleName+=" ";
    ss >>field;
    sampleName+=field;
    ss >> mean;
    ss >> sd;
    ss >> se;
    cerr << sampleName << " = " << mean << " " << sd << " " << se << endl;
    sampleInfo[sampleName]=OffsetInfo();
    sampleInfo[sampleName].mean_=mean;
    sampleInfo[sampleName].sd_=sd;
  } // ~while

  statsFile.close();

#ifdef DEBUG_CLF
  cerr << "importSampleStats done" << endl;
#endif

} // ~ClusterFinderImpl::importSampleStats

/*****************************************************************************/

unsigned int
ClusterFinderImpl::addIfAnomalousPairRead(AnomalousReadMgr& anomalousReadMgr,
                                          const PairStats& pairStats,
                                          const Alignment& alignment)
{
    // FIXME : some duplication of IndelFinderImpl::addIfAnomalousPairRead.
    // Get the expected pair values for reads in the same lane as this read.
    RelOrient expectedRelOrient;
    double minSize(0.0);
    double maxSize(0.0);
    pairStats.getBounds(alignment.getMachineName(),
                        alignment.getRunNumber(),
                        alignment.getLaneNumber(),
                        alignment.getIndex(), expectedRelOrient, minSize, maxSize);

    // Get the actual pair values for this read.
    const unsigned int readNum(alignment.getReadNumber());
    unsigned int readLen(pairStats.getReadLen(alignment.getMachineName(), alignment.getRunNumber(),
                                              alignment.getLaneNumber(),
                                              alignment.getIndex(), readNum));
    // FIXME : Assumes only possible read numbers are 1 and 2.
    const unsigned int partnerReadNum(3 - readNum);
    unsigned int partnerReadLen(pairStats.getReadLen(alignment.getMachineName(), alignment.getRunNumber(),
            alignment.getLaneNumber(),
            alignment.getIndex(), partnerReadNum));
    bool isChimeric(false);
    long leftPos(-1);
    unsigned int insertSize(0);
    RelOrient relOrient;

    if (!SequenceUtils::getReadPairInfo(alignment, readLen, partnerReadLen,
                                        isChimeric, leftPos, insertSize,
                                        relOrient)) {
        // Skip unless both reads are mapped (and not to the same position).
        return false;
    }

    // Allow reads to be added to multiple anomalous pair types.
    typedef std::vector<AnomalousRead::Type> AnomReadTypeVec;
    typedef AnomReadTypeVec::const_iterator AnomReadTypeVecCIter;
    AnomReadTypeVec anomReadTypeVec;

    if (isChimeric) {
        anomReadTypeVec.push_back(AnomalousRead::ChimericPair);
    } else {
        if (!(relOrient == expectedRelOrient)) {
            if ((relOrient == RelOrient("Fm"))
                || (relOrient == RelOrient("Fp"))) {
                anomReadTypeVec.push_back(AnomalousRead::InversionPair);
            } else {
                anomReadTypeVec.push_back(AnomalousRead::DuplicationPair);
            }
        } else { // expected orientation
            if (insertSize < minSize) {
                anomReadTypeVec.push_back(AnomalousRead::InsertionPair);
            } else if (insertSize > maxSize) {
                anomReadTypeVec.push_back(AnomalousRead::DeletionPair);
            }
        }
    }

    for (AnomReadTypeVecCIter anomReadTypeVecCIter(anomReadTypeVec.begin());
         anomReadTypeVecCIter != anomReadTypeVec.end();
         ++anomReadTypeVecCIter) {
        AnomalousRead::Type anomReadType(*anomReadTypeVecCIter);

        anomalousReadMgr.addRead(anomReadType,
                                 ((anomReadType == AnomalousRead::ChimericPair)
                                  ? dynamic_cast<Singleton*>
                                  (new ChimericPairAnomRead(alignment,
                                                             pairStats,
                                                            readLen))
                                  : dynamic_cast<Singleton*>
                                  (new SingleChromPairAnomRead(alignment,
                                                               leftPos,
                                                               insertSize))));
    }

    return static_cast<unsigned int>(anomReadTypeVec.size());
}

/*****************************************************************************/

// Return true if an aligned read should be passed to the clustering process
// It's AlignCandReads responsibility to ensure shadow reads don't get
// erroneously assigned a low-confidence alignment but instead stay as
// shadows. ClusterFinderImpl's task is to
// - always include gapped alignments
// - never include reads containing Ns (we don't likes them)
// - don't include reads with high-confidence alignments (which are
// likely from shadows caused by repeats and not indels)
bool ClusterFinderImpl::wantAlignment( const Alignment& alignment )
{
  if (strchr(alignment.getData().c_str(),'N')!=NULL)
  { // if it contains "N"s we don't want it
    return false;
  } // ~if
  else if (alignment.getMatchDescriptor()[0] == '-')
  { // if it's a shadow we want it
    return true;
  } // ~else iif
  else
  {
    // if it's a gapped alignment we want it
    if (strpbrk(alignment.getMatchDescriptor().c_str(),"^$")!=NULL)
    {
      return true; // ~ include
    } //~if
    else
    {
      double adsc
          (SequenceUtils::getSemiAlignedReadMetric(alignment,_isQphred));
      if (adsc<_spanningReadThreshold)
        return false; // if it's a *good* alignment we *don't* want it
      else
        return true;
    } // ~else
  } // ~else

} // ~wantAlignment


void ClusterFinderImpl::importReads
( AnomalousReadMgr& anomalousReadMgr,
  map<string, OffsetInfo>& sampleInfo,
  PairStats& pairStats)
{
#ifdef DEBUG_CLF
    cerr << "importReads" << endl;
#endif

    Alignment alignment;
    int minOff,maxOff;
    //  const char* pAlign;
    std::string s, sampleName;

    // Now extract the singletons variable from anomalousReadMgr.
    std::vector<Singleton*>&
        singletons(anomalousReadMgr.
                   getVec(AnomalousRead::ShadowOrSemiAligned));

    int numAligned(0);
    int numAnomPairReads(0), numSemiAligned(0), numAlignedRejected(0);
    int numShadows(0),numShadowsRejected(0);
    int numRead(0);

    ifstream inputFile( _inputFileName.c_str() );

    if (!inputFile)
    {
        cerr << "Problem opening input file " << _inputFileName << endl;
    } // ~if
    std::string line;
    while (1)
    {

        //    getline( inputFile, line );

        inputFile >> minOff;
        if (inputFile.eof()) break;
        inputFile >> maxOff;
        inputFile.ignore();
        inputFile >> alignment;
        numRead++;
#ifdef DEBUG_CLF
        // if ((numRead%5000)==0) cerr << numRead << " " << sampleInfo.size() << endl;
        if (numRead>=217085) cerr << numRead << " " << alignment << endl;
#endif


    //      cout << "minOff: " << minOff << " maxOff: " << maxOff << " " << alignment;
        if (alignment.getMatchDescriptor().c_str()[0] != '-')
        {
            numAligned++;

            if (pairStats.isValid(alignment.getMachineName(), alignment.getRunNumber(),
                                  alignment.getLaneNumber(),
                                  alignment.getIndex())) {
                if (addIfAnomalousPairRead(anomalousReadMgr, pairStats,
                                           alignment) > 0) {
                    ++numAnomPairReads;
                }
            }

            singletons.push_back(new SingletonSemiAligned(alignment));
            singletons.back()->minPos_=minOff;
            singletons.back()->maxPos_=maxOff;
            // All alignments here have passed purity filtering hence
            // always have setPassed==true. We used setPassed as an
            // internal flag to denote whether or not we want to cluster
            // the alignment. We set it back to true before we output the
            // alignment again.
            if (!wantAlignment(alignment))
            {
                singletons.back()->alignment_.setPassed(false);
                numAlignedRejected++;
            } else {
                ++numSemiAligned;
            }
        } // ~if
        else
        {
            numShadows++;
            sampleName=SequenceUtils::getSampleName(alignment);

#ifdef DEBUG_CLF
            if (sampleInfo.find(sampleName)==sampleInfo.end())
            {
                cerr << "could not find sample info for sample name "
                     << sampleName << " found at line " << numRead
                     << " in file " << _inputFileName
                     << ", entry =  " << alignment << endl;
            } // ~if
#endif

            singletons.push_back( new SingletonNotMatched
                                  (alignment, minOff, maxOff,
                                   sampleInfo[sampleName].sd_ ));

            if (!wantAlignment(alignment))
            {
                singletons.back()->alignment_.setPassed(false);
                numShadowsRejected++;
            }
        } // ~else

    } // ~while

    cerr << "Read " << numAligned << " aligned reads, of which " << endl
         << "  " << numAnomPairReads << " were anomalous pair reads" << endl
         << "  " << numSemiAligned << " were semi-aligned reads" << endl
         << "  " << numAlignedRejected << " were excluded from clustering"
         << endl;
    cerr << "Read " << numShadows << " shadow reads, of which "
         << numShadowsRejected << " were excluded from clustering" << endl;

    inputFile.close();

#ifdef DEBUG_CLF
    cerr << "importReads done" << endl;
#endif


} // ~ClusterFinderImpl::importReads


void ClusterFinderImpl::printGroup( vector<Singleton*>& group,
                                    int& numGroups,
                                    ostream& outputFile )
{

  double gStart(0.0), gEnd(0.0);
  if (group.size()>=_minGroupSize)
  {
    outputFile << "CLUSTER_START Found cluster of size " << group.size()
               << endl;
  } // ~if

  for (vector<Singleton*>::iterator i(group.begin());
       i!=group.end();++i)
  {

    outputFile << (*i)->minPos_ << "\t"
               << (*i)->maxPos_ << "\t"
               << (*i)->alignment_;
    gStart+=(*i)->minPos_;
    gEnd+=(*i)->maxPos_;
  } // ~for
  gStart/=group.size();
  gEnd/=group.size();

  if (group.size()>=_minGroupSize)
  {
    outputFile << "CLUSTER_END group=" << ++numGroups
             << "|reads=" << group.size()
             << "|start=" << (int)gStart
             << "|end=" << (int)gEnd << endl;

  } // ~if

} // ~void ClusterFinderImpl::printGroup( vector<Singleton*>& group )


int numLeadingMatches( Alignment& alignment )
{
  int numLeadingMatches(0);
  int exactRun(0);
  int firstMismatch(-1);
  int firstIndel(-1);
  int numIndels(0);

  for (unsigned int i(0);i<alignment.getMatchDescriptor().size();i++)
  {
    if (isdigit(alignment.getMatchDescriptor()[i]))
    {
      exactRun*=10;
      exactRun+=
        (static_cast<int>(alignment.getMatchDescriptor()[i])-48);
    } // if
    else
    { // found a nondigit character, return leading values
      numLeadingMatches+=exactRun;
      exactRun=0;
      if (isalpha(alignment.getMatchDescriptor()[i]))
      {
        if (firstMismatch==-1) firstMismatch=numLeadingMatches;
        numLeadingMatches++;
      }
      else if (alignment.getMatchDescriptor()[i]=='^')
      {
        if (firstIndel==-1) firstIndel=numLeadingMatches;
        numIndels++;
      }
    } // ~else
  } // ~for
  // if exactly one indel is found, return position of that
  // else return position of first mismatch
  if (numIndels==1) return firstIndel;
  else return firstMismatch;
} // ~numLeadingMatches

#ifdef UNDER_CONSTRUCTION
int numLeadingMatches( Alignment& alignment )
{
  int numLeadingMatches(0);

  for (unsigned int i(0);i<alignment.getMatchDescriptor().size();i++)
  {
    if (isdigit(alignment.getMatchDescriptor()[i]))
    {
      numLeadingMatches*=10;
      numLeadingMatches+=
        (static_cast<int>(alignment.getMatchDescriptor()[i])-48);
    } // if
    else
    { // found a nondigit character, return leading values
      return numLeadingMatches;
    } // ~else
  } // ~for
  // only get here if descriptor is all digits, i.e. an exact match
  return -1;
} // ~numLeadingMatches
#endif


  void ClusterFinderImpl::clusterAlignedReads
  (  vector<SingletonEvent>& matched,
     vector<AnomReadGroup>& matchedClusters )
  {
    int lastPos(-500000);
    for (vector<SingletonEvent>::iterator i(matched.begin());
         i!=matched.end();++i)
    {
      if (i->pos_-lastPos>_maxDistAligned)
      {
#ifdef DEBUG_CLF
        if (!matchedClusters.empty())
          cerr << "MEAN=" << std::fixed
               << matchedClusters.back().positionStats_.getMean() << endl;
        cerr << "CLUSTER " << std::fixed << i->pos_ << " " << lastPos << endl;
#endif
        matchedClusters.
            push_back( AnomReadGroup(AnomalousRead::ShadowOrSemiAligned) );
      } // ~if
#ifdef DEBUG_CLF
      cerr << i->pos_ << " " << i->pRead_->alignment_;
#endif

      if (!matchedClusters.empty())
      {
          matchedClusters.back().insert(i->pRead_);
          matchedClusters.back().positionStats_.update(i->pos_);
      }
      lastPos=i->pos_;
    } // ~for
  } // ~clusterAlignedReads


  void ClusterFinderImpl::clusterShadowsForward
  (  vector<SingletonEvent>& shadowForward,
     vector<AnomReadGroup>& matchedClustersForward )
  {
    //    vector<SingletonEvent>::iterator j(shadowForward.begin());
    //    double upperLimit;
    int numAdded(0);

    map< Singleton*, AnomReadGroup* > active;

    if (matchedClustersForward.empty()) {
        if (!shadowForward.empty()) {
            // There are shadow reads but no semi-aligned clusters.
            // Even if the shadows could be clustered & assembled,
            // the contig is unlikely to align reliably -> discard.
            shadowForward.clear();
            return;
        }
    }

    vector<AnomReadGroup>::iterator i(matchedClustersForward.begin());
    vector<SingletonEvent>::iterator j(shadowForward.begin());
    while (shadowForward.end() != j)
    {
//cerr << "j pos= " << j->pos_ << endl;
//cerr << "i position stats= " << i->positionStats_.getMean() << endl;

      if (j->pos_<=i->positionStats_.getMean())
      {
        if (active.find(j->pRead_)!=active.end())
        {
          if (active[j->pRead_]!=NULL)
          {
            active[j->pRead_]->insert(j->pRead_);
            numAdded++;
          } // ~if
          active.erase(j->pRead_);
        } // ~if
        else
        {
          active[j->pRead_]=NULL;
        } // ~else
        if (++j==shadowForward.end()) break;
      } // ~if
      else
      {
        for ( map< Singleton*, AnomReadGroup* >::iterator k(active.begin());
               k!=active.end();++k)
        {
          k->second=&*i;
        } // ~for
        if (++i==matchedClustersForward.end()) break;
      } // ~else
    } // ~while

#ifdef XXX
    vector<SingletonEvent>::iterator j(shadowForward.begin());
    double upperLimit;
    int numAdded(0);


    for (vector<AnomReadGroup>::iterator i(matchedClustersForward.begin());
       i!=matchedClustersForward.end();++i)
    {
      while ( (j!=shadowForward.end())
              &&(j->pos_<i->positionStats_.getMean()-_maxDistShadow)) ++j;
      //              &&(j->pos_<i->positionStats_.getMean())) ++j;
      if (j==shadowForward.end()) break;
      upperLimit=i->positionStats_.getMean()+_maxDistShadow;
      while (j->pos_<=upperLimit)
      {
        i->insert(j->pRead_);
        // i->positionStats_.update(j->pos_); // TBD
        upperLimit=j->pos_+_maxDistShadow;
        numAdded++;
        if (++j==shadowForward.end()) break;
      }
    }

#endif
    cerr << "Added " << numAdded
         << " shadow reads to forward strand clusters" << endl;
    return;

  } // ~addShadowsForward


  void ClusterFinderImpl::clusterShadowsReverse
  (  vector<SingletonEvent>& shadowReverse,
     vector<AnomReadGroup>& matchedClustersReverse )
  {

    int numAdded(0);

    map< Singleton*, AnomReadGroup* > active;

    if (matchedClustersReverse.empty()) {
        if (!shadowReverse.empty()) {
            // There are shadow reads but no semi-aligned clusters.
            // Even if the shadows could be clustered & assembled,
            // the contig is unlikely to align reliably -> discard.
            shadowReverse.clear();
            return;
        }
    }

    vector<AnomReadGroup>::reverse_iterator i(matchedClustersReverse.rbegin());
    vector<SingletonEvent>::reverse_iterator j(shadowReverse.rbegin());
    while (shadowReverse.rend() != j)
    {
      if (j->pos_>=i->positionStats_.getMean())
      {
        if (active.find(j->pRead_)!=active.end())
        {
          if (active[j->pRead_]!=NULL)
          {
            active[j->pRead_]->insert(j->pRead_);
            numAdded++;
          } // ~if
          active.erase(j->pRead_);
        } // ~if
        else
        {
          active[j->pRead_]=NULL;
        } // ~else
        if (++j==shadowReverse.rend()) break;
      } // ~if
      else
      {
        for ( map< Singleton*, AnomReadGroup* >::iterator k(active.begin());
               k!=active.end();++k)
        {
          k->second=&*i;
        } // ~for
        if (++i==matchedClustersReverse.rend()) break;
      } // ~else
    } // ~while


#ifdef XXX
    vector<SingletonEvent>::reverse_iterator j(shadowReverse.rbegin());
    double upperLimit;
    int numAdded(0);
    for (vector<AnomReadGroup>::reverse_iterator
           i(matchedClustersReverse.rbegin());
       i!=matchedClustersReverse.rend();++i)
    {
      while ( (j!=shadowReverse.rend())
              &&(j->pos_>i->positionStats_.getMean()+_maxDistShadow)) ++j;
      if (j==shadowReverse.rend()) break;
      upperLimit=i->positionStats_.getMean()-_maxDistShadow;
      while (j->pos_>=upperLimit)
      {
        i->insert(j->pRead_);
        //        i->positionStats_.update(j->pos_); // TBD
        upperLimit=j->pos_-_maxDistShadow;
        numAdded++;
        if (++j==shadowReverse.rend()) break;
      }
    }

#endif
    cerr << "Added " << numAdded
         << " shadow reads to reverse strand clusters" << endl;
    return;

  } // ~addShadowsReverse


  void ClusterFinderImpl::clusterStrands
  ( vector<AnomReadGroup>& matchedClustersForward,
    vector<AnomReadGroup>& matchedClustersReverse )
  {
    //    double lastPos(-500000);
   // TBD do this with insert
    for (vector<AnomReadGroup>::iterator i(matchedClustersReverse.begin());
       i!=matchedClustersReverse.end();++i)
    {
      matchedClustersForward.push_back(*i);
    } // ~for

    sort (matchedClustersForward.begin(),
          matchedClustersForward.end(),
          SortAnomReadGroup());

    if (matchedClustersForward.size()<2) return;

    vector<AnomReadGroup>::iterator i(matchedClustersForward.begin()),
      prev(matchedClustersForward.begin());
    i++;
    for (;i!=matchedClustersForward.end();++i)
    {
#ifdef DEBUG_CLF
      cerr << i->size() << " " << i->positionStats_.getMean() << endl;
      if (!i->empty())
        cerr << (*i->begin())->alignment_;
#endif

      if ((prev->size()!=0)
          &&(i->size()!=0)
          &&((*prev->begin())->alignment_.getMatch().getStrand()
              == Match::Forward)
          &&((*i->begin())->alignment_.getMatch().getStrand()
             == Match::Reverse)
          &&(i->positionStats_.getMean()-prev->positionStats_.getMean()
             <=_maxDistInterStrand))
      {
#ifdef DEBUG_CLF
      cerr << "DOING_MERGE" << endl;
#endif
        for (AnomReadGroup::iterator j(i->begin());j!=i->end();j++)
        {
          prev->insert(*j);
        }
        i->clear();
      } //~if
      prev=i;
    } // ~for
  } // ~clusterStrands

int ClusterFinderImpl::getMostProbableBreakPoint(Alignment& alignment) {
		int breakPointPos = 0;

		const char* qualityString = alignment.getQuality().c_str();
		int lengthOfAlignment = strlen (qualityString);

		const char* type = alignment.getMatchDescriptor().c_str();
		bool noGaps = true;

		for (unsigned j=0;j<alignment.getMatchDescriptor().size();++j) {
//			cerr << j <<": type[j]= " << type[j] << endl;
			if (type[j] == '^' ) {noGaps = false; break; }
		}
//cerr <<"in 	getMostProbableBreakPoint, noGaps = " <<noGaps << endl;
		if (!noGaps) {
			int indelPos = -1;
			int exactRun=0;
			int firstMismatch=-1;
			int numLeadingMatches(0);
			  for (unsigned int i(0);i<alignment.getMatchDescriptor().size();i++)
  {
    if (isdigit(alignment.getMatchDescriptor()[i]))
    {
      exactRun*=10;
      exactRun+=
        (static_cast<int>(alignment.getMatchDescriptor()[i])-48);
    } // if
    else
    { // found a nondigit character, return leading values
      numLeadingMatches+=exactRun;
      exactRun=0;
      if (isalpha(alignment.getMatchDescriptor()[i]))
      {
        if (firstMismatch==-1) firstMismatch=numLeadingMatches;
        numLeadingMatches++;
      }
      else if (alignment.getMatchDescriptor()[i]=='^')
      {
        if (indelPos==-1) indelPos=numLeadingMatches;
        return (indelPos);
      }
    } // ~else
  } // ~for

} //end !noGaps

		double minScore = SequenceUtils::getBreakPointHypothesisScore(alignment, breakPointPos, _isQphred, true);


		for (int i=1; i<lengthOfAlignment; i++) {
			double score = SequenceUtils::getBreakPointHypothesisScore(alignment, i, _isQphred, true);
			if (score<minScore) {
				minScore = score;
				breakPointPos = i;
			}
			//cerr  << i << ":" << score << ":";
			score = SequenceUtils::getBreakPointHypothesisScore(alignment, i, _isQphred, false);
			if (score<minScore) {
				minScore = score;
				breakPointPos = i;
			}
			//cerr << score << "\t";
		}

		 if (alignment.getMatch().getStrand() == Match::Reverse) {
			//cerr << "breakPoint = " << breakPointPos << endl;
			breakPointPos--;
		 }

		//cerr << "\n";
		return breakPointPos;
}

/*****************************************************************************/

void
ClusterFinderImpl::addAnomPairTypeClusters(AnomalousReadMgr& anomalousReadMgr,
                                           AnomalousRead::Type anomReadType,
                                           vector<AnomReadGroup>&
                                           clustersSoFar)
{
    cerr << "For " << anomReadType << " :-" << std::endl;

    AnomalousReadMgr::SingletonEventVec sortedAnomReads;
    anomalousReadMgr.getSortedAnomReads(anomReadType, sortedAnomReads);

    cerr << "  Found " << sortedAnomReads.size() << " anom reads."
         << std::endl;

    AnomalousReadMgr::AnomReadGroupVec clusters;

    anomalousReadMgr.clusterAnomPairReads(anomReadType,
                                          sortedAnomReads,
                                          clusters);

    cerr << "  Made " << clusters.size() << " clusters." << std::endl;

    // Append to clustersSoFar.
    clustersSoFar.insert(clustersSoFar.end(),
                         clusters.begin(), clusters.end());
}

/*****************************************************************************/

void
ClusterFinderImpl::addAnomPairClusters(AnomalousReadMgr& anomalousReadMgr,
                                       vector<AnomReadGroup>& clustersSoFar)
{
    AnomalousReadMgr::TypeVec anomPairTypeVec;
    anomalousReadMgr.getAnomPairTypeVec(anomPairTypeVec);

    for (AnomalousReadMgr::TypeVecCIter typeCIter(anomPairTypeVec.begin());
         typeCIter != anomPairTypeVec.end(); ++typeCIter) {
        addAnomPairTypeClusters(anomalousReadMgr, *typeCIter, clustersSoFar);
    }

    // Final sort.
    sort(clustersSoFar.begin(), clustersSoFar.end(), SortAnomReadGroup());
}

/*****************************************************************************/

void ClusterFinderImpl::operator()( AnomalousReadMgr& anomalousReadMgr )
{
  //  const int maxDistAligned(1);
  // const int maxDistShadow(20);
  // const int maxDistInterStrand(7);

  ofstream outputFile(_outputFileName.c_str());
  if (!outputFile)
  {
    cerr << "Failed to open output file " << _outputFileName << " for writing"
         << endl;
    return; // TBD what to do here - throw exception??
  } // ~if

  if (anomalousReadMgr.totalNumReadsAdded() == 0)
  {
    cerr << "No reads to cluster, nothing to do" << endl;
    outputFile.close();
    return;
  }

  // Now extract the singletons variable from anomalousReadMgr.
  std::vector<Singleton*>&
      singletons(anomalousReadMgr.
                 getVec(AnomalousRead::ShadowOrSemiAligned));

  if (singletons.size() == 0) {
      cerr << "No shadow/semi-aligned reads to cluster." << endl;
  }


  //  std::vector<SingletonEvent> eventStarts;
  //  std::vector<SingletonEvent> eventEnds;
  vector<SingletonEvent>
    matchedForward, matchedReverse, shadowForward, shadowReverse;

  vector<AnomReadGroup>
    matchedForwardClusters, matchedReverseClusters;

  int numLeading;

  unsigned int numAlignedRejected(0),numClusters(0);
  //double posToSort;

  for ( vector<Singleton*>::iterator i(singletons.begin());
        i!=singletons.end();++i)
  {
    if (!(*i)->alignment_.getPassed())
    {
      (*i)->alignment_.setPassed(true);
      outputFile << (*i)->minPos_ << "\t"
                 << (*i)->maxPos_ << "\t"
                 << (*i)->alignment_;
      continue;
    } // ~if

    if (dynamic_cast<SingletonSemiAligned*>(*i)!=NULL)
    {
      //numLeading=numLeadingMatches( (*i)->alignment_ );
	  numLeading = getMostProbableBreakPoint( (*i)->alignment_ );
//cerr << "In CluterFinderImpl, after numLeading = " << numLeading << "\n";
   //   if (numLeading>=1)
    //  {
        if ((*i)->alignment_.getMatch().getStrand() == Match::Forward)
        {
          matchedForward.push_back
            ( SingletonEvent( (*i)->minPos_+numLeading, *i ));
        } // ~if
        else
        {
          matchedReverse.push_back
            ( SingletonEvent( (*i)->maxPos_-numLeading, *i ));
        } // ~else
      //}
     // else
     // {
        numAlignedRejected++;
     // }
    } // ~ if SemiAligned
    else
    {
      assert (dynamic_cast<SingletonNotMatched*>(*i)!=NULL);
      //posToSort=((*i)->minPos_+(*i)->maxPos_)/2.0;
      if ((*i)->alignment_.getMatch().getStrand() == Match::Forward)
      {
        //        cout << (*i)->minPos_ << " " << (*i)->maxPos_ << " " << (*i)->alignment_;
        //        cout << (*i)->minPos_ << " " << (*i)->alignment_.getMatch().getPosition() << endl;
//cerr << "shadow read:" << (*i)->alignment_ << endl;

        shadowForward.push_back( SingletonEvent( (*i)->minPos_, *i));
        shadowForward.push_back( SingletonEvent( (*i)->alignment_.getMatch().getPosition(), *i));

          //        shadowForward.push_back( SingletonEvent( posToSort, *i));
      } // ~if
      else
      {
        //        cout << (*i)->minPos_ << " " << (*i)->maxPos_ <<  " " << (*i)->alignment_;
        //        cout << (*i)->maxPos_ << " " << (*i)->alignment_.getMatch().getPosition() << endl;
        shadowReverse.push_back( SingletonEvent( (*i)->maxPos_, *i));
        shadowReverse.push_back( SingletonEvent( (*i)->alignment_.getMatch().getPosition(), *i));
        //        shadowReverse.push_back( SingletonEvent( posToSort, *i));
      } // ~else
    } // ~else NotMatched
  }
  //  assert(1==0);
  cerr << "For " << AnomalousRead::ShadowOrSemiAligned << " :-" << endl;
  cerr << numAlignedRejected << " further aligned reads were rejected" << endl;
  cerr << matchedForward.size() << " forward strand aligned reads" << endl;
  cerr << shadowForward.size()
       << " forward strand shadow + singleton reads" << endl;
  cerr << matchedReverse.size() << " reverse strand aligned reads" << endl;
  cerr << shadowReverse.size()
       << " reverse strand shadow + singleton reads" << endl;

  sort(matchedForward.begin(), matchedForward.end(), Sorter());
  sort(matchedReverse.begin(), matchedReverse.end(), Sorter());

  sort(shadowForward.begin(), shadowForward.end(), Sorter());
  sort(shadowReverse.begin(), shadowReverse.end(), Sorter());

  clusterAlignedReads( matchedForward, matchedForwardClusters );
  clusterAlignedReads( matchedReverse, matchedReverseClusters );

  clusterShadowsForward( shadowForward, matchedForwardClusters );
  clusterShadowsReverse( shadowReverse, matchedReverseClusters );

  cerr << "  Made " << matchedForwardClusters.size() << " forward clusters."
       << std::endl
       << "  Made " << matchedReverseClusters.size() << " reverse clusters."
       << std::endl;

  clusterStrands( matchedForwardClusters, matchedReverseClusters );

  //for (vector<AnomReadGroup>::iterator i(matchedForwardClusters.begin());
   //    i!=matchedForwardClusters.end();++i) {
    //i->print( _minGroupSize, numClusters, outputFile );
	//int breakpointPos = static_cast<int>(i->positionStats_.getMean());
	//cout << "breakpoint pos " << breakpointPos << "\t";
	//int eventType = i->identifyType();
	//cerr << i->anomReadType_ << endl;
	//EventChecker* eventChecker = new EventChecker::EventChecker(*i);
	//int eventType = eventChecker->getEventType();
	//cout << "event type " << eventType << "\n";
  //}


  cerr << "  Merged strands -> " << matchedForwardClusters.size()
       << " clusters." << std::endl;

  addAnomPairClusters(anomalousReadMgr, matchedForwardClusters);


  for (vector<AnomReadGroup>::iterator i(matchedForwardClusters.begin());
       i!=matchedForwardClusters.end();++i)
    i->print( _minGroupSize, numClusters, outputFile );

  //  for (vector<AnomReadGroup>::iterator i(matchedReverseClusters.begin());
  //      i!=matchedReverseClusters.end();++i)
  //   i->print( _minGroupSize, outputFile );

  cerr << "found " << numClusters << " clusters in all" << endl;
  outputFile.close();


} // ~void ClusterFinderImpl::operator()


#ifdef PROTOTYPING_NEW_VERSION
void ClusterFinderImpl::operator()( const AnomalousReadMgr& anomalousReadMgr )
{
  // Now extract the singletons variable from anomalousReadMgr.
  std::vector<Singleton*>&
      singletons(anomalousReadMgr.
                 getVec(AnomalousRead::ShadowOrSemiAligned));

  ofstream outputFile(_outputFileName.c_str());
  if (!outputFile)
  {
    cerr << "Failed to open stats file " << _outputFileName << " for writing"
         << endl;
    return; // TBD what to do here - throw exception??
  } // ~if

  if (singletons.size()==0)
  {
    cerr << "No reads to cluster, nothing to do" << endl;
    outputFile.close();
    outputFile.close();
    return;
  } // ~if

  std::vector<SingletonEvent> eventStarts;
  std::vector<SingletonEvent> eventEnds;


  for ( vector<Singleton*>::iterator i(singletons.begin());
        i!=singletons.end();++i)
  {
    (*i)->populateBounds( eventStarts, eventEnds);
  } // ~for


  assert(!eventEnds.empty());
  assert(!eventStarts.empty());

  assert(eventEnds.size()==eventStarts.size());


#ifdef NEAREST_NEIGHBOUR_PEAK_FINDER
        vector<SingletonEvent>::iterator
          pStart(eventStarts.begin()),pEnd(eventEnds.begin());

        for (;pStart!=eventStarts.end();++pStart,++pEnd)
          {
            pStart->pos_+=pEnd->pos_;
            pStart->pos_/=2.0;
          }
#endif

        sort(eventStarts.begin(), eventStarts.end(), Sorter());
        sort(eventEnds.begin(), eventEnds.end(), Sorter());


#ifdef NEAREST_NEIGHBOUR_PEAK_FINDER
        double lastPos=-5000000;
        Singleton* lastRead(NULL);
        //        const double maxDist(_options.maxDistance());
        //        const unsigned int minGroupSize(_options.minGroupSize());
        vector<Singleton*> group;
        int numGroups(0);

        for (pStart=eventStarts.begin();pStart!=eventStarts.end();++pStart)
        {
          if (!pStart->pRead_->alignment_.getPassed())
          {
            pStart->pRead_->alignment_.setPassed(true);
            outputFile << pStart->pRead_->minPos_ << "\t"
                       << pStart->pRead_->maxPos_ << "\t"
                       << pStart->pRead_->alignment_;
            continue;
          } // ~if

          if ((pStart->pos_-lastPos)<=_maxDist)
          {
            if (group.empty()) group.push_back(lastRead);
            group.push_back(pStart->pRead_);
          } // ~if
          else
          {

            printGroup( group, numGroups, outputFile );

            if (group.size()>0)
            {
              group.clear();
            } // ~if
            else if (lastRead!=NULL)
            {
              outputFile << lastRead->minPos_ << "\t"
                         << lastRead->maxPos_ << "\t"
                         << lastRead->alignment_;
            }
          } // ~else
          lastPos=pStart->pos_;
          lastRead=pStart->pRead_;

        } // ~for

        printGroup( group, numGroups, outputFile );

        if (group.size()>0)
        {
          group.clear();
        } // ~if
        else if (lastRead!=NULL)
        {
          outputFile << lastRead->minPos_ << "\t"
                     << lastRead->maxPos_ << "\t"
                     << lastRead->alignment_;
        } // ~else if


#endif


#ifdef GAUSSIAN_PEAK_FINDER
        vector<SingletonEvent>::iterator
          pStart(eventStarts.begin()),pEnd(eventEnds.begin());

        bool isInPeak(false);
        IndelMetricStore metric;
        float threshold(0.05);
        int numGroups(0);
        //        const unsigned int _minGroupSize(_options.minGroupSize());

        AnomReadGroup activeGroup, shadowGroup;

        for (int i(eventStarts.begin()->pos_); i!=eventEnds.back().pos_;++i)
        {
          while ((pStart!=eventStarts.end())&&(i>=pStart->pos_))
          {
            pStart->pRead_->populateMetric( metric );
            activeGroup.insert(pStart->pRead_);
            if (isInPeak==true)
              shadowGroup.insert(pStart->pRead_);
            pStart++;
          } // ~while

          if (metric.find(i)==metric.end())
            metric[i]=0;

          printf ("# %d %f zz\n", i, metric[i]);

          if ((metric[i]>=threshold)&&(isInPeak==false))
          {
            shadowGroup=activeGroup;
            isInPeak=true;
          } // ~if
          else if ((metric[i]<threshold)&&(isInPeak==true))
          {
            //            if (shadowGroup.size()>minGroupSize)
            //  {
              ++numGroups;
              shadowGroup.print(numGroups);
            //            }
            isInPeak=false;
          } // ~else if

          while ((pEnd!=eventEnds.end())&&(i>=pEnd->pos_))
          {
            activeGroup.erase(pEnd->pRead_);
            pEnd++;
          } // ~while
          // TBD flush metric occasionally
          // TBD check for active group at end
        } // ~for
#endif // GAUSSIAN_PEAK_FINDER


        outputFile.close();

} // ~void ClusterFinderImpl::operator()
#endif // PROTOTYPING_NEW_VERSION


}
} // end namespace casava{ namespace { applications