/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "contig.h"
#include    "consedParameters.h"
#include    "locatedFragment.h"
#include    "bIntersect.h"
#include    "mbtValVectorOfBool.h"
#include    "mbtValVector.h"
#include    "rwtptrorderedvector.h"
#include    "ntide.h"
#include    "fileDefines.h"
#include    "soAddCommas.h"
#include    "rwctokenizer.h"
#include    "autoReport.h"
#include    "consed.h"
#include    "assembly.h"
#include    "nNumberFromBase.h"
#include    "setErrorRateFromQuality.h"
#include    "dErrorRateFromQuality.h"
#include    "singletInfo.h"
#include    "readPHDAgainForHighQualitySegment.h"
#include    <stdlib.h>
#include    <stdio.h>
#include    <dirent.h>
#include    "rwcregexp.h"
#include    "getAllSinglets.h"
#include    "mbt_val_ord_offset_vec.h"
#include    "soLine.h"
#include    "max.h"
#include    <math.h>
#include    "findAncestralTrees.h"
#include    "transitionsTransversions.h"
#include    "soGetBranchNameWithoutTree.h"
#include    "primateSpecies.h"


static int nEstimatedTotalMutations = 0;
static int nConsPosConsidered = 0;

//const int n256 = 256; already defined in transitionsTransversions.h 

static RWTValOrderedVector<RWCString>   aArrayOfNamesOfBranches;
static RWTValOrderedVector<float>       aArrayOfNumberOfNonMutationsAtBranch;
static RWTValOrderedVector<float>       aArrayOfNumberOfDeaminationMutationsAtBranch;
static RWTValOrderedVector<float>       aArrayOfNumberOfInsertionMutationsAtBranch;
static RWTValOrderedVector<float>       aArrayOfNumberOfDeletionMutationsAtBranch;
static RWTValOrderedVector<float>*      pArraysAtBranch[n256][n256];




typedef RWTValVector<float> typeArrayOfNucleotideCounts;
static RWTPtrVector<typeArrayOfNucleotideCounts> 
   aArrayOfInternalNodes( nMaxInternalNode + 1,
                          NULL,
                          "aArrayOfInternalNodes" );

static int nBaseToIndex[n256];





static int nGetNumberOfDistinctBasesNotNs( const RWCString& soBases ) {

   RWCString soBases2( soBases );

   soBases2.removeSingleCharacterAllOccurrences( 'n' );

   // check how many bases remain

   const int nBaseTypes = 5;

   bool bBases[nBaseTypes];

   int n;

   for( n =0; n <nBaseTypes ; ++n ) {
      bBases[n] = false;
   }

   for( n = 0; n < soBases.length(); ++n ) {
      if ( soBases[n] == 'a' ) 
         bBases[0] = true;
      else if ( soBases[n] == 'c' )
         bBases[1] = true;
      else if ( soBases[n] == 'g' ) 
         bBases[2] = true;
      else if ( soBases[n] == 't' )
         bBases[3] = true;
      else if ( soBases[n] == '*' )
         bBases[4] = true;
   }

   int nNumberOfDistinctBases = 0;
   for( n = 0; n < nBaseTypes; ++n ) {
      if ( bBases[n] ) {
         ++nNumberOfDistinctBases;
      }
   }

   return( nNumberOfDistinctBases );
}






static RWCString soGetBranchNameWithTree( const int nInternalNode,
                                 const int nOnBackboneOrDownToRight,
                                 const int nTree ) {



   RWCString soToReturn = 
      soGetBranchNameWithoutTree( nInternalNode,
                                  nOnBackboneOrDownToRight );

   soToReturn += "_";
   soToReturn += RWCString( (long) nTree );

   return( soToReturn );
}



static int nGetIndexForBranch( const int nInternalNode,
                               const int nOnBackboneOrDownToRight ) {

   // internal node are 0, 1, 2, 3 so the indexes go from 0 to 7
   // then there is another for the branch to MMul

   if ( nInternalNode == -1 ) {
      return( nMaxInternalNode * 2 + 2 );
   }

   int nToReturn = nInternalNode * 2;
   if ( nOnBackboneOrDownToRight == nOnBackbone )
      return( nToReturn + 0 );
   else
      return( nToReturn + 1 );
}


// calls soGetBranchNameWithoutTree so nInternalNode means the same
// here as there


static RWCString soBasesForSubstitutions( "acgt" );
static RWCString soBasesWithPad("acgt*");

static void initializeArrays() {

   int nLeft;
   for( nLeft = 0; nLeft < n256; ++nLeft ) {
      for( int nRight = 0; nRight < n256; ++nRight ) {
         pArraysAtBranch[ nLeft ][ nRight ] = 0;
      }
   }



   for( nLeft = 0; nLeft < soBasesForSubstitutions.length(); ++nLeft ) {
      char cLeft = soBasesForSubstitutions[ nLeft ];

      for( int nRight = 0; nRight < soBasesForSubstitutions.length(); ++nRight ) {
         char cRight = soBasesForSubstitutions[ nRight ];

         if ( cLeft == cRight ) {
            pArraysAtBranch[ cLeft ][ cRight ] =
               &aArrayOfNumberOfNonMutationsAtBranch;
         }
         else {
            RWCString soName = "counts of mutations from " +
               RWCString( cLeft ) + " to " + RWCString( cRight );

            pArraysAtBranch[ cLeft ][ cRight ] =
               new RWTValOrderedVector<float>( 10, // array size
                                               soName ) ;
         }
      }
   }


   // now consider indels 

   int nBase;
   for( nBase = 0; nBase < soBasesForSubstitutions.length(); ++nBase ) {
      char cBase = soBasesForSubstitutions[ nBase ];

      pArraysAtBranch[ cBase ][ '*' ] = &aArrayOfNumberOfDeletionMutationsAtBranch;
      pArraysAtBranch[ '*' ][ cBase ] = &aArrayOfNumberOfInsertionMutationsAtBranch;
   }

   // what about ['*']['*'] ?  This will not count as anything

   
   for( int nInternalNode2 = 0; nInternalNode2 <= (nMaxInternalNode + 1 ); 
        ++nInternalNode2 ) {

      int nInternalNode = nInternalNode2;

      if ( nInternalNode == ( nMaxInternalNode + 1 ) ) {
         nInternalNode = -1;
      }


      for( int nBranchBelowInternalNode = 0; nBranchBelowInternalNode <= 1;
           ++nBranchBelowInternalNode ) {


         // just a trick so that there is only one MMul branch
         if ( nInternalNode == -1 && nBranchBelowInternalNode == 1 )
            continue;
         


         int nOnBackboneOrDownToRight;
         if ( nBranchBelowInternalNode == 0 )
            nOnBackboneOrDownToRight = nOnBackbone;
         else
            nOnBackboneOrDownToRight = nDownToRight;

         aArrayOfNamesOfBranches.insert( 
                   soGetBranchNameWithoutTree( nInternalNode,
                                      nOnBackboneOrDownToRight ) 
                   );


         aArrayOfNumberOfNonMutationsAtBranch.insert( 0.0 );
         aArrayOfNumberOfDeaminationMutationsAtBranch.insert( 0.0 );
         aArrayOfNumberOfInsertionMutationsAtBranch.insert( 0.0 );
         aArrayOfNumberOfDeletionMutationsAtBranch.insert( 0.0 );
         
         
         for( nLeft = 0; nLeft < soBasesForSubstitutions.length(); ++nLeft ) {
            char cLeft = soBasesForSubstitutions[ nLeft ];

            for( int nRight = 0; nRight < soBasesForSubstitutions.length(); ++nRight ) {

               if ( nRight == nLeft ) continue;

               char cRight = soBasesForSubstitutions[ nRight ];

               ( pArraysAtBranch[ cLeft ][ cRight ])->insert( 0.0 );
            }
         }

      } // for( int nBranchBelowInternalNode = 0; ...
   } //    for( int nInternalNode2 = 0; nInternalNode2 <= (nMaxInternalNode + 



   for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode; 
        ++nInternalNode ) {

      RWCString soName = "aArrayOfInternalNodes nInternalNode = " +
         RWCString( (long) nInternalNode );

      aArrayOfInternalNodes[ nInternalNode ] =
         new RWTValVector<float>( soBasesWithPad.length(), // capacity:  acgt*
                                  0.0, // initial value
                                  soName );
   }


   int n;
   for( n = 0; n < n256; ++n ) {
      nBaseToIndex[n] = -1;
   }

   nBaseToIndex['a'] = 0;
   nBaseToIndex['c'] = 1;
   nBaseToIndex['g'] = 2;
   nBaseToIndex['t'] = 3;
   nBaseToIndex['*'] = 4;

} // static void initializeArrays() {

   





static unsigned char ucGetMaskForSpecies( RWCString& soSpecies ) {


   unsigned char ucSpeciesMask;

   if ( soSpecies == "PTro" ) {
      ucSpeciesMask = ucPTro;
   }
   else if ( soSpecies == "PPan" ) {
      ucSpeciesMask = ucPPan;
   }
   else if ( soSpecies == "GGor" ) {
      ucSpeciesMask = ucGGor;
   }
   else if ( soSpecies == "PPyg" ) {
      ucSpeciesMask = ucPPyg;
   }
   else if ( soSpecies == "MMul" ) {
      ucSpeciesMask = ucMMul;
   }
   else {
      ucSpeciesMask = 0;
   }

   
   return ucSpeciesMask;
}



static bool bAllNeededSpecies( unsigned char ucSpeciesMask ) {

   if ( ( ucSpeciesMask & ucPTroOrPPan ) &&
        ( ucSpeciesMask & ucPPygOrMMul ) &&
        ( ucSpeciesMask & ucGGor ) )
      return true;
   else
      return false;
}
   

static bool bNoMutationThisColumn( const RWCString& soBases ) {
   
   char cBase = 0;
   for( int n = 0; n < soBases.length(); ++n ) {
      if ( soBases[n] == 'n' ) continue;

      if ( !cBase ) {
         cBase = soBases[n];
      }
      else {
         if ( cBase != soBases[n] ) {
            return false;
         }
      }
   }

   return true;
}


static RWCString soGetTreeWithNoMutation( const RWCString soPresentDayBasesHumanInFront ) {

   RWCString soPresentDayBasesPPanInFront = soPresentDayBasesHumanInFront;
   
   // PPan
   soPresentDayBasesPPanInFront[0] = soPresentDayBasesHumanInFront[1];
   // PTro
   soPresentDayBasesPPanInFront[1] = soPresentDayBasesHumanInFront[2];
   // HSap
   soPresentDayBasesPPanInFront[2] = soPresentDayBasesHumanInFront[0];

   // find base

   char cBase = 0;
   int n;
   for( n = 0; n < soPresentDayBasesPPanInFront.length(); ++n ) {
      if ( soPresentDayBasesPPanInFront[n] != 'n' ) {
         cBase = soPresentDayBasesPPanInFront[n];
         break;
      }
   }
   
   assert( cBase );

   // trees look like:
   // g(g(t(t(t(t,t),t),t),g?),g)


   RWCString soTreeLeft;
   RWCString soTreeRight;

   for( n = soPresentDayBasesPPanInFront.length() - 1; n >= 1;
        --n ) {
      soTreeLeft += RWCString( cBase );
      soTreeLeft += "(";

      soTreeRight = "," + RWCString( cBase ) +
         ( soPresentDayBasesPPanInFront[n] == 'n' ? "?" : "" ) +
         ")" + soTreeRight;
   }

   // we have to handle PPan separately from the loop above because it
   // might have a "?" next to it

   RWCString soTree = soTreeLeft + 
      RWCString( cBase ) + 
      ( soPresentDayBasesPPanInFront[0] == 'n' ? "?" : "" ) +
      soTreeRight;

   return( soTree );
}


static bool bIsDeaminationMutation( 
        const int nInternalNode,
        const int nOnBackboneOrDownToRight,
        const RWCString& soDeaminationMutations ) {

   RWCString soBranchName = soGetBranchNameWithoutTree( nInternalNode,
                                                        nOnBackboneOrDownToRight );
   if ( soDeaminationMutations.index( soBranchName ) == RW_NPOS )
      return false;
   else
      return true;
}

      
   



static float fGetProbabilityOfDeaminationMutation( 
        const int nInternalNode,
        const int nOnBackboneOrDownToRight,
        const int nTree,
        mbtValOrderedVectorOfRWCString* pArrayOfDeaminationMutations ) {
   

   RWCString soBranchNameWithTree = 
      soGetBranchNameWithTree( nInternalNode,
                               nOnBackboneOrDownToRight,
                               nTree );

   if ( !pArrayOfDeaminationMutations )
      return( 0.0 );

   int nIndexOfDeaminationMutation = 
      pArrayOfDeaminationMutations->index( soBranchNameWithTree );

   if ( nIndexOfDeaminationMutation == RW_NPOS )
      return( 0.0 );
   else
      return( 1.0 );
}




static void countNucleotidesAtInternalNodes( const RWCString& soOneTree2 ) {


   RWCString soOneTree = soOneTree2;


   soOneTree.removeSingleCharacterAllOccurrences( '?' );
   soOneTree.removeSingleCharacterAllOccurrences( '(' );
   soOneTree.removeSingleCharacterAllOccurrences( ')' );
   soOneTree.removeSingleCharacterAllOccurrences( ',' );

   // now look for mutations

   // nInternal node is the string offset and also that used in
   // soGetBranchNameWithoutTree but without using -1

   for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode; 
        ++nInternalNode ) {

      // notice that "nInternalNode" refers to the string position
      // within the tree.  So 0 is the top node and 4 is the node
      // going to PPan and PTro

      char cAncestralBase = soOneTree[ nInternalNode ];

      RWTValVector<float>* pArrayOfCounts =
         aArrayOfInternalNodes[ nInternalNode ];

      int nBaseIndex = nBaseToIndex[ cAncestralBase ];


      if ( nBaseIndex != -1 ) {

         // will need to be changed for maximum likelihood model

         (*pArrayOfCounts)[ nBaseIndex ] += 1.0;

      }
   }
}





void Contig :: countAllMutations( autoReport* pAutoReport ) {


   setTransitionsTransversionsTableIfNecessary();
   initializeArrays();


   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   mbtValVector<unsigned char> aSpeciesInAcceptableColumns( 1, // will be changed
                                                    1, // starting position
                                                    0, // initial value
                                                    "aSpeciesInAcceptableColumns" );


   RWTValVector<RWCString> aArrayOfOneTreeAtEachConsPos( 
          1, // size--will be changed
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfOneTreeAtEachConsPos" );

   RWTValVector<RWCString> aArrayOfBasesAtEachConsPos( 
          1, // size--will be changed
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfBasesAtEachConsPos" );


   RWTValVector<RWCString> aArrayOfLenientlyFilteredBasesAtEachConsPos(
          1, // size--will be changed
          // + 1 so we can use nConsPos (1-based) directly as an index
          "", // initial value
          "aArrayOfLenientlyFilteredBasesAtEachConsPos" );


   RWTValVector<RWCString> aDeaminationMutationsAtEachConsPos( 
          1, // size--will be changed
          "", // initial value
          "aDeaminationMutationsAtEachConsPos" );

   // this is different than above in that if a CpG occurs, both
   // columns are marked, rather than just the first column

   RWTValVector<char> aAncestralCpGAtEachConsPos2(
          1,// size--will be changed
          cNoCpG, // initial value
          "aAncestralCpGAtEachConsPos" );



   LocatedFragment* pHumanRead;

   applyFilters( pAutoReport,
                 aCombinedReads,
                 aSpeciesInAcceptableColumns,
                 aArrayOfOneTreeAtEachConsPos,
                 aArrayOfBasesAtEachConsPos,
                 aArrayOfLenientlyFilteredBasesAtEachConsPos,
                 aDeaminationMutationsAtEachConsPos,
                 aAncestralCpGAtEachConsPos2,
                 pHumanRead );



   int nConsPos;
   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {
      
      if ( aArrayOfOneTreeAtEachConsPos[ nConsPos ].isNull() ) continue;

      countNucleotidesAtInternalNodes( aArrayOfOneTreeAtEachConsPos[ nConsPos ] );

   }


   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( aArrayOfOneTreeAtEachConsPos[ nConsPos ].isNull() ) continue;

      // for comparison with Graham's data

      if ( pCP->bAutoReportPrintPositionsForGraham_ ) {
         
         if ( ! ( aSpeciesInAcceptableColumns[ nConsPos ]
                  & ucPTro ) ) continue;
      
         if ( ! ( aSpeciesInAcceptableColumns[ nConsPos ]
                  & ucMMul ) ) continue;
         
         if ( aAncestralCpGAtEachConsPos2[ nConsPos ] != cNoCpG ) 
            continue;
      }

      // end for comparison with Graham's data

      fprintf( pAO, "using site %d %s\n", nUnpaddedIndex( nConsPos ),
               aArrayOfBasesAtEachConsPos[ nConsPos ].data() );
      

      countMutationsAtConsPos( 
                      aArrayOfOneTreeAtEachConsPos[ nConsPos ],
                      aArrayOfBasesAtEachConsPos[ nConsPos ],
                      nConsPos,
                      aDeaminationMutationsAtEachConsPos[ nConsPos ] );
   }



   // print out estimate (sanity check)

   fprintf( pAO, "estimate: %d mutations out of %d positions\n",
            nEstimatedTotalMutations,
            nConsPosConsidered );

   // now print out the counts for each branch

   fprintf( pAO, "allMutations {\n" );


   // nInternalNode means the same as it does in soGetBranchNameWithoutTree

   for( int nInternalNode2 = 0; nInternalNode2 <= ( nMaxInternalNode + 1); 
        ++nInternalNode2 ) {

      // fuss so that nInternalNode goes as 0, 1, 2, 3, -1
      int nInternalNode = nInternalNode2;
      if ( nInternalNode2 == ( nMaxInternalNode + 1 ) ) {
         nInternalNode = -1;
      }

      int nRealInternalNode = ( nInternalNode == -1  ) ? 
         0 : nInternalNode;

      RWTValVector<float>* pArrayOfNucleotideCounts =
         aArrayOfInternalNodes[ nRealInternalNode ];


      for( int nBranchBelowInternalNode = 0; nBranchBelowInternalNode <= 1;
           ++nBranchBelowInternalNode ) {

         // just a trick so that there is only one MMul branch
         if ( nInternalNode == -1 && nBranchBelowInternalNode == 1 ) 
            continue;


         int nOnBackboneOrDownToRight;
         if ( nBranchBelowInternalNode == 0 )
            nOnBackboneOrDownToRight = nOnBackbone;
         else
            nOnBackboneOrDownToRight = nDownToRight;

         RWCString soBranchName = soGetBranchNameWithoutTree( nInternalNode,
                                                              nOnBackboneOrDownToRight );

         int nIndexOfBranch = nGetIndexForBranch( nInternalNode,
                                          nOnBackboneOrDownToRight );

         assert( aArrayOfNamesOfBranches[ nIndexOfBranch ] ==
                 soBranchName );



         float fTotalSubstitutions = 0.0;

         int nLeft;
         for( nLeft = 0; nLeft < soBasesForSubstitutions.length(); ++nLeft ) {
            char cLeft = soBasesForSubstitutions[ nLeft ];
            for( int nRight = 0; nRight < soBasesForSubstitutions.length();
                 ++nRight ) {

               if ( nLeft == nRight ) continue;

               char cRight = soBasesForSubstitutions[ nRight ];

               fTotalSubstitutions += 
                  (*(pArraysAtBranch[ cLeft ][ cRight ]))[ nIndexOfBranch ];
            }
         }


         float fTotalAtBranch = 
            aArrayOfNumberOfNonMutationsAtBranch[ nIndexOfBranch ] +
            aArrayOfNumberOfDeaminationMutationsAtBranch[ nIndexOfBranch ] +
            aArrayOfNumberOfDeletionMutationsAtBranch[ nIndexOfBranch ] +
            aArrayOfNumberOfInsertionMutationsAtBranch[ nIndexOfBranch ] +
            fTotalSubstitutions;



         // to avoid division by zero
         if ( fTotalAtBranch == 0.0 ) {
            fTotalAtBranch = 0.1;
         }


         fprintf( pAO, "%s: same: %.2f subst: %.2f deam: %.2f ins: %.2f del: %.2f deam rate %.2f nondeam subst rate: %.2f %s %.1f\n",
                  aArrayOfNamesOfBranches[ nIndexOfBranch ].data(),
                  aArrayOfNumberOfNonMutationsAtBranch[ nIndexOfBranch ],
                  fTotalSubstitutions,
                  aArrayOfNumberOfDeaminationMutationsAtBranch[ nIndexOfBranch ],
                  aArrayOfNumberOfInsertionMutationsAtBranch[ nIndexOfBranch ],
                  aArrayOfNumberOfDeletionMutationsAtBranch[ nIndexOfBranch ],
                  aArrayOfNumberOfDeaminationMutationsAtBranch[ nIndexOfBranch ] * 100.0 / fTotalAtBranch,
                  fTotalSubstitutions * 100.0 / fTotalAtBranch,
                  soGetName().data(),
                  fTotalAtBranch );

         fprintf( pAO, "nucleotides at source node:" );

         for( int nSourceBase = 0; nSourceBase < soBasesWithPad.length();
              ++nSourceBase ) {
            char c = soBasesWithPad[ nSourceBase ];

            float fSourceBases = (*pArrayOfNucleotideCounts)[ nSourceBase ];

            fprintf( pAO, " %c: %.3f", c, fSourceBases );
         }
         fprintf( pAO, "\n" );


         for( nLeft = 0; nLeft < soBasesForSubstitutions.length(); ++nLeft ) {
            char cLeft = soBasesForSubstitutions[ nLeft ];



            float fSourceBases = (*pArrayOfNucleotideCounts)[ nLeft ];
            

            for( int nRight = 0; nRight < soBasesForSubstitutions.length();
                 ++nRight ) {

               if ( nLeft == nRight ) continue;

               char cRight = soBasesForSubstitutions[ nRight ];

               fprintf( pAO, "   subst %c -> %c %.2f %c: %.1f\n",
                        cLeft,
                        cRight,
                        (*(pArraysAtBranch[ cLeft ][ cRight ]))[ nIndexOfBranch ],
                        cLeft,
                        fSourceBases
                        );
            }
         }

      } // for( int nBranchBelowInternalNode = 0; 
   } //    for( int nInternalNode2 = 0; 


   fprintf( pAO, "} allMutations\n" );
}







void Contig :: checkTreesAndRecordDeaminationMutations( 
                  RWTValOrderedVector<RWCString>* pTreesPositionA,
                  RWTValOrderedVector<RWCString>* pTreesPositionB,
                  mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionA,
                  mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionB,
                  RWTValOrderedVector<float>*& pArrayOfProbsOfDeaminationMutationsPositionA,
                  RWTValOrderedVector<float>*& pArrayOfProbsOfDeaminationMutationsPositionB ) {


   mbtValOrderedVectorOfRWCString aDeaminationMutationsPositionA;
   mbtValOrderedVectorOfRWCString aDeaminationMutationsPositionB;

   
   for( int nTreeA = 0; nTreeA < pTreesPositionA->length(); ++nTreeA ) {
      
      RWCString soTreeA = (*pTreesPositionA)[ nTreeA ];

      // get rid of ? for now

      soTreeA.removeSingleCharacterAllOccurrences('?');
      soTreeA.removeSingleCharacterAllOccurrences('(');
      soTreeA.removeSingleCharacterAllOccurrences(')');
      soTreeA.removeSingleCharacterAllOccurrences(',');
      
      for( int nTreeB = 0; nTreeB < pTreesPositionB->length(); ++nTreeB ) {

         RWCString soTreeB = (*pTreesPositionB)[ nTreeB ];

         soTreeB.removeSingleCharacterAllOccurrences( '?' );
         soTreeB.removeSingleCharacterAllOccurrences( '(' );
         soTreeB.removeSingleCharacterAllOccurrences( ')' );
         soTreeB.removeSingleCharacterAllOccurrences( ',' );


         // now we have a pair of trees, soTreeA and soTreeB

         // abcdef
         // ^top internal node, leading to MMul
         //  ^ next internal node, leading to PPyg
         //   ^ next internal node, leading to GGor
         //    ^ next internal node, leading to HSap, I hope!
         //     ^ next internal node, leading to PTro
         //      ^ PPan
         //       ^ PTro
         //        ^ GGor
         //    etc.

         // g(g(g(g(g,g),g),a),g?,g)
         // is changed to
         // gggggggagg
         // a(c(c(c(*,c),c),c),a,*)
         // is changed to:
         // accc*ccca*
         // ^ node leading to MMul and PPyg
         //  ^ node leading to GGor
         //   ^ node leading to HSap
         //    ^ node leading to PTro and PPan
         //     ^ PPan
         //      ^ PTro
         //       ^ HSap
         //        ^ GGor
         //         ^ PPyg
         //          ^ MMul



         if ( pCP->bAutoReportDeaminationMutationsDeterminedByMoreAccurateMethod_ ) {

            // so there are 4 ancestral nodes to be concerned about

            // it turns out (lucky?) that this nInternalNode actually
            // is the same as that in soGetBranchNameWithoutTree and
            // is also the string position in the tree.  There is 
            // no nInternalNode == -1 here, though.
            // 0 refers to the internal node where PPyg and MMul come together.

            for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode; 
                 ++nInternalNode ) {

               // notice that "nInternalNode" refers to the string position
               // within the tree.  So 0 is the top node and 4 is the node
               // going to PPan and PTro

               RWCString soAncestralSequence = 
                  RWCString( soTreeA[ nInternalNode ] ) + 
                  RWCString( soTreeB[ nInternalNode ] );


               // special case of MMul in which the deamination 
               // mutation occurs on the short part of the branch
               // that goes down to the left to the top internal node.
               // In such a case, the MMul present day base is
               // actually older than the top internal node.
               // Thus we look for a CpG at the MMul present day bases
               // and some deamination mutation at the top internal node.


               if ( nInternalNode == 0 ) {
                  int nIndexOfPresentDayBases = 9;

                  RWCString soPresentDayBases =
                     RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
                     RWCString( soTreeB[ nIndexOfPresentDayBases ] );

                  if ( soPresentDayBases == "cg" ) {
                     if ( soAncestralSequence == "ca" ) {
                        aDeaminationMutationsPositionB.insert(
                                                              soGetBranchNameWithTree( -1,
                                                                                       nDownToRight,
                                                                                       nTreeB ) );
                     }
                     else if ( soAncestralSequence == "tg" ) {
                        aDeaminationMutationsPositionA.insert(
                                                              soGetBranchNameWithTree( -1,
                                                                                       nDownToRight,
                                                                                       nTreeA ) );
                     }
                  }
               }
                     

               if ( soAncestralSequence != "cg" ) continue;

               // if reached here, ancestral sequence is a CpG
               // is there a mutation from this position?

               // first, check if there is a mutation down-right to
               // the corresponding present-day node

               int nIndexOfPresentDayBases =
                  nSubtractInternalNodeFromThisToGetPresentDayIndex - 
                  nInternalNode;

               RWCString soPresentDayBases =
                  RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
                  RWCString( soTreeB[ nIndexOfPresentDayBases ] );

               if ( soPresentDayBases == "ca" ) {

                  aDeaminationMutationsPositionB.insert( 
                                                        soGetBranchNameWithTree( nInternalNode, nDownToRight,
                                                                                 nTreeB ) );
               
               }
               else if ( soPresentDayBases == "tg" ) {

                  aDeaminationMutationsPositionA.insert(
                                                        soGetBranchNameWithTree( nInternalNode, nDownToRight,
                                                                                 nTreeA )
                                                        );
               }


               if ( nInternalNode == 0 ) {
               
                  // case of top internal node, leading to both MMul and PPyg
                  // we've already considered the branch leading to PPyg (above)
                  // in which nIndexOfPresentDayBases = 8
                  // Now consider the branch leading to MMul, with
                  // nIndexOfPresentDayBases = 9

                  nIndexOfPresentDayBases = 
                     nSubtractInternalNodeFromThisToGetPresentDayIndex + 1;

                  soPresentDayBases =
                     RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
                     RWCString( soTreeB[ nIndexOfPresentDayBases ] );

                  if ( soPresentDayBases == "ca" ) {

                     aDeaminationMutationsPositionB.insert( 
                                                           soGetBranchNameWithTree( -1, nDownToRight, nTreeB ) 
                                                           );
                  }
                  else if ( soPresentDayBases == "tg" ) {
                  
                     aDeaminationMutationsPositionA.insert(
                                                           soGetBranchNameWithTree( -1, nDownToRight, nTreeA )
                                                           );
                  }
               }



               // if nInternalNode is 0-3, the next node is the
               // descendant internal node.  if nInternalNode is
               // 4, the next node is PPan

               RWCString soDescendantInternalNode =
                  RWCString( soTreeA[ nInternalNode + 1 ] ) +
                  RWCString( soTreeB[ nInternalNode + 1 ] );

               if ( soDescendantInternalNode == "ca" ) {

                  aDeaminationMutationsPositionB.insert(
                                                        soGetBranchNameWithTree( nInternalNode, nOnBackbone, nTreeB )
                                                        );
               }
               else if ( soDescendantInternalNode == "tg" ) {

                  aDeaminationMutationsPositionA.insert(
                                                        soGetBranchNameWithTree( nInternalNode, nOnBackbone, nTreeA ) 
                                                        );
               }

            } //         for( int nInternalNode = 0; nInternalNode < 5; 


         } //          if ( pCP->bAutoReportDeaminationMutationsDeterminedByMoreAccurateMethod_ ) {
         else {

            //     ^ PPan
            //      ^ PTro
            //       ^ HSap
            //        ^ GGor
            //         ^ PPyg
            //          ^ MMul


            RWCString soPTro = 
               RWCString( soTreeA.cGetLastChar(-5) ) +
               RWCString( soTreeB.cGetLastChar(-5) );
               
            RWCString soMMul =
               RWCString( soTreeA.cGetLastChar(-1) ) +
               RWCString( soTreeB.cGetLastChar(-1) );

            RWCString soHSap =
               RWCString( soTreeA.cGetLastChar(-4) ) +
               RWCString( soTreeB.cGetLastChar(-4) );


            int nNumberOfCpGs = 0;
            int nNumberOfTpGs = 0;
            int nNumberOfCpAs = 0;

            for( int n = 1; n <= 3; ++n ) {

               RWCString soDinucleotide( (size_t) 2 );
               if ( n == 1 ) 
                  soDinucleotide = soPTro;
               else if ( n == 2 ) 
                  soDinucleotide = soHSap;
               else if ( n == 3 )
                  soDinucleotide = soMMul;

               if ( soDinucleotide == "cg" ) 
                  ++nNumberOfCpGs;
               else if ( soDinucleotide == "tg" )
                  ++nNumberOfTpGs;
               else if ( soDinucleotide == "ca" )
                  ++nNumberOfCpAs;
            }

            int nNumberNotEqualZero = 0;
            if ( nNumberOfCpGs != 0 )
               ++nNumberNotEqualZero;

            if ( nNumberOfTpGs != 0 ) 
               ++nNumberNotEqualZero;

            if ( nNumberOfCpAs != 0 )
               ++nNumberNotEqualZero;

            if ( nNumberNotEqualZero >= 2 ) {
               // declare it a CpG deamination mutation!

               if ( nNumberOfTpGs != 0 ) {

                  // a CpG deamination mutation in the left position
                  // don't bother finding where it occurred for now

                  aDeaminationMutationsPositionA.insert( soGetBranchNameWithTree( -1, nDownToRight, nTreeA ) );

               }

               if ( nNumberOfCpAs != 0 ) {
                  // a CpG deamination mutation in the right position.
                  // don't bother finding where it occurred for now

                  aDeaminationMutationsPositionB.insert( soGetBranchNameWithTree( -1, nDownToRight, nTreeB ) );
               }


            }

            

         }

               

      } //      for( int nTreeB = 0; nTreeB < pTreesPositionB->length
   } // for( int nTreeA = 0; nTreeA < pTreesPositionA->length()


   aDeaminationMutationsPositionA.resort();
   aDeaminationMutationsPositionB.resort();
   

   // are there any deamination mutations that are in every pair of 
   // trees?  Count each.

   
   RWCString soLastDeaminationMutation;
   int nNumberOfPairsOfTreesWithCurrentDeaminationMutation;


   int nMutation;

   // add sentinel
   aDeaminationMutationsPositionA.insert( "sentinel" );
   
   for( nMutation = 0; nMutation < aDeaminationMutationsPositionA.length();
        ++nMutation ) {

      RWCString soCurrentDeaminationMutation = 
         aDeaminationMutationsPositionA[ nMutation ];

      if ( soCurrentDeaminationMutation == soLastDeaminationMutation ) {
         ++nNumberOfPairsOfTreesWithCurrentDeaminationMutation;
      }
      else {
         // just changed--first finish off last deamination mutation

         if ( !soLastDeaminationMutation.isNull() ) {

            float fFractionOfTreesInOtherColumnSupportingThisAsADeaminationMutation =
               (float) nNumberOfPairsOfTreesWithCurrentDeaminationMutation /
               (float) pTreesPositionB->length();  // yep, B
               

            if ( !pArrayOfDeaminationMutationsPositionA ) {
               pArrayOfDeaminationMutationsPositionA =
                  new mbtValOrderedVectorOfRWCString( 5 ); // initial size

               pArrayOfProbsOfDeaminationMutationsPositionA =
                  new RWTValOrderedVector<float>( 5,
                                                  "pArrayOfProbsOfDeaminationMutationsPositionA" );

            }

            pArrayOfDeaminationMutationsPositionA->insert( soLastDeaminationMutation );
            pArrayOfProbsOfDeaminationMutationsPositionA->insert( fFractionOfTreesInOtherColumnSupportingThisAsADeaminationMutation );

         }

         // now set up for next deamination mutation

         soLastDeaminationMutation = soCurrentDeaminationMutation;
         nNumberOfPairsOfTreesWithCurrentDeaminationMutation = 1;
      }
   }


   soLastDeaminationMutation = "";
   
   aDeaminationMutationsPositionB.insert( "sentinel" );

   for( nMutation = 0; nMutation < aDeaminationMutationsPositionB.length();
        ++nMutation ) {
      RWCString soCurrentDeaminationMutation = 
         aDeaminationMutationsPositionB[ nMutation ];

      if ( soCurrentDeaminationMutation == soLastDeaminationMutation ) {
         ++nNumberOfPairsOfTreesWithCurrentDeaminationMutation;
      }
      else {
         // just changed--first finish off last deamination mutation

         if ( !soLastDeaminationMutation.isNull() ) {

            
            float fFractionOfTreesInOtherColumnSupportingThisAsADeaminationMutation =
               (float) nNumberOfPairsOfTreesWithCurrentDeaminationMutation /
               (float) pTreesPositionA->length();  // yep, A

            if ( !pArrayOfDeaminationMutationsPositionB ) {
               pArrayOfDeaminationMutationsPositionB =
                  new mbtValOrderedVectorOfRWCString( 5 ); // initial size

               pArrayOfProbsOfDeaminationMutationsPositionB =
                  new RWTValOrderedVector<float>( 5,
                                                  "pArrayOfProbsOfDeaminationMutationsPositionB" );
            }

            pArrayOfDeaminationMutationsPositionB->insert( 
                soLastDeaminationMutation );

            pArrayOfProbsOfDeaminationMutationsPositionB->insert( 
                fFractionOfTreesInOtherColumnSupportingThisAsADeaminationMutation );

         }

         // now set up for next deamination mutation

         soLastDeaminationMutation = soCurrentDeaminationMutation;
         nNumberOfPairsOfTreesWithCurrentDeaminationMutation = 1;
      }
   }


} // void Contig :: checkTreesAndRecordDeaminationMutations( 


// void Contig :: checkTreesAndRecordDeaminationMutations( 
//                   const RWCString& soTreePositionA,
//                   const RWCString& soTreePositionB,

//                   mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionA,
//                   mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionB ) {


//    mbtValOrderedVectorOfRWCString aDeaminationMutationsPositionA;
//    mbtValOrderedVectorOfRWCString aDeaminationMutationsPositionB;


//    RWCString soTreeA = soTreePositionA;
//    RWCString soTreeB = soTreePositionB;

//    // just for backward compatibility
//    int nTreeA = 0;
//    int nTreeB = 0;
   

//    // get rid of ? for now

//    soTreeA.removeSingleCharacterAllOccurrences('?');
//    soTreeA.removeSingleCharacterAllOccurrences('(');
//    soTreeA.removeSingleCharacterAllOccurrences(')');
//    soTreeA.removeSingleCharacterAllOccurrences(',');
      
//    soTreeB.removeSingleCharacterAllOccurrences( '?' );
//    soTreeB.removeSingleCharacterAllOccurrences( '(' );
//    soTreeB.removeSingleCharacterAllOccurrences( ')' );
//    soTreeB.removeSingleCharacterAllOccurrences( ',' );


//    // now we have a pair of trees, soTreeA and soTreeB

//    // abcdef
//    // ^top internal node, leading to MMul
//    //  ^ next internal node, leading to PPyg
//    //   ^ next internal node, leading to GGor
//    //    ^ next internal node, leading to HSap, I hope!
//    //     ^ next internal node, leading to PTro
//    //      ^ PPan
//    //       ^ PTro
//    //        ^ GGor
//    //    etc.

//    // g(g(g(g(g,g),g),a),g?,g)
//    // is changed to
//    // gggggggagg
//    // a(c(c(c(*,c),c),c),a,*)
//    // is changed to:
//    // accc*ccca*
//    // ^ node leading to MMul and PPyg
//    //  ^ node leading to GGor
//    //   ^ node leading to HSap
//    //    ^ node leading to PTro and PPan
//    //     ^ PPan
//    //      ^ PTro
//    //       ^ HSap
//    //        ^ GGor
//    //         ^ PPyg
//    //          ^ MMul



//    // so there are 4 ancestral nodes to be concerned about

//    // it turns out (lucky?) that this nInternalNode actually
//    // is the same as that in soGetBranchNameWithoutTree and
//    // is also the string position in the tree.  There is 
//    // no nInternalNode == -1 here, though.
//    // 0 refers to the internal node where PPyg and MMul come together.

//    for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode; 
//         ++nInternalNode ) {

//       // notice that "nInternalNode" refers to the string position
//       // within the tree.  So 0 is the top node and 4 is the node
//       // going to PPan and PTro

//       RWCString soAncestralSequence = 
//          RWCString( soTreeA[ nInternalNode ] ) + 
//          RWCString( soTreeB[ nInternalNode ] );


//       // special case of MMul in which the deamination 
//       // mutation occurs on the short part of the branch
//       // that goes down to the left to the top internal node.
//       // In such a case, the MMul present day base is
//       // actually older than the top internal node.
//       // Thus we look for a CpG at the MMul present day bases
//       // and some deamination mutation at the top internal node.


//       if ( nInternalNode == 0 ) {
//          int nIndexOfPresentDayBases = 9;

//          RWCString soPresentDayBases =
//             RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
//             RWCString( soTreeB[ nIndexOfPresentDayBases ] );

//          if ( soPresentDayBases == "cg" ) {
//             if ( soAncestralSequence == "ca" ) {
//                aDeaminationMutationsPositionB.insert(
//                                                      soGetBranchNameWithTree( -1,
//                                                                               nDownToRight,
//                                                                               nTreeB ) );
//             }
//             else if ( soAncestralSequence == "tg" ) {
//                aDeaminationMutationsPositionA.insert(
//                                                      soGetBranchNameWithTree( -1,
//                                                                               nDownToRight,
//                                                                               nTreeA ) );
//             }
//          }
//       }
                     

//       if ( soAncestralSequence != "cg" ) continue;

//       // if reached here, ancestral sequence is a CpG
//       // is there a mutation from this position?

//       // first, check if there is a mutation down-right to
//       // the corresponding present-day node

//       int nIndexOfPresentDayBases =
//          nSubtractInternalNodeFromThisToGetPresentDayIndex - 
//          nInternalNode;

//       RWCString soPresentDayBases =
//          RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
//          RWCString( soTreeB[ nIndexOfPresentDayBases ] );

//       if ( soPresentDayBases == "ca" ) {

//          aDeaminationMutationsPositionB.insert( 
//                                                soGetBranchNameWithTree( nInternalNode, nDownToRight,
//                                                                         nTreeB ) );
               
//       }
//       else if ( soPresentDayBases == "tg" ) {

//          aDeaminationMutationsPositionA.insert(
//                                                soGetBranchNameWithTree( nInternalNode, nDownToRight,
//                                                                         nTreeA )
//                                                );
//       }


//       if ( nInternalNode == 0 ) {
               
//          // case of top internal node, leading to both MMul and PPyg
//          // we've already considered the branch leading to PPyg (above)
//          // in which nIndexOfPresentDayBases = 8
//          // Now consider the branch leading to MMul, with
//          // nIndexOfPresentDayBases = 9

//          nIndexOfPresentDayBases = 
//             nSubtractInternalNodeFromThisToGetPresentDayIndex + 1;

//          soPresentDayBases =
//             RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
//             RWCString( soTreeB[ nIndexOfPresentDayBases ] );

//          if ( soPresentDayBases == "ca" ) {

//             aDeaminationMutationsPositionB.insert( 
//                                                   soGetBranchNameWithTree( -1, nDownToRight, nTreeB ) 
//                                                   );
//          }
//          else if ( soPresentDayBases == "tg" ) {
                  
//             aDeaminationMutationsPositionA.insert(
//                                                   soGetBranchNameWithTree( -1, nDownToRight, nTreeA )
//                                                   );
//          }
//       }



//       // if nInternalNode is 0-3, the next node is the
//       // descendant internal node.  if nInternalNode is
//       // 4, the next node is PPan

//       RWCString soDescendantInternalNode =
//          RWCString( soTreeA[ nInternalNode + 1 ] ) +
//          RWCString( soTreeB[ nInternalNode + 1 ] );

//       if ( soDescendantInternalNode == "ca" ) {

//          aDeaminationMutationsPositionB.insert(
//                                                soGetBranchNameWithTree( nInternalNode, nOnBackbone, nTreeB )
//                                                );
//       }
//       else if ( soDescendantInternalNode == "tg" ) {

//          aDeaminationMutationsPositionA.insert(
//                                                soGetBranchNameWithTree( nInternalNode, nOnBackbone, nTreeA ) 
//                                                );
//       }

//    } //         for( int nInternalNode = 0; nInternalNode < 5; 
               



//    if ( aDeaminationMutationsPositionA.length() > 0 &&
//         !pArrayOfDeaminationMutationsPositionA ) {

//       pArrayOfDeaminationMutationsPositionA =
//          new mbtValOrderedVectorOfRWCString( 5 ); // initial size
//    }

//    int nDeam;
//    for( nDeam = 0; nDeam < aDeaminationMutationsPositionA.length();
//         ++nDeam ) {
//       pArrayOfDeaminationMutationsPositionA->insert( aDeaminationMutationsPositionA[ nDeam ] );
//    }

//    if ( aDeaminationMutationsPositionB.length() > 0 &&
//         !pArrayOfDeaminationMutationsPositionB ) {
//       pArrayOfDeaminationMutationsPositionB =
//          new mbtValOrderedVectorOfRWCString( 5 ); // initial size
//    }

//    for( nDeam = 0; nDeam < aDeaminationMutationsPositionB.length();
//         ++nDeam ) {
//       pArrayOfDeaminationMutationsPositionB->insert( aDeaminationMutationsPositionB[ nDeam ] );
//    }

// }



void Contig :: countMutationsAtConsPos(
                  const RWCString& soOneTree2,
                  const RWCString& soPresentDayBases,
                  const int nConsPos,
                  const RWCString& soDeaminationMutations ) {



   RWCString soOneTree = soOneTree2;

   float fRoughMutationCount = 0.0;

   assert( !soOneTree.isNull() );

   mbtValOrderedVectorOfRWCString aListOfMutations;


   soOneTree.removeSingleCharacterAllOccurrences( '?' );
   soOneTree.removeSingleCharacterAllOccurrences( '(' );
   soOneTree.removeSingleCharacterAllOccurrences( ')' );
   soOneTree.removeSingleCharacterAllOccurrences( ',' );

   // now look for mutations

   // nInternal node is the string offset and also that used in
   // soGetBranchNameWithoutTree but without using -1

   // notice that "nInternalNode" refers to the string position
   // within the tree.  So 0 is the top node and 3 is the node
   // going to PPan and PTro.  Node 1 leads to GGor, node 2 leads
   // to HSap.

   for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode; 
        ++nInternalNode ) {

      // notice that "nInternalNode" refers to the string position
      // within the tree.  So 0 is the top node and 3 is the node
      // going to PPan and PTro

      char cAncestralBase = soOneTree[ nInternalNode ];
         
      // there are two paths--one to the next ancestral node,
      // the other to a present-day node.  (In the case of the 0th 
      // node, there is another path--that to MMul.)

      int nIndexOfPresentDayBases = 
         nSubtractInternalNodeFromThisToGetPresentDayIndex - nInternalNode;

      char cPresentDayBase = soOneTree[ nIndexOfPresentDayBases ];

      char cNextAncestralBaseOrPPan = soOneTree[ nInternalNode + 1 ];


      // first consider the branch on the backbone


      RWCString soBackboneBranchName = soGetBranchNameWithoutTree( nInternalNode, 
                                                                   nOnBackbone );

      int nIndexOfBackboneBranch = nGetIndexForBranch( nInternalNode, nOnBackbone );

      assert( soBackboneBranchName ==
              aArrayOfNamesOfBranches[ nIndexOfBackboneBranch ] );

      if ( ! ( cAncestralBase == '*' && cNextAncestralBaseOrPPan == '*' ) ) {

         bool bIsDeaminationMutationn = 
            bIsDeaminationMutation( nInternalNode,
                                    nOnBackbone,
                                    soDeaminationMutations );


         if ( bIsDeaminationMutationn ) {
            aArrayOfNumberOfDeaminationMutationsAtBranch[ nIndexOfBackboneBranch ]
               += 1;
         }
         else {
            (*(pArraysAtBranch[ cAncestralBase ][ cNextAncestralBaseOrPPan ] ))[
                                                                             nIndexOfBackboneBranch ]
               += 1.0;

            // note:  the above includes non-mutations and indel mutations,
            // as well as the 12 substitutions

         }



         // just for sanity check
         if ( cAncestralBase != cNextAncestralBaseOrPPan ) {
            fRoughMutationCount += 1.0;

            if ( cAncestralBase != '*' &&
                 cNextAncestralBaseOrPPan != '*'  ) {


               RWCString soLocus = 
                  ConsEd::pGetAssembly()->filGetAceFileFullPathname().soGetProjectBeforeEditDir();

               fprintf( pAO, "mutation %s %15s %4d %s %c %s %c\n",
                        soBackboneBranchName.data(),
                        soLocus.data(),
                        nUnpaddedIndex( nConsPos ),
                        soPresentDayBases.data(),
                        cTransitionsTransversions[cAncestralBase][ cNextAncestralBaseOrPPan],
                        ( bIsDeaminationMutationn ? "yes_deam" : "no_deam"),
                        toupper( ntGetCons( nConsPos ).cGetBase() ) );
            } //   if ( nInternalNode == 0 ) {


         }
         // end sanity check
            
      }


      // now consider the branch down and to right

      int nIndexForBranchDownAndToRight =
         nGetIndexForBranch( nInternalNode, nDownToRight );

      RWCString soBranchNameDownAndToRight =
         soGetBranchNameWithoutTree( nInternalNode, nDownToRight );

      assert( soBranchNameDownAndToRight ==
              aArrayOfNamesOfBranches[ nIndexForBranchDownAndToRight ] );


      fprintf( pAO, "branch: %s %d %c %c 1\n",
               soBranchNameDownAndToRight.data(),
               nUnpaddedIndex( nConsPos ),
               cAncestralBase,
               cPresentDayBase );


      if ( !( cAncestralBase == '*' && cPresentDayBase == '*' ) ) {


         bool bIsDeaminationMutationn =
            bIsDeaminationMutation( nInternalNode,
                                    nDownToRight,
                                    soDeaminationMutations );


         if ( bIsDeaminationMutationn ) {
            aArrayOfNumberOfDeaminationMutationsAtBranch[ nIndexForBranchDownAndToRight ]
               += 1;
         }
         else {
            (*(pArraysAtBranch[ cAncestralBase ][ cPresentDayBase ] ))[ 
                                                                    nIndexForBranchDownAndToRight ]
               += 1.0;
         }

         // sanity check
         if ( cAncestralBase != cPresentDayBase ) {
            fRoughMutationCount += 1.0;
               
            if ( cAncestralBase != '*' && cPresentDayBase != '*' ) {


               RWCString soLocus = 
                  ConsEd::pGetAssembly()->filGetAceFileFullPathname().soGetProjectBeforeEditDir();

               fprintf( pAO, "mutation %s %15s %4d %s %c %s %c\n",
                        soBranchNameDownAndToRight.data(),
                        soLocus.data(),
                        nUnpaddedIndex( nConsPos ),
                        soPresentDayBases.data(),
                        cTransitionsTransversions[cAncestralBase][cPresentDayBase],
                        ( bIsDeaminationMutationn ? "yes_deam" : "no_deam" ),
                        toupper( ntGetCons( nConsPos ).cGetBase() ) );
            } 

         }
         // end sanity check
      }

      // one more branch to consider:  that going to MMul

      if ( nInternalNode == 0 ) {
            
         char cMMulBase = soOneTree[ nSubtractInternalNodeFromThisToGetPresentDayIndex + 1];

         int nIndexForMMulBranch = nGetIndexForBranch( -1, 0 );

         if ( !( cAncestralBase == '*' && cMMulBase == '*' ) ) {
               
            // mutation.  could it be a deamination mutation?

            bool bIsDeaminationMutationn =
               bIsDeaminationMutation( -1, // nInternalNode for MMul branch
                                       nDownToRight,
                                       soDeaminationMutations );

            if ( bIsDeaminationMutationn ) {
               aArrayOfNumberOfDeaminationMutationsAtBranch[ nIndexForMMulBranch ]
                  += 1;
            }
            else {

               (*(pArraysAtBranch[ cAncestralBase ][ cMMulBase ] ) )[
                                                                  nIndexForMMulBranch ]
                  += 1.0;
            }


            // sanity check
            if ( cAncestralBase != cMMulBase ) {
               fRoughMutationCount += 1.0;


               RWCString soLocus = 
                  ConsEd::pGetAssembly()->filGetAceFileFullPathname().soGetProjectBeforeEditDir();

               fprintf( pAO, "mutation MMul_ex %15s %4d %s %c %s %c\n",

                        soLocus.data(),
                        nUnpaddedIndex( nConsPos ),
                        soPresentDayBases.data(),
                        cTransitionsTransversions[cAncestralBase][cPresentDayBase],
                        ( bIsDeaminationMutationn ? "yes_deam" : "no_deam" ),
                        toupper( ntGetCons( nConsPos ).cGetBase() ) );







            }
            // end sanity check

         } //             if ( !( cAncestralBase == '*' && cMMulBase == '*'
      } //   if ( nInternalNode == 0 ) {
   } //       for( int nInternalNode = 0; nInternalNode < 5; ...


   // sanity check on the number of mutations

   int nNumberOfDistinctBases = 
      nGetNumberOfDistinctBasesNotNs( soPresentDayBases );


   float fDiff = fabs( nNumberOfDistinctBases - 1 - fRoughMutationCount );


   if ( fDiff > 0.001 ) {

      fprintf( pAO, "sanity check failed: %d distinct bases %.6f rough mutations %s %s %d\n",
            nNumberOfDistinctBases,
            fRoughMutationCount,
            soPresentDayBases.data(),
            soGetName().data(),
            nUnpaddedIndex( nConsPos ) );
   }


   nEstimatedTotalMutations += ( nNumberOfDistinctBases - 1 );
   ++nConsPosConsidered;
} // countMutationsAtConsPos(


                              



void Contig :: printMutationsWithContext( autoReport* pAutoReport ) {

   setTransitionsTransversionsTableIfNecessary();
   initializeArrays();

   // this differs from printFlankedColumns3 in that it records the
   // trees and attempts to identify which mutations are deamination
   // mutations

   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }


   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );

   //   aGoodReads.resort();  inefficient--just check every one


   // this will change when we delete columns

   int nPaddedContigLength = nGetPaddedConsLength();

   mbtValVectorOfBool aSomeCombinedReadHasNoPad( nPaddedContigLength,
                                                 1,
                                                 "Contig::aSomeCombinedReadHasNoPad" );

   RWTPtrOrderedVector<LocatedFragment> aListOfReadsInSingleSignalArrays( (size_t) 10 );

   typedef MBTValOrderedOffsetVector<char> typeArrayOfChar;
   RWTPtrOrderedVector<typeArrayOfChar> aListOfSingleSignalArrays( (size_t) 10 );


   // the point of the look below is to set aSomeCombinedReadHasNoPad
   // and to set the single signal arrays (aListOfSingleSignalArrays)

   int nSpecies;
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;
      
      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read
         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // If we are in a contig that doesn't have a human read (rare, and
      // pathological), ignore this contig.

      if ( !pHumanRead ) return;


      // single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      // save single signal arrays before deleting column of pads

      for( int n = 0; n <= 1; ++n ) {
         LocatedFragment* pLocFrag = NULL;

         if ( n == 0 )
            pLocFrag = pForwardRead;
         else
            pLocFrag = pReverseRead;

         if ( !pLocFrag ) continue;

         MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;

         assert( pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray ) );

         aListOfReadsInSingleSignalArrays.insert( pLocFrag );
         aListOfSingleSignalArrays.insert( pSingleSignalArray );

         if ( n == 0 )
            pForwardReadSingleSignalArray = pSingleSignalArray;
         else
            pReverseReadSingleSignalArray = pSingleSignalArray;
      }



      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         if ( !pHumanRead->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
         }

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         if ( cCombinedBase != '*' )
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
      }
   } //    for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_...


   // remove columns in which all combined reads have a pad

   // first do it to single signal arrays
   int nRead;
   for( nRead = 0; nRead < aListOfSingleSignalArrays.length();
        ++nRead ) {

      MBTValOrderedOffsetVector<char>* pSingleSignalArray =
         aListOfSingleSignalArrays[ nRead ];

      for( int nConsPos = nGetEndConsensusIndex();
           nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
         
         if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
            pSingleSignalArray->removeAt( nConsPos );
         }
      }
   } //  for( int nRead = 0;

   int nConsPos;
   for( nConsPos = nGetEndConsensusIndex();
        nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
      if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
         deleteColumn( nConsPos, false );
      }
   }

   // new padded length after deleting columns

   nPaddedContigLength = nGetPaddedConsLength();

   // create a combined read, one for each species
   // each combined read will be the length of the entire consensus

   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      RWCString soCombinedReadName = soSpecies + ".comb";
      
      LocatedFragment* pCombinedRead =
         new LocatedFragment( soCombinedReadName,
                              1, // aligned position within contig
                              false, // bComplemented
                              this ); // contig

      aCombinedReads[ nSpecies ] = pCombinedRead;

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         pCombinedRead->appendNtide( Ntide( 'n', 0 ) );
      }
   }




   // Now create bit arrays, one for each species


   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteria( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesMeetingCriteria" );

   mbtValVector<unsigned char> aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesAndAgreeingWithHuman" );

   mbtValVector<RWCString> aProblemsWithSpeciesAtConsPos( 
                                      nGetPaddedConsLength(),
                                      1, // starts at 1
                                      "iiiii", // initial value, "i" for initial
                                      "Contig::aProblemsWithSpeciesAtConsPos"
                                      );

   mbtValVector<RWCString> aProblemsForFlankingWithSpeciesAtConsPos( 
                                      nGetPaddedConsLength(),
                                      1, // starts at 1
                                      "iiiii", // initial value, "i" for initial
                                      "Contig::aProblemsForFlankingWithSpeciesAtConsPos"
                                      );

   mbtValVector<RWCString> aProblemsIncludingFlankingWithSpeciesAtConsPos( 
                                      nGetPaddedConsLength(),
                                      1, // starts at 1
                                      "iiiii", // initial value, "i" for initial
                                      "Contig::aProblemsWithSpeciesAtConsPos"
                                      );


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pCombinedRead = aCombinedReads[ nSpecies ];
      assert( pCombinedRead->soGetName().bContains( soSpecies ) );


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read

         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // case in which both forward and reverse reads are not good-matching
      
      if ( !pForwardRead && !pReverseRead ) {

         for( int nConsPos = nGetStartConsensusIndex();
              nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

            ( aProblemsWithSpeciesAtConsPos[ nConsPos ] )[ nSpecies ]
               = cProblemNoGoodRead;
         }
         
         continue; // next species
      }


      // If we are in a contig that doesn't contain a human read,
      // ignore the contig

      if ( !pHumanRead ) return;

      // find single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      for( int nSingleSignalArray = 0;
           nSingleSignalArray < aListOfReadsInSingleSignalArrays.length();
           ++nSingleSignalArray ) {

         if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
              pForwardRead ) {
            
            pForwardReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
         else if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
                   pReverseRead ) {
            pReverseReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
      }

      if ( pForwardRead ) {
         assert( pForwardReadSingleSignalArray );
      }

      if ( pReverseRead ) {
         assert( pReverseReadSingleSignalArray );
      }

      // at this point, we have whiever reads are available for that
      // species and their single-signal arrays

      unsigned char ucSpeciesMask = ucGetMaskForSpecies( soSpecies );

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         
         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         char cCombinedBase = 0;
         int nCombinedQuality = -666;
         char cProblem = ' ';

         bGetCombinedReadAtBaseAndReportProblem(
                                       cNotForFlanking,
                                       pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality,
                                       cProblem
                                       );


         ( aProblemsWithSpeciesAtConsPos[ nConsPos ] )[ nSpecies ] =
               cProblem;

         pCombinedRead->Sequence::setBaseAtPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                cCombinedBase );

         pCombinedRead->Sequence::setQualityAtSeqPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                nCombinedQuality );

         if ( cProblem == cProblemNone ) {
            aMaskOfSpeciesMeetingCriteria[nConsPos] |= ucSpeciesMask;
         }

         bGetCombinedReadAtBaseAndReportProblem(
                                       cForFlanking,
                                       pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality,
                                       cProblem
                                       );



         if ( cProblem == cProblemNone ) {


            // agreeing pads do not count as flanking bases.  To be
            // sure, explicitly check for this.  (Even though we deleted
            // columns of pads, the might be some columns not removed
            // that have a pad in human and some species but not others.)

            if ( ( cCombinedBase == ntGetCons( nConsPos ).cGetBase() ) &&
                 !ntGetCons( nConsPos ).bIsPad() ) {
               aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman[ nConsPos ] 
                  |= ucSpeciesMask;
            }
            else {
               cProblem = cProblemFlankingNotAgreeing;
            }
         }


         ( aProblemsForFlankingWithSpeciesAtConsPos[ nConsPos ] )[ nSpecies ] =
               cProblem;

      } //  for( int nConsPos = nGetStartConsensusIndex();
   } //    for( nSpecies = 0; nSpecies < ...


   // now we have finished setting 3 key arrays:
   // aCombinedReads
   // aMaskOfSpeciesMeetingCriteria
   // aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman

   // with these, we can figure out the flanked columns and
   // print out the bases in those columns



   mbtValVector<unsigned char> aAcceptableSpeciesAtConsPos( nGetPaddedConsLength(),
                                                    1, // starting position
                                                    0, // initial value
                                                    "aAcceptableSpeciesAtConsPos" );



   int nConsPosOfStartingColumn;
   for( nConsPosOfStartingColumn = nGetStartConsensusIndex(); 
        nConsPosOfStartingColumn <= nGetEndConsensusIndex() - 2; 
        ++nConsPosOfStartingColumn ) {

      int nConsPosOfEndingColumn = nConsPosOfStartingColumn + 2;

      // only interested in cases in which the 2 flanking columns which
      // some species in common.  Otherwise there is not a single
      // species meeting the flanking criteria.

      if (  aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
            aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ] ) {

         int nConsPos = nConsPosOfStartingColumn + 1;

         // we have a pair of flanking columns.  However, the
         // columns between the flanking columns may not 
         // be ok.  For example, it might be that the flanking
         // gorilla is single signal, but one of the internal
         // columns doesn't have single signal for gorilla.  
         // I think in such a case, we could use the columns
         // that are ok, but just not use the column in which
         // gorilla is not ok.  We should not use any species
         // that isn't supported by the flanking columns, though.

         unsigned char ucFlankingColumnSpecies =
            aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
            aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ] ;

         assert( ucFlankingColumnSpecies );

         if ( ucFlankingColumnSpecies & aMaskOfSpeciesMeetingCriteria[ nConsPos ] ) {

            unsigned char ucAcceptableSpeciesAtThisColumn =
                        aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                        ucFlankingColumnSpecies;

            aAcceptableSpeciesAtConsPos[ nConsPos ] =
               ucAcceptableSpeciesAtThisColumn;

            // the other species at this position can fail in 2 ways:
            // no flanking bases or not meeting criteria at this position
            // no flanking bases can be due to failing the flanking criteria
            // (e.g., in low quality segment) or not agreeing with human

            // is this the point at which we should record this info?


            for( int nSpecies = 0; nSpecies <
              pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

               RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
               unsigned char ucSpeciesMask = 
                  ucGetMaskForSpecies( soSpecies );

               char cProblem = ' ';

               if ( ucAcceptableSpeciesAtThisColumn & ucSpeciesMask ) {
                  cProblem = cProblemNone;
               }
               else if ( !( aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                            ucSpeciesMask ) ) {
                  // this base/species has a problem

                  cProblem = 
                     ( aProblemsWithSpeciesAtConsPos[ nConsPos ] )[ nSpecies ];
               }
               else {
                  // so the base itself is ok.  So the flanking
                  // bases must have a problem with them

                  assert( !( ucFlankingColumnSpecies & ucSpeciesMask ) );

                  // fails flanking.  Does it fail due to the flanking
                  // bases not agreeing with human or due to them not
                  // not meeting flanking criteria?

                  if ( ( aProblemsForFlankingWithSpeciesAtConsPos[ nConsPos - 1 ] )[ nSpecies ] != cProblemNone ) {
                     cProblem = ( aProblemsForFlankingWithSpeciesAtConsPos[ nConsPos - 1 ] )[ nSpecies ];
                  }
                  else if (  ( aProblemsForFlankingWithSpeciesAtConsPos[ nConsPos + 1 ] )[ nSpecies ] != cProblemNone ) {
                     cProblem = ( aProblemsForFlankingWithSpeciesAtConsPos[ nConsPos + 1 ] )[ nSpecies ];
                  }
                  else {
                     assert( false );
                  }

                  // if the problem is with a flanking base, we don't 
                  // report the specific problem, but rather that the
                  // problem was with the flanking base


                  if ( cProblem != cProblemFlankingNotAgreeing ) {
                     cProblem = cProblemFlankingOther;
                  }
               }


               ( aProblemsIncludingFlankingWithSpeciesAtConsPos[ nConsPos ] )[ nSpecies ] =
                  cProblem;


            } //   for( int nSpecies = 0; nSpecies <

         } //  if ( ucFlankingColumnSpecies & aMaskOfSpeciesMeetingCriteria[ 
      } //        if (  aMaskOfSpeciesMeetingFlankingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ...
   } //    for( int nConsPosOfStartingColumn = nGetStartConsensusIndex(); 


   // now get ancestral trees and hence deamination mutations


   RWTPtrVector<arrayOfRWCString> aArrayOfTreesAtEachConsPos( 
          nPaddedContigLength + 1, 
          // + 1so we can use nConsPos (1-based) directly as an index
          0,
          "aArrayOfTreesAtEachConsPos" );


   RWTValVector<RWCString> aArrayOfBasesAtEachConsPos( 
          nPaddedContigLength + 1,
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfBasesAtEachConsPos" );


    for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( ! aAcceptableSpeciesAtConsPos[ nConsPos ] ) continue;


      RWCString soColumnOfBases( (size_t) 11 ); // 6 bases and 5 spaces

      
      soColumnOfBases = ntGetCons( nConsPos ).cGetBase(); // start with human

      for( int nSpecies = 0; nSpecies <
              pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

         RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
         unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

         char cBase;

         if ( ucSpeciesMask & aAcceptableSpeciesAtConsPos[ nConsPos ] ) {

            // if reached here, soSpecies is a flanked species
            // which, at this columns, meets the criteria
                        
            // so print out the base

            LocatedFragment* pCombinedRead =
               aCombinedReads[ nSpecies ];

            assert( pCombinedRead );

            cBase = pCombinedRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
                           
         }
         else {
            // always the same number of bases on each line
            cBase = 'n';
         }

         soColumnOfBases += cBase;

      }  //  for( int nSpecies = 0; nSpecies <


      aArrayOfBasesAtEachConsPos[ nConsPos ] = soColumnOfBases;


      RWTValOrderedVector<RWCString> aTrees;

      findAncestralTrees( soColumnOfBases, aTrees );

      arrayOfRWCString* pTrees = new
         RWTValOrderedVector<RWCString>( aTrees );

      aArrayOfTreesAtEachConsPos[ nConsPos ] = pTrees;

   } //    for( nConsPos = nGetStartConsensusIndex(); 


   // record all deamination mutations in these arrays

   RWTPtrVector<mbtValOrderedVectorOfRWCString> 
      aArrayOfDeaminationMutationsAtEachConsPos( 
          nPaddedContigLength + 1, // so can
          // use nConsPos directly
          0, // initial 
          "aArrayOfDeaminationMutationsAtEachConsPos" );

   typedef RWTValOrderedVector<float> arrayOfFloat;

   RWTPtrVector<arrayOfFloat>
      aArrayOfProbabilitiesOfDeaminationMutationsAtEachConsPos(
           nPaddedContigLength + 1,  // so can use nConsPos directly
           0, // initial value
           "aArrayOfProbabilitiesOfDeaminationMutationsAtEachConsPos" );

   for( nConsPos = nGetStartConsensusIndex() + 1; 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( !aArrayOfTreesAtEachConsPos[ nConsPos - 1 ] ) continue;
      if ( !aArrayOfTreesAtEachConsPos[ nConsPos ] ) continue;


      checkTreesAndRecordDeaminationMutations( 
                 aArrayOfTreesAtEachConsPos[ nConsPos - 1 ],
                 aArrayOfTreesAtEachConsPos[ nConsPos ],
                 aArrayOfDeaminationMutationsAtEachConsPos[ nConsPos - 1 ],
                 aArrayOfDeaminationMutationsAtEachConsPos[ nConsPos ],
                 aArrayOfProbabilitiesOfDeaminationMutationsAtEachConsPos[ nConsPos - 1 ],
                 aArrayOfProbabilitiesOfDeaminationMutationsAtEachConsPos[ nConsPos ] );

   }



   // now used aAcceptableSpeciesAtConsPos to print out which
   // species will be used for each position


   for( nConsPos = nGetStartConsensusIndex() + 1; 
        nConsPos <= nGetEndConsensusIndex() - 1; 
        ++nConsPos ) {

      if ( ! aAcceptableSpeciesAtConsPos[ nConsPos ] ) continue;

      RWCString soColumnOfBases( (size_t) 6 ); // 6 bases and no spaces
      RWCString soColumnOfProblems( (size_t) 5 ); // 5 non-human

      
      soColumnOfBases = ntGetCons( nConsPos ).cGetBase(); // start with human

      char cHumanBase = ntGetCons( nConsPos ).cGetBase();
      

      for( int nSpecies = 0; nSpecies <
              pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

         RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
         unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

         LocatedFragment* pCombinedRead =
            aCombinedReads[ nSpecies ];

         assert( pCombinedRead );

         char cBase = pCombinedRead->ntGetFragFromConsPos( nConsPos ).cGetBase();

         soColumnOfProblems += 
               ( aProblemsIncludingFlankingWithSpeciesAtConsPos[ nConsPos ] )[ nSpecies ];

         soColumnOfBases += cBase;


      }  //  for( int nSpecies = 0; nSpecies <


      // now look for any deamination mutations


      RWTValOrderedVector<float>*
         pArrayOfProbabilitiesOfDeaminationMutationsAtEachSpecies = 
         aArrayOfProbabilitiesOfDeaminationMutationsAtEachConsPos[ nConsPos ];

      RWCString soDeaminationMutationFlags;

      bool bDeam = false;
      if ( pArrayOfProbabilitiesOfDeaminationMutationsAtEachSpecies ) {

         for( int nDeam = 0; 
              nDeam < pArrayOfProbabilitiesOfDeaminationMutationsAtEachSpecies->length();
              ++nDeam ) {

            if ( (*pArrayOfProbabilitiesOfDeaminationMutationsAtEachSpecies)[ nDeam ] >= 0.5 ) {
               bDeam = true;
               break;
            }
         }
      }
           

         
      char cBaseBefore = ntGetCons( nConsPos - 1 ).cGetBase();
      char cBaseAfter  = ntGetCons( nConsPos + 1 ).cGetBase();


      fprintf( pAO, "%c%c %s e%s %4d %s\n",
               cBaseBefore,
               cBaseAfter,
               soColumnOfBases.data(),
               soColumnOfProblems.data(),
               nUnpaddedIndex( nConsPos ),
               ( bDeam ? "deam  " : "nodeam" )

               );

   } //    for( nConsPos = nGetStartConsensusIndex(); 


}


void Contig :: printCrudeChimpHumanMutations( autoReport* pAutoReport ) {

   setTransitionsTransversionsTableIfNecessary();

   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   mbtValVector<unsigned char> aSpeciesInAcceptableColumns( 1, // will be changed
                                                    1, // starting position
                                                    0, // initial value
                                                    "aSpeciesInAcceptableColumns" );


   RWTValVector<RWCString> aArrayOfOneTreeAtEachConsPos( 
          1, // size--will be changed
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfOneTreeAtEachConsPos" );

   RWTValVector<RWCString> aArrayOfBasesAtEachConsPos( 
          1, // size--will be changed
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfBasesAtEachConsPos" );

   RWTValVector<RWCString> aArrayOfLenientlyFilteredBasesAtEachConsPos(
          1, // size--will be changed
          // + 1 so we can use nConsPos (1-based) directly as an index
          "", // initial value
          "aArrayOfLenientlyFilteredBasesAtEachConsPos" );


   RWTValVector<RWCString> aDeaminationMutationsAtEachConsPos( 
          1, // size--will be changed
          "", // initial value
          "aDeaminationMutationsAtEachConsPos" );

   RWTValVector<char> aAncestralCpGAtEachConsPos(
          1,// size--will be changed
          cNoCpG, // initial value
          "aAncestralCpGAtEachConsPos" );


   // this is different than above in that if a CpG occurs, both
   // columns are marked, rather than just the first column

   RWTValVector<char> aAncestralCpGAtEachConsPos2(
          1,// size--will be changed
          cNoCpG, // initial value
          "aAncestralCpGAtEachConsPos" );



   LocatedFragment* pHumanRead;

   applyFilters( pAutoReport,
                 aCombinedReads,
                 aSpeciesInAcceptableColumns,
                 aArrayOfOneTreeAtEachConsPos,
                 aArrayOfBasesAtEachConsPos,
                 aArrayOfLenientlyFilteredBasesAtEachConsPos,
                 aDeaminationMutationsAtEachConsPos,
                 aAncestralCpGAtEachConsPos2,
                 pHumanRead );



   RWCString soLocus = 
      ConsEd::pGetAssembly()->filGetAceFileFullPathname().soGetProjectBeforeEditDir();

   int nConsPos;
   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( aArrayOfOneTreeAtEachConsPos[ nConsPos ].isNull() ) continue;

      // for comparison with Graham's data

      if ( ! ( aSpeciesInAcceptableColumns[ nConsPos ]
               & ucPTro ) ) continue;
      
      if ( ! ( aSpeciesInAcceptableColumns[ nConsPos ]
               & ucMMul ) ) continue;
         
      if ( aAncestralCpGAtEachConsPos2[ nConsPos ] != cNoCpG ) 
         continue;

         
      // end for comparison with Graham's data

      RWCString soBases = aArrayOfBasesAtEachConsPos[ nConsPos ];

      char cPTro = soBases[nPTro];
      char cHSap = soBases[nHSap];
      char cMMul = soBases[nMMul];

      // can't exclude it here, since some of these do not have indels
      // in Grahams data.  
      // Exclude it in perl.

      // we are not interested in indels
//       if ( cPTro == '*' ||
//            cHSap == '*' ||
//            cMMul == '*' )  continue;

      //      if ( cPTro == cHSap && cHSap == cMMul ) continue;

      RWCString soBranch;
      char cBase1;
      char cBase2;
      if ( cPTro == cHSap && cHSap == cMMul ) {
         soBranch = "o";
      }
      else if ( cPTro == cMMul ) {
         soBranch = "HSap_ex";
      }
      else if ( cHSap == cMMul ) {
         soBranch = "PTro_ex";
      }
      else {
         // neither human nor chimp matches mmul--not useful for this analysis
         soBranch = "o";
      }

      // change pads from * to -

      soBases.translate( "*", "-" );
      
      fprintf( pAO, "mutation %s %15s %4d %s %c\n",
               soBranch.data(),
               soLocus.data(),
               nUnpaddedIndex( nConsPos ),
               soBases.data(),
               cTransitionsTransversions[cHSap][cPTro] );
   }
}