/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "contig.h"
#include    "consedParameters.h"
#include    "locatedFragment.h"
#include    "bIntersect.h"
#include    "mbtValVectorOfBool.h"
#include    "mbtValVector.h"
#include    "rwtptrorderedvector.h"
#include    "ntide.h"
#include    "fileDefines.h"
#include    "soAddCommas.h"
#include    "rwctokenizer.h"
#include    "autoReport.h"
#include    "consed.h"
#include    "assembly.h"
#include    "nNumberFromBase.h"
#include    "setErrorRateFromQuality.h"
#include    "dErrorRateFromQuality.h"
#include    "singletInfo.h"
#include    "readPHDAgainForHighQualitySegment.h"
#include    <stdlib.h>
#include    <stdio.h>
#include    <dirent.h>
#include    "rwcregexp.h"
#include    "getAllSinglets.h"
#include    "mbt_val_ord_offset_vec.h"
#include    "soLine.h"
#include    "max.h"
#include    <math.h>
#include    "findAncestralTrees.h"
#include    "ucGetMaskForSpecies.h"
#include    "bAllNeededSpecies.h"
#include    "assert.h"
#include    "kimuraBranchProb.h"
#include    "soGetBranchNameWithoutTree.h"


static const RWCString soHCClade("hc" );
static const RWCString soHGClade("hg" );
static const RWCString soCGClade("cg" );


static char cGetAgreeingBasesNoNs( const char cA, const char cB ) {

   if ( cA == cB && cA != 'n' ) {
      return( cA );
   }
   else if ( cA != cB ) {
      if ( cA == 'n' ) 
         return( cB );
      else if ( cB == 'n' )
         return( cA );
      else {
         return( 0 );
      }
   }
   else {
      // bases both equal n
      assert( false );
   }
}


static bool bIsAllPadsOrNs( const RWCString& soBases ) {
   for( int n = 0; n < soBases.length(); ++n ) {
      if ( soBases[n] != '*' &&
           soBases[n] != 'n' ) {
         return false;
      }
   }

   return true;
}


void Contig :: chooseTreesUsingBadData( 
         const RWCString& soColumnOfBadBases2,
         RWTValOrderedVector<RWCString>& aTrees,
         RWTValVector<bool>* pChosenTrees ) {


   // convert soColumnOfBadBases2 to a form with the following order:
   // from:
   // HSap PPan PTro GGor PPyg MMul
   // to this form:
   // PPan PTro HSap GGor PPyg MMul

   char cHSap = soColumnOfBadBases2[0];
   RWCString soColumnOfBadBases = soColumnOfBadBases2.soGetRestOfString( 1 );
   
   soColumnOfBadBases.insert( 2, RWCString( cHSap ) );


   RWTValVector<bool> aTreesMatching( (size_t) aTrees.length(),
                                         false,
                                         "aTreesMatching" );


   int nNumberOfMatches = 0;

   int nTree;
   for( nTree = 0; nTree < aTrees.length(); ++nTree ) {

      if ( !(*pChosenTrees)[ nTree ] ) continue;

      RWCString soTree = aTrees[ nTree ];

      soTree.removeSingleCharacterAllOccurrences('?');
      soTree.removeSingleCharacterAllOccurrences('(');
      soTree.removeSingleCharacterAllOccurrences(')');
      soTree.removeSingleCharacterAllOccurrences(',');

      // looks like:
      // abcdef
      // ^to PPyg and MMul
      //  ^to GGor
      //   ^ to HSap
      //    ^ to PPan and PTro
      //     ^PPan PTro HSap GGor PPyg MMul
      // 01234     5    6    7    8    9

      RWCString soPresentDayBasesFromTree = soTree(4, 6 );

      if ( soColumnOfBadBases.length() != 
           soPresentDayBasesFromTree.length() ) {
         RWCString soError = "bad bases: " +
            soColumnOfBadBases + " not same length as present-day " + 
            soPresentDayBasesFromTree;
         THROW_ERROR( soError );
      }

      bool bMatches = true;
      for( int nSpecies = 0; nSpecies < soColumnOfBadBases.length(); ++nSpecies ) {
         if ( ( soColumnOfBadBases[nSpecies] != 'n' ) &&
              ( soColumnOfBadBases[nSpecies] != soPresentDayBasesFromTree[nSpecies] ) ) {
            bMatches = false;
            break;
         }
      }


      if ( bMatches ) {
         aTreesMatching[ nTree ] = true;
         ++nNumberOfMatches;
      }
   }


   if ( nNumberOfMatches == 0  ) {
      // do not use bad data to choose the tree
      return;
   }
   else {
      for( nTree = 0; nTree < aTreesMatching.length(); ++nTree ) {

         (*pChosenTrees)[ nTree ] = aTreesMatching[ nTree ];
      }
   }
}

            
void Contig :: printFlankedColumns2( autoReport* pAutoReport ) {


   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }


   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );

   //   aGoodReads.resort();  inefficient--just check every one


   // this will change when we delete columns

   int nPaddedContigLength = nGetPaddedConsLength();

   mbtValVectorOfBool aSomeCombinedReadHasNoPad( nPaddedContigLength,
                                                 1,
                                                 "Contig::aSomeCombinedReadHasNoPad" );

   RWTPtrOrderedVector<LocatedFragment> aListOfReadsInSingleSignalArrays( (size_t) 10 );

   typedef MBTValOrderedOffsetVector<char> typeArrayOfChar;
   RWTPtrOrderedVector<typeArrayOfChar> aListOfSingleSignalArrays( (size_t) 10 );


   // the point of the look below is to set aSomeCombinedReadHasNoPad
   // and to set the single signal arrays (aListOfSingleSignalArrays)

   int nSpecies;
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;
      
      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read
         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // If we are in a contig that doesn't have a human read (rare, and
      // pathological), ignore this contig.

      if ( !pHumanRead ) return;


      // single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      // save single signal arrays before deleting column of pads

      for( int n = 0; n <= 1; ++n ) {
         LocatedFragment* pLocFrag = NULL;

         if ( n == 0 )
            pLocFrag = pForwardRead;
         else
            pLocFrag = pReverseRead;

         if ( !pLocFrag ) continue;

         MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;

         assert( pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray ) );

         aListOfReadsInSingleSignalArrays.insert( pLocFrag );
         aListOfSingleSignalArrays.insert( pSingleSignalArray );

         if ( n == 0 )
            pForwardReadSingleSignalArray = pSingleSignalArray;
         else
            pReverseReadSingleSignalArray = pSingleSignalArray;
      }


      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         if ( !pHumanRead->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
         }

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         if ( cCombinedBase != '*' )
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
      }
   } //    for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_...


   // remove columns in which all combined reads have a pad

   // first do it to single signal arrays
   int nRead;
   for( nRead = 0; nRead < aListOfSingleSignalArrays.length();
        ++nRead ) {

      MBTValOrderedOffsetVector<char>* pSingleSignalArray =
         aListOfSingleSignalArrays[ nRead ];

      for( int nConsPos = nGetEndConsensusIndex();
           nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
         
         if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
            pSingleSignalArray->removeAt( nConsPos );
         }
      }
   } //  for( int nRead = 0;

   int nConsPos;
   for( nConsPos = nGetEndConsensusIndex();
        nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
      if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
         deleteColumn( nConsPos, false );
      }
   }

   // new padded length after deleting columns

   nPaddedContigLength = nGetPaddedConsLength();

   // create a combined read, one for each species
   // each combined read will be the length of the entire consensus

   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      RWCString soCombinedReadName = soSpecies + ".comb";
      
      LocatedFragment* pCombinedRead =
         new LocatedFragment( soCombinedReadName,
                              1, // aligned position within contig
                              false, // bComplemented
                              this ); // contig

      aCombinedReads[ nSpecies ] = pCombinedRead;

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         pCombinedRead->appendNtide( Ntide( 'n', 0 ) );
      }
   }


   // Now create bit arrays, one for each species


   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteria( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesMeetingCriteria" );

   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesAndAgreeingWithHuman" );


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pCombinedRead = aCombinedReads[ nSpecies ];
      assert( pCombinedRead->soGetName().bContains( soSpecies ) );


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read

         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }


      // If we are in a contig that doesn't contain a human read,
      // ignore the contig

      if ( !pHumanRead ) return;

      // find single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      for( int nSingleSignalArray = 0;
           nSingleSignalArray < aListOfReadsInSingleSignalArrays.length();
           ++nSingleSignalArray ) {

         if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
              pForwardRead ) {
            
            pForwardReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
         else if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
                   pReverseRead ) {
            pReverseReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
      }

      if ( pForwardRead ) {
         assert( pForwardReadSingleSignalArray );
      }

      if ( pReverseRead ) {
         assert( pReverseReadSingleSignalArray );
      }

      // at this point, we have whiever reads are available for that
      // species and their single-signal arrays

      unsigned char ucSpeciesMask = ucGetMaskForSpecies( soSpecies );

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         
         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         pCombinedRead->Sequence::setBaseAtPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                cCombinedBase );

         pCombinedRead->Sequence::setQualityAtSeqPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                nCombinedQuality );
         
         aMaskOfSpeciesMeetingCriteria[nConsPos] |= ucSpeciesMask;

         // agreeing pads do not count as flanking bases.  To be
         // sure, explicitly check for this.  (Even though we deleted
         // columns of pads, the might be some columns not removed
         // that have a pad in human and some species but not others.)

         if ( ( cCombinedBase == ntGetCons( nConsPos ).cGetBase() ) &&
              !ntGetCons( nConsPos ).bIsPad() ) {
            aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPos ] 
               |= ucSpeciesMask;
         }
      } //  for( int nConsPos = nGetStartConsensusIndex();
   } //    for( nSpecies = 0; nSpecies < ...


   // now we have finished setting 3 key arrays:
   // aCombinedReads
   // aMaskOfSpeciesMeetingCriteria
   // aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman

   // with these, we can figure out the flanked columns and
   // print out the bases in those columns


   mbtValVector<unsigned char> aColumnsAlreadyUsedSpecies( nGetPaddedConsLength(),
                                                    1, // starting position
                                                    0, // initial value
                                                    "aColumnsAlreadyUsedSpecies" );


   int nConsPosOfStartingColumn;
   for( nConsPosOfStartingColumn = nGetStartConsensusIndex(); 
        nConsPosOfStartingColumn <= nGetEndConsensusIndex(); 
        ++nConsPosOfStartingColumn ) {

      
      if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {

         for( int nConsPosOfEndingColumn = nConsPosOfStartingColumn + 2;
              nConsPosOfEndingColumn <=
                 MIN( nConsPosOfStartingColumn + 4, nGetEndConsensusIndex() );
              ++nConsPosOfEndingColumn ) {

            if ( bAllNeededSpecies( 
                   aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                   aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ] ) ) {

               // we have a pair of flanking columns.  However, the
               // columns between the flanking columns may not 
               // be ok.  For example, it might be that the flanking
               // gorilla is single signal, but one of the internal
               // columns doesn't have single signal for gorilla.  
               // I think in such a case, we could use the columns
               // that are ok, but just not use the column in which
               // gorilla is not ok.  We should not use any species
               // that isn't supported by the flanking columns, though.

               unsigned char ucFlankingColumnSpecies =
                  aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                  aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ];

               // now look at all columns between these flanking columns

               for( int nConsPos = nConsPosOfStartingColumn + 1;
                    nConsPos <= nConsPosOfEndingColumn - 1;
                    ++nConsPos ) {

                  // do not use the same column more than once


                  if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                                          ucFlankingColumnSpecies ) ) {

                     unsigned char ucFlankedSpeciesAtThisColumn =
                        aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                        ucFlankingColumnSpecies;

                     // use this new flanking of the column if:
                     // 1) this column has not been used
                     //    or
                     // 2) the new set of species is a proper superset
                     //    of the old one

                     // the negation of the above is:
                     // 1)  the column has been used
                     //      and
                     // 2)  the new set of species is not a proper superset
                     //    of the old one

                     if ( aColumnsAlreadyUsedSpecies[ nConsPos ] != 0 &&
                          ( ( aColumnsAlreadyUsedSpecies[ nConsPos ] & 
                              ucFlankedSpeciesAtThisColumn ) !=
                            aColumnsAlreadyUsedSpecies[ nConsPos ] ) ) continue;

                     aColumnsAlreadyUsedSpecies[ nConsPos ] |=
                        ucFlankedSpeciesAtThisColumn;


                  } //   if ( bAllNeededSpecies(
               } //  for( int nConsPos = nConsPosOfStartingColumn + 1;
            } // 
         }
      } // if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {
   } //    for( int nConsPosOfStartingColumn = nGetStartConsensusIndex(); 


   fprintf( pAO, "printFlankedColumns {\n" );

   fprintf( pAO, "order of species: HSap" );
   for( nSpecies = 0; nSpecies <
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
      fprintf( pAO, " %s", soSpecies.data() );
   }
   fprintf( pAO, "\n" );


   // now used aColumnsAlreadyUsedSpecies to print out which
   // species will be used for each position

   
   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( ! aColumnsAlreadyUsedSpecies[ nConsPos ] ) continue;

      
      fprintf( pAO, "%d %d %c",
               nUnpaddedIndex( nConsPos ),
               nConsPos,
               ntGetCons( nConsPos ).cGetBase() );

      for( int nSpecies = 0; nSpecies <
              pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

         RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
         unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

         if ( ucSpeciesMask & aColumnsAlreadyUsedSpecies[ nConsPos ] ) {

            // if reached here, soSpecies is a flanked species
            // which, at this columns, meets the criteria
                        
            // so print out the base

            LocatedFragment* pCombinedRead =
               aCombinedReads[ nSpecies ];

            assert( pCombinedRead );
                           
            fprintf( pAO, " %c",
                     pCombinedRead->ntGetFragFromConsPos( nConsPos ).cGetBase() );
         }
         else {
            // always the same number of bases on each line
            fprintf( pAO, " ?" );
         }
      }  //  for( int nSpecies = 0; nSpecies <

      // print contig name (which has the locus name in it
      fprintf( pAO, " %s\n", soGetName().data() );
   }


   fprintf( pAO, "} printFlankedColumns\n" );

}


static bool bNoMutationThisColumn( const RWCString& soBases ) {
   
   char cBase = 0;
   for( int n = 0; n < soBases.length(); ++n ) {
      if ( soBases[n] == 'n' ) continue;

      if ( !cBase ) {
         cBase = soBases[n];
      }
      else {
         if ( cBase != soBases[n] ) {
            return false;
         }
      }
   }

   return true;
}


void Contig :: printFlankedColumns3( autoReport* pAutoReport ) {


   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }


   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );

   //   aGoodReads.resort();  inefficient--just check every one


   // this will change when we delete columns

   int nPaddedContigLength = nGetPaddedConsLength();

   mbtValVectorOfBool aSomeCombinedReadHasNoPad( nPaddedContigLength,
                                                 1,
                                                 "Contig::aSomeCombinedReadHasNoPad" );

   RWTPtrOrderedVector<LocatedFragment> aListOfReadsInSingleSignalArrays( (size_t) 10 );

   typedef MBTValOrderedOffsetVector<char> typeArrayOfChar;
   RWTPtrOrderedVector<typeArrayOfChar> aListOfSingleSignalArrays( (size_t) 10 );


   // the point of the look below is to set aSomeCombinedReadHasNoPad
   // and to set the single signal arrays (aListOfSingleSignalArrays)

   int nSpecies;
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;
      
      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read
         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // If we are in a contig that doesn't have a human read (rare, and
      // pathological), ignore this contig.

      if ( !pHumanRead ) return;


      // single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      // save single signal arrays before deleting column of pads

      for( int n = 0; n <= 1; ++n ) {
         LocatedFragment* pLocFrag = NULL;

         if ( n == 0 )
            pLocFrag = pForwardRead;
         else
            pLocFrag = pReverseRead;

         if ( !pLocFrag ) continue;

         MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;

         assert( pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray ) );

         aListOfReadsInSingleSignalArrays.insert( pLocFrag );
         aListOfSingleSignalArrays.insert( pSingleSignalArray );

         if ( n == 0 )
            pForwardReadSingleSignalArray = pSingleSignalArray;
         else
            pReverseReadSingleSignalArray = pSingleSignalArray;
      }


      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         if ( !pHumanRead->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
         }

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         if ( cCombinedBase != '*' )
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
      }
   } //    for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_...


   // remove columns in which all combined reads have a pad

   // first do it to single signal arrays
   int nRead;
   for( nRead = 0; nRead < aListOfSingleSignalArrays.length();
        ++nRead ) {

      MBTValOrderedOffsetVector<char>* pSingleSignalArray =
         aListOfSingleSignalArrays[ nRead ];

      for( int nConsPos = nGetEndConsensusIndex();
           nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
         
         if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
            pSingleSignalArray->removeAt( nConsPos );
         }
      }
   } //  for( int nRead = 0;

   int nConsPos;
   for( nConsPos = nGetEndConsensusIndex();
        nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
      if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
         deleteColumn( nConsPos, false );
      }
   }

   // new padded length after deleting columns

   nPaddedContigLength = nGetPaddedConsLength();

   // create a combined read, one for each species
   // each combined read will be the length of the entire consensus

   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      RWCString soCombinedReadName = soSpecies + ".comb";
      
      LocatedFragment* pCombinedRead =
         new LocatedFragment( soCombinedReadName,
                              1, // aligned position within contig
                              false, // bComplemented
                              this ); // contig

      aCombinedReads[ nSpecies ] = pCombinedRead;

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         pCombinedRead->appendNtide( Ntide( 'n', 0 ) );
      }
   }


   // Now create bit arrays, one for each species


   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteria( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesMeetingCriteria" );

   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesAndAgreeingWithHuman" );


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pCombinedRead = aCombinedReads[ nSpecies ];
      assert( pCombinedRead->soGetName().bContains( soSpecies ) );


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read

         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }


      // If we are in a contig that doesn't contain a human read,
      // ignore the contig

      if ( !pHumanRead ) return;

      // find single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      for( int nSingleSignalArray = 0;
           nSingleSignalArray < aListOfReadsInSingleSignalArrays.length();
           ++nSingleSignalArray ) {

         if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
              pForwardRead ) {
            
            pForwardReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
         else if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
                   pReverseRead ) {
            pReverseReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
      }

      if ( pForwardRead ) {
         assert( pForwardReadSingleSignalArray );
      }

      if ( pReverseRead ) {
         assert( pReverseReadSingleSignalArray );
      }

      // at this point, we have whiever reads are available for that
      // species and their single-signal arrays

      unsigned char ucSpeciesMask = ucGetMaskForSpecies( soSpecies );

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         
         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         pCombinedRead->Sequence::setBaseAtPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                cCombinedBase );

         pCombinedRead->Sequence::setQualityAtSeqPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                nCombinedQuality );
         
         aMaskOfSpeciesMeetingCriteria[nConsPos] |= ucSpeciesMask;

         // agreeing pads do not count as flanking bases.  To be
         // sure, explicitly check for this.  (Even though we deleted
         // columns of pads, the might be some columns not removed
         // that have a pad in human and some species but not others.)

         if ( ( cCombinedBase == ntGetCons( nConsPos ).cGetBase() ) &&
              !ntGetCons( nConsPos ).bIsPad() ) {
            aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPos ] 
               |= ucSpeciesMask;
         }
      } //  for( int nConsPos = nGetStartConsensusIndex();
   } //    for( nSpecies = 0; nSpecies < ...


   // now we have finished setting 3 key arrays:
   // aCombinedReads
   // aMaskOfSpeciesMeetingCriteria
   // aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman

   // with these, we can figure out the flanked columns and
   // print out the bases in those columns


   mbtValVector<unsigned char> aColumnsAlreadyUsedSpecies( nGetPaddedConsLength(),
                                                    1, // starting position
                                                    0, // initial value
                                                    "aColumnsAlreadyUsedSpecies" );


   int nConsPosOfStartingColumn;
   for( nConsPosOfStartingColumn = nGetStartConsensusIndex(); 
        nConsPosOfStartingColumn <= nGetEndConsensusIndex(); 
        ++nConsPosOfStartingColumn ) {

      
      if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {

         for( int nConsPosOfEndingColumn = nConsPosOfStartingColumn + 2;
              nConsPosOfEndingColumn <=
                 MIN( nConsPosOfStartingColumn + 4, nGetEndConsensusIndex() );
              ++nConsPosOfEndingColumn ) {

            if ( bAllNeededSpecies( 
                   aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                   aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ] ) ) {

               // we have a pair of flanking columns.  However, the
               // columns between the flanking columns may not 
               // be ok.  For example, it might be that the flanking
               // gorilla is single signal, but one of the internal
               // columns doesn't have single signal for gorilla.  
               // I think in such a case, we could use the columns
               // that are ok, but just not use the column in which
               // gorilla is not ok.  We should not use any species
               // that isn't supported by the flanking columns, though.

               unsigned char ucFlankingColumnSpecies =
                  aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                  aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ];

               // now look at all columns between these flanking columns

               for( int nConsPos = nConsPosOfStartingColumn + 1;
                    nConsPos <= nConsPosOfEndingColumn - 1;
                    ++nConsPos ) {

                  // do not use the same column more than once


                  if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                                          ucFlankingColumnSpecies ) ) {

                     unsigned char ucFlankedSpeciesAtThisColumn =
                        aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                        ucFlankingColumnSpecies;

                     // use this new flanking of the column if:
                     // 1) this column has not been used
                     //    or
                     // 2) the new set of species is a proper superset
                     //    of the old one

                     // the negation of the above is:
                     // 1)  the column has been used
                     //      and
                     // 2)  the new set of species is not a proper superset
                     //    of the old one

                     if ( aColumnsAlreadyUsedSpecies[ nConsPos ] != 0 &&
                          ( ( aColumnsAlreadyUsedSpecies[ nConsPos ] & 
                              ucFlankedSpeciesAtThisColumn ) !=
                            aColumnsAlreadyUsedSpecies[ nConsPos ] ) ) continue;

                     aColumnsAlreadyUsedSpecies[ nConsPos ] |=
                        ucFlankedSpeciesAtThisColumn;


                  } //   if ( bAllNeededSpecies(
               } //  for( int nConsPos = nConsPosOfStartingColumn + 1;
            } // 
         }
      } // if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {
   } //    for( int nConsPosOfStartingColumn = nGetStartConsensusIndex(); 


   fprintf( pAO, "printFlankedColumns {\n" );

   fprintf( pAO, "order of species: HSap" );
   for( nSpecies = 0; nSpecies <
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
      fprintf( pAO, " %s", soSpecies.data() );
   }
   fprintf( pAO, "\n" );


   // now used aColumnsAlreadyUsedSpecies to print out which
   // species will be used for each position


   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( ! aColumnsAlreadyUsedSpecies[ nConsPos ] ) continue;


      RWCString soColumnOfBases( (size_t) 11 ); // 6 bases and 5 spaces

      
      fprintf( pAO, "%d %d %c",
               nUnpaddedIndex( nConsPos ),
               nConsPos,
               ntGetCons( nConsPos ).cGetBase() );

      soColumnOfBases = ntGetCons( nConsPos ).cGetBase(); // start with human

      for( int nSpecies = 0; nSpecies <
              pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

         RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
         unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

         char cBase;

         if ( ucSpeciesMask & aColumnsAlreadyUsedSpecies[ nConsPos ] ) {

            // if reached here, soSpecies is a flanked species
            // which, at this columns, meets the criteria
                        
            // so print out the base

            LocatedFragment* pCombinedRead =
               aCombinedReads[ nSpecies ];

            assert( pCombinedRead );

            cBase = pCombinedRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
                           
         }
         else {
            // always the same number of bases on each line
            cBase = 'n';
         }

         soColumnOfBases += cBase;

         fprintf( pAO, " %c", cBase );

      }  //  for( int nSpecies = 0; nSpecies <


      // print contig name (which has the locus name in it
      fprintf( pAO, " %s", soGetName().data() );


      if ( bNoMutationThisColumn( soColumnOfBases ) ) {
         fprintf( pAO, " same\n" );
      }
      else {

         RWTValOrderedVector<RWCString> aTrees;

         findAncestralTrees( soColumnOfBases, aTrees );

         for( int nTree = 0; nTree < aTrees.length(); ++nTree ) {
            fprintf( pAO, " %s", aTrees[ nTree ].data() );
         }

         fprintf( pAO, "\n" );
      }

   } //    for( nConsPos = nGetStartConsensusIndex(); 


   fprintf( pAO, "} printFlankedColumns\n" );

}


void Contig :: countAcceptableColumnsWithNoneOnLeft( autoReport* pAutoReport ) {

   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }


   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );

   //   aGoodReads.resort();  inefficient--just check every one


   // this will change when we delete columns

   int nPaddedContigLength = nGetPaddedConsLength();

   mbtValVectorOfBool aSomeCombinedReadHasNoPad( nPaddedContigLength,
                                                 1,
                                                 "Contig::aSomeCombinedReadHasNoPad" );

   RWTPtrOrderedVector<LocatedFragment> aListOfReadsInSingleSignalArrays( (size_t) 10 );

   typedef MBTValOrderedOffsetVector<char> typeArrayOfChar;
   RWTPtrOrderedVector<typeArrayOfChar> aListOfSingleSignalArrays( (size_t) 10 );


   // the point of the look below is to set aSomeCombinedReadHasNoPad
   // and to set the single signal arrays (aListOfSingleSignalArrays)

   int nSpecies;
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;
      
      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read
         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // If we are in a contig that doesn't have a human read (rare, and
      // pathological), ignore this contig.

      if ( !pHumanRead ) return;


      // single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      // save single signal arrays before deleting column of pads

      for( int n = 0; n <= 1; ++n ) {
         LocatedFragment* pLocFrag = NULL;

         if ( n == 0 )
            pLocFrag = pForwardRead;
         else
            pLocFrag = pReverseRead;

         if ( !pLocFrag ) continue;

         MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;

         assert( pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray ) );

         aListOfReadsInSingleSignalArrays.insert( pLocFrag );
         aListOfSingleSignalArrays.insert( pSingleSignalArray );

         if ( n == 0 )
            pForwardReadSingleSignalArray = pSingleSignalArray;
         else
            pReverseReadSingleSignalArray = pSingleSignalArray;
      }


      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         if ( !pHumanRead->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
         }

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         if ( cCombinedBase != '*' )
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
      }
   } //    for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_...


   // remove columns in which all combined reads have a pad

   // first do it to single signal arrays
   int nRead;
   for( nRead = 0; nRead < aListOfSingleSignalArrays.length();
        ++nRead ) {

      MBTValOrderedOffsetVector<char>* pSingleSignalArray =
         aListOfSingleSignalArrays[ nRead ];

      for( int nConsPos = nGetEndConsensusIndex();
           nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
         
         if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
            pSingleSignalArray->removeAt( nConsPos );
         }
      }
   } //  for( int nRead = 0;

   int nConsPos;
   for( nConsPos = nGetEndConsensusIndex();
        nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
      if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
         deleteColumn( nConsPos, false );
      }
   }

   // new padded length after deleting columns

   nPaddedContigLength = nGetPaddedConsLength();

   // create a combined read, one for each species
   // each combined read will be the length of the entire consensus

   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      RWCString soCombinedReadName = soSpecies + ".comb";
      
      LocatedFragment* pCombinedRead =
         new LocatedFragment( soCombinedReadName,
                              1, // aligned position within contig
                              false, // bComplemented
                              this ); // contig

      aCombinedReads[ nSpecies ] = pCombinedRead;

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         pCombinedRead->appendNtide( Ntide( 'n', 0 ) );
      }
   }


   // Now create bit arrays, one for each species


   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteria( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesMeetingCriteria" );

   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesAndAgreeingWithHuman" );


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pCombinedRead = aCombinedReads[ nSpecies ];
      assert( pCombinedRead->soGetName().bContains( soSpecies ) );


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         // check if this is a good read

         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }


      // If we are in a contig that doesn't contain a human read,
      // ignore the contig

      if ( !pHumanRead ) return;

      // find single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      for( int nSingleSignalArray = 0;
           nSingleSignalArray < aListOfReadsInSingleSignalArrays.length();
           ++nSingleSignalArray ) {

         if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
              pForwardRead ) {
            
            pForwardReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
         else if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
                   pReverseRead ) {
            pReverseReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
      }

      if ( pForwardRead ) {
         assert( pForwardReadSingleSignalArray );
      }

      if ( pReverseRead ) {
         assert( pReverseReadSingleSignalArray );
      }

      // at this point, we have whiever reads are available for that
      // species and their single-signal arrays

      unsigned char ucSpeciesMask = ucGetMaskForSpecies( soSpecies );

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         
         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         if ( !bGetCombinedReadAtBase( pForwardRead,
                                       pForwardReadSingleSignalArray,
                                       pReverseRead,
                                       pReverseReadSingleSignalArray,
                                       nConsPos,
                                       cCombinedBase,
                                       nCombinedQuality ) ) continue;

         pCombinedRead->Sequence::setBaseAtPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                cCombinedBase );

         pCombinedRead->Sequence::setQualityAtSeqPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                nCombinedQuality );
         
         aMaskOfSpeciesMeetingCriteria[nConsPos] |= ucSpeciesMask;

         // agreeing pads do not count as flanking bases.  To be
         // sure, explicitly check for this.  (Even though we deleted
         // columns of pads, the might be some columns not removed
         // that have a pad in human and some species but not others.)

         if ( ( cCombinedBase == ntGetCons( nConsPos ).cGetBase() ) &&
              !ntGetCons( nConsPos ).bIsPad() ) {
            aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPos ] 
               |= ucSpeciesMask;
         }
      } //  for( int nConsPos = nGetStartConsensusIndex();
   } //    for( nSpecies = 0; nSpecies < ...


   // now we have finished setting 3 key arrays:
   // aCombinedReads
   // aMaskOfSpeciesMeetingCriteria
   // aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman

   // with these, we can figure out the flanked columns and
   // print out the bases in those columns


   mbtValVector<unsigned char> aSpeciesInAcceptableColumns( nGetPaddedConsLength(),
                                                    1, // starting position
                                                    0, // initial value
                                                    "aSpeciesInAcceptableColumns" );


   int nConsPosOfStartingColumn;
   for( nConsPosOfStartingColumn = nGetStartConsensusIndex(); 
        nConsPosOfStartingColumn <= nGetEndConsensusIndex(); 
        ++nConsPosOfStartingColumn ) {

      
      if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {

         for( int nConsPosOfEndingColumn = nConsPosOfStartingColumn + 2;
              nConsPosOfEndingColumn <=
                 MIN( nConsPosOfStartingColumn + 4, nGetEndConsensusIndex() );
              ++nConsPosOfEndingColumn ) {

            if ( bAllNeededSpecies( 
                   aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                   aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ] ) ) {

               // we have a pair of flanking columns.  However, the
               // columns between the flanking columns may not 
               // be ok.  For example, it might be that the flanking
               // gorilla is single signal, but one of the internal
               // columns doesn't have single signal for gorilla.  
               // I think in such a case, we could use the columns
               // that are ok, but just not use the column in which
               // gorilla is not ok.  We should not use any species
               // that isn't supported by the flanking columns, though.

               unsigned char ucFlankingColumnSpecies =
                  aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                  aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfEndingColumn ];

               // now look at all columns between these flanking columns

               for( int nConsPos = nConsPosOfStartingColumn + 1;
                    nConsPos <= nConsPosOfEndingColumn - 1;
                    ++nConsPos ) {

                  // do not use the same column more than once


                  if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                                          ucFlankingColumnSpecies ) ) {

                     unsigned char ucFlankedSpeciesAtThisColumn =
                        aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                        ucFlankingColumnSpecies;

                     // use this new flanking of the column if:
                     // 1) this column has not been used
                     //    or
                     // 2) the new set of species is a proper superset
                     //    of the old one

                     // the negation of the above is:
                     // 1)  the column has been used
                     //      and
                     // 2)  the new set of species is not a proper superset
                     //    of the old one

                     if ( aSpeciesInAcceptableColumns[ nConsPos ] != 0 &&
                          ( ( aSpeciesInAcceptableColumns[ nConsPos ] & 
                              ucFlankedSpeciesAtThisColumn ) !=
                            aSpeciesInAcceptableColumns[ nConsPos ] ) ) continue;

                     aSpeciesInAcceptableColumns[ nConsPos ] |=
                        ucFlankedSpeciesAtThisColumn;


                  } //   if ( bAllNeededSpecies(
               } //  for( int nConsPos = nConsPosOfStartingColumn + 1;
            } // 
         }
      } // if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteriaAndAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {
   } //    for( int nConsPosOfStartingColumn = nGetStartConsensusIndex(); 


   fprintf( pAO, "printFlankedColumns {\n" );

   fprintf( pAO, "order of species: HSap" );
   for( nSpecies = 0; nSpecies <
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
      fprintf( pAO, " %s", soSpecies.data() );
   }
   fprintf( pAO, "\n" );


   // now use aSpeciesInAcceptableColumns to print out which
   // species will be used for each position


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); 
        ++nSpecies ) {

      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
      unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

      int nColumnsWithNoneOnLeft = 0;
      int nColumnsWithSomeOrNoneOnLeft = 0;
      
      for( nConsPos = nGetStartConsensusIndex(); 
           nConsPos <= nGetEndConsensusIndex(); 
           ++nConsPos ) {


         if ( aSpeciesInAcceptableColumns[ nConsPos ] & ucSpeciesMask ) {
            
            ++nColumnsWithSomeOrNoneOnLeft;
            if ( nConsPos == nGetStartConsensusIndex() ) {
               ++nColumnsWithNoneOnLeft;
            }
            else if ( !(  aSpeciesInAcceptableColumns[ nConsPos - 1 ] & ucSpeciesMask ) ) {
               ++nColumnsWithNoneOnLeft;
            }
         }
      }

      fprintf( pAO, "%s %d usable columns with none on left out of %d acceptable\n",
               soSpecies.data(),
               nColumnsWithNoneOnLeft,
               nColumnsWithSomeOrNoneOnLeft );

   }            

   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); 
        ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
      unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

      int nColumnsWithNoneOnOneSide = 0;
      int nColumnsWithSomeOrNoneOnOneSide = 0;
      
      for( nConsPos = nGetStartConsensusIndex(); 
           nConsPos <= nGetEndConsensusIndex(); 
           ++nConsPos ) {


         if ( aSpeciesInAcceptableColumns[ nConsPos ] & ucSpeciesMask ) {
            
            ++nColumnsWithSomeOrNoneOnOneSide;
            if ( nConsPos == nGetStartConsensusIndex() ||
                 nConsPos == nGetEndConsensusIndex()) {
               ++nColumnsWithNoneOnOneSide;
            }
            else if ( !(  aSpeciesInAcceptableColumns[ nConsPos - 1 ] & ucSpeciesMask ) ||
                      !( aSpeciesInAcceptableColumns[ nConsPos + 1 ] & ucSpeciesMask ) ) {
               ++nColumnsWithNoneOnOneSide;
            }
         }
      }

      fprintf( pAO, "%s %d usable columns with none on one side out of %d acceptable\n",
               soSpecies.data(),
               nColumnsWithNoneOnOneSide,
               nColumnsWithSomeOrNoneOnOneSide );

   }            


   int nColumnsWithNoneOnOneSide = 0;
   int nColumnsWithSomeOrNoneOnOneSide = 0;

   for( nConsPos = nGetStartConsensusIndex();
        nConsPos <= nGetEndConsensusIndex();
        ++nConsPos ) {

      if ( aSpeciesInAcceptableColumns[ nConsPos ] ) {

         ++nColumnsWithSomeOrNoneOnOneSide;
         if ( nConsPos == nGetStartConsensusIndex() ||
              nConsPos == nGetEndConsensusIndex()) {
            ++nColumnsWithNoneOnOneSide;
         }
         else if ( !aSpeciesInAcceptableColumns[ nConsPos - 1] ||
                   !aSpeciesInAcceptableColumns[ nConsPos + 1] ) {
            ++nColumnsWithNoneOnOneSide;
         }
      }
   }

   fprintf( pAO, "%d usable real columns with none on one side out of %d acceptable\n",
            nColumnsWithNoneOnOneSide,
            nColumnsWithSomeOrNoneOnOneSide );
      

}

static RWCString soGetTreeWithNoMutation( const RWCString soPresentDayBasesHumanInFront ) {

   RWCString soPresentDayBasesPPanInFront = soPresentDayBasesHumanInFront;
   
   // PPan
   soPresentDayBasesPPanInFront[0] = soPresentDayBasesHumanInFront[1];
   // PTro
   soPresentDayBasesPPanInFront[1] = soPresentDayBasesHumanInFront[2];
   // HSap
   soPresentDayBasesPPanInFront[2] = soPresentDayBasesHumanInFront[0];

   // find base

   char cBase = 0;
   int n;
   for( n = 0; n < soPresentDayBasesPPanInFront.length(); ++n ) {
      if ( soPresentDayBasesPPanInFront[n] != 'n' ) {
         cBase = soPresentDayBasesPPanInFront[n];
         break;
      }
   }
   
   assert( cBase );

   // trees look like:
   // g(g(t(t(t(t,t),t),t),g?),g)


   RWCString soTreeLeft;
   RWCString soTreeRight;

   for( n = soPresentDayBasesPPanInFront.length() - 1; n >= 1;
        --n ) {
      soTreeLeft += RWCString( cBase );
      soTreeLeft += "(";

      soTreeRight = "," + RWCString( cBase ) +
         ( soPresentDayBasesPPanInFront[n] == 'n' ? "?" : "" ) +
         ")" + soTreeRight;
   }

   // we have to handle PPan separately from the loop above because it
   // might have a "?" next to it

   RWCString soTree = soTreeLeft + 
      RWCString( cBase ) + 
      ( soPresentDayBasesPPanInFront[0] == 'n' ? "?" : "" ) +
      soTreeRight;

   return( soTree );
}


void Contig :: adjustReadQualitiesAndRecalculateHighQualitySegments(
        autoReport* pAutoReport,
        const RWTPtrOrderedVector<LocatedFragment>& aGoodReads ) {

   for( int nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;

      for( int nRead = 0; nRead < aGoodReads.length(); ++nRead ) {
         LocatedFragment* pLocFrag = aGoodReads[ nRead ];
         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // there is only any adjusting to be done if we have
      // both a forward and a reverse good-hit read

      if ( !pForwardRead || !pReverseRead ) continue;

      int nIntersectLeft;
      int nIntersectRight;

      if ( !bIntersect( pForwardRead->nGetAlignClipStart(),
                        pForwardRead->nGetAlignClipEnd(),
                        pReverseRead->nGetAlignClipStart(),
                        pReverseRead->nGetAlignClipEnd(),
                        nIntersectLeft,
                        nIntersectRight ) ) continue;

      
      for( int nConsPos = nIntersectLeft; nConsPos <= nIntersectRight;
           ++nConsPos ) {
         if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase()
              ==
              pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {

            int nCombinedQuality =
               pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
               +
               pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

            nCombinedQuality = MIN( nCombinedQuality, 90 );

            pForwardRead->setQualityAtConsPos( nConsPos, nCombinedQuality );
            pReverseRead->setQualityAtConsPos( nConsPos, nCombinedQuality );
         }
      }

      // now adjust the high quality segments

      pForwardRead->calculateHighQualitySegmentOfRead();
      pReverseRead->calculateHighQualitySegmentOfRead();
   } //  for( int nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();


}


void Contig :: applyFilters( 
        autoReport* pAutoReport,
        RWTPtrVector<LocatedFragment>& aCombinedReads,
        mbtValVector<unsigned char>& aSpeciesInAcceptableColumns,
        RWTValVector<RWCString>& aArrayOfOneTreeAtEachConsPos,
        RWTValVector<RWCString>& aArrayOfBasesAtEachConsPos,
        RWTValVector<RWCString>& aArrayOfLenientlyFilteredBasesAtEachConsPos,
        RWTValVector<RWCString>& aDeaminationMutationsAtEachConsPos,
        RWTValVector<char>& aAncestralCpGAtEachConsPos2,
                             // aAncestralCpGAtEachConsPos has just
                             // the left position marked for an
                             // ancestral CpG
                             // aAncestralCpGAtEachConsPos2 has both
                             // positions marked for an ancestral CpG
        LocatedFragment*& pHumanRead ) {


   // this differs from printFlankedColumns3 in that it records the
   // trees and attempts to identify which mutations are deamination
   // mutations

   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }


   RWTPtrOrderedVector<LocatedFragment> aGoodReads( (size_t) 10 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );


         if ( pLocFrag->soGetName() == soLine ) {
            // the fake read for the 2nd alignment
            assert( !pLocFrag->soGetName().bContains( "b." ) );

            aGoodReads.insert( pLocFrag );
         }
      }
   }

   fclose( pGoodReads );

   //   aGoodReads.resort();  inefficient--just check every one

   // find pHumanRead

   pHumanRead = NULL;

   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );
      if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
         pHumanRead = pLocFrag;
         break;
      }
   }

   assert( pHumanRead );


   RWTPtrOrderedVector<LocatedFragment> aListOfReadsInSingleSignalArrays( (size_t) 10 );

   typedef MBTValOrderedOffsetVector<char> typeArrayOfChar;
   RWTPtrOrderedVector<typeArrayOfChar> aListOfSingleSignalArrays( (size_t) 10 );


   // set the single signal arrays (aListOfSingleSignalArrays)

   int nSpecies;
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      
      for( int nRead = 0; nRead < aGoodReads.length(); ++nRead ) {
         LocatedFragment* pLocFrag = aGoodReads[ nRead ];

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // save single signal arrays before deleting column of pads

      for( int n = 0; n <= 1; ++n ) {
         LocatedFragment* pLocFrag = NULL;

         if ( n == 0 )
            pLocFrag = pForwardRead;
         else
            pLocFrag = pReverseRead;

         if ( !pLocFrag ) continue;

         MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;

         assert( pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray ) );

         aListOfReadsInSingleSignalArrays.insert( pLocFrag );
         aListOfSingleSignalArrays.insert( pSingleSignalArray );

      }

   } //    for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_...


   if ( pCP->bAutoReportDeleteColumnsOfPadsBeforeAdjustingReadQualityValues_ ) {
      

      deleteColumnsOfPads( true, // bAddQualityValuesOfStrands
                           pAutoReport,
                           pHumanRead,
                           aGoodReads,
                           aListOfReadsInSingleSignalArrays,
                           aListOfSingleSignalArrays );

      adjustReadQualitiesAndRecalculateHighQualitySegments( pAutoReport,
                                                            aGoodReads );
   }
   else {
      
      adjustReadQualitiesAndRecalculateHighQualitySegments( pAutoReport,
                                                            aGoodReads );

      deleteColumnsOfPads( false, // bAddQualityValuesOfStrands
                           pAutoReport,
                           pHumanRead,
                           aGoodReads,
                           aListOfReadsInSingleSignalArrays,
                           aListOfSingleSignalArrays );

   }


   // new padded length after deleting columns

   int nPaddedContigLength = nGetPaddedConsLength();

   // create a combined read, one for each species
   // each combined read will be the length of the entire consensus


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      RWCString soCombinedReadName = soSpecies + ".comb";
      
      LocatedFragment* pCombinedRead =
         new LocatedFragment( soCombinedReadName,
                              1, // aligned position within contig
                              false, // bComplemented
                              this ); // contig

      aCombinedReads[ nSpecies ] = pCombinedRead;

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         pCombinedRead->appendNtide( Ntide( 'n', 0 ) );
      }
   }


   // Now create bit arrays, one for each species


   mbtValVector<unsigned char> aMaskOfSpeciesMeetingCriteria( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesMeetingCriteria" );

   mbtValVector<unsigned char> aMaskOfSpeciesAgreeingWithHuman( nGetPaddedConsLength(),
                                      1, // starts at 1
                                      0, // initial value
                                      "Contig::aMaskOfSpeciesAndAgreeingWithHuman" );


   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length();
        ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pCombinedRead = aCombinedReads[ nSpecies ];
      assert( pCombinedRead->soGetName().bContains( soSpecies ) );


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;


      for( int nRead = 0; nRead < aGoodReads.length(); ++nRead ) {
         LocatedFragment* pLocFrag = aGoodReads( nRead );

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;


         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }


      // find single signal arrays

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      for( int nSingleSignalArray = 0;
           nSingleSignalArray < aListOfReadsInSingleSignalArrays.length();
           ++nSingleSignalArray ) {

         if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
              pForwardRead ) {
            
            pForwardReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
         else if ( aListOfReadsInSingleSignalArrays[ nSingleSignalArray ] ==
                   pReverseRead ) {
            pReverseReadSingleSignalArray =
               aListOfSingleSignalArrays[ nSingleSignalArray ];
         }
      }

      if ( pForwardRead ) {
         assert( pForwardReadSingleSignalArray );
      }

      if ( pReverseRead ) {
         assert( pReverseReadSingleSignalArray );
      }


      // at this point, we have whiever reads are available for that
      // species and their single-signal arrays

      unsigned char ucSpeciesMask = ucGetMaskForSpecies( soSpecies );

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         
         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         char cCombinedBase = 0;
         int nCombinedQuality = -666;

         bool bCombinedReadALittleOK;
         bool bCombinedReadOK = 
            bGetCombinedReadAtBase3( false, // bAddQualityValuesOfStrands
                                     pForwardRead,
                                     pForwardReadSingleSignalArray,
                                     pReverseRead,
                                     pReverseReadSingleSignalArray,
                                     nConsPos,
                                     cCombinedBase,
                                     nCombinedQuality,
                                     bCombinedReadALittleOK);

         if ( bCombinedReadOK || bCombinedReadALittleOK ) {
            pCombinedRead->Sequence::setBaseAtPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                cCombinedBase );

            pCombinedRead->Sequence::setQualityAtSeqPos(
                pCombinedRead->nGetFragIndexFromConsPos( nConsPos ),
                nCombinedQuality );

            // agreeing pads do not count as flanking bases.  To be
            // sure, explicitly check for this.  (Even though we deleted
            // columns of pads, the might be some columns not removed
            // that have a pad in human and some species but not others.)
            // Note that we have lowered the requirement for flanking
            // bases--they need not be single signal, nor above a quality
            // threshold nor in high quality segment.  They must be 
            // aligned and good-hit reads.

            if ( ( cCombinedBase == ntGetCons( nConsPos ).cGetBase() ) &&
                 !ntGetCons( nConsPos ).bIsPad() ) {
               aMaskOfSpeciesAgreeingWithHuman[ nConsPos ] 
                  |= ucSpeciesMask;
            }

         }
         
         if ( bCombinedReadOK ) 
            aMaskOfSpeciesMeetingCriteria[nConsPos] |= ucSpeciesMask;


      } //  for( int nConsPos = nGetStartConsensusIndex();
   } //    for( nSpecies = 0; nSpecies < ...


   // now we have finished setting 3 key arrays:
   // aCombinedReads
   // aMaskOfSpeciesMeetingCriteria
   // aMaskOfSpeciesAgreeingWithHuman

   // with these, we can figure out the flanked columns and
   // print out the bases in those columns


   aSpeciesInAcceptableColumns.reshape( nGetPaddedConsLength(),
                                        0 ); // initial value

   
   const int nWidthOfFlankedRegion = 3;

   int nConsPosOfStartingColumn;
   for( nConsPosOfStartingColumn = nGetStartConsensusIndex(); 
        nConsPosOfStartingColumn <= nGetEndConsensusIndex(); 
        ++nConsPosOfStartingColumn ) {

      
      if ( bAllNeededSpecies( aMaskOfSpeciesAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {

         for( int nConsPosOfEndingColumn = nConsPosOfStartingColumn + 2;
              nConsPosOfEndingColumn <=
                 MIN( nConsPosOfStartingColumn + nWidthOfFlankedRegion + 1, nGetEndConsensusIndex() );
              ++nConsPosOfEndingColumn ) {

            if ( bAllNeededSpecies( 
                   aMaskOfSpeciesAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                   aMaskOfSpeciesAgreeingWithHuman[ nConsPosOfEndingColumn ] ) ) {

               // we have a pair of flanking columns.  However, the
               // columns between the flanking columns may not 
               // be ok.  For example, it might be that the flanking
               // gorilla is single signal, but one of the internal
               // columns doesn't have single signal for gorilla.  
               // I think in such a case, we could use the columns
               // that are ok, but just not use the column in which
               // gorilla is not ok.  We should not use any species
               // that isn't supported by the flanking columns, though.

               unsigned char ucFlankingColumnSpecies =
                  aMaskOfSpeciesAgreeingWithHuman[ nConsPosOfStartingColumn ] &
                  aMaskOfSpeciesAgreeingWithHuman[ nConsPosOfEndingColumn ];

               // now look at all columns between these flanking columns

               for( int nConsPos = nConsPosOfStartingColumn + 1;
                    nConsPos <= nConsPosOfEndingColumn - 1;
                    ++nConsPos ) {

                  // do not use the same column more than once


                  if ( bAllNeededSpecies( aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                                          ucFlankingColumnSpecies ) ) {

                     unsigned char ucFlankedSpeciesAtThisColumn =
                        aMaskOfSpeciesMeetingCriteria[ nConsPos ] &
                        ucFlankingColumnSpecies;

                     // use this new flanking of the column if:
                     // 1) this column has not been used
                     //    or
                     // 2) the new set of species is a proper superset
                     //    of the old one

                     // the negation of the above is:
                     // 1)  the column has been used
                     //      and
                     // 2)  the new set of species is not a proper superset
                     //    of the old one

                     if ( aSpeciesInAcceptableColumns[ nConsPos ] != 0 &&
                          ( ( aSpeciesInAcceptableColumns[ nConsPos ] & 
                              ucFlankedSpeciesAtThisColumn ) !=
                            aSpeciesInAcceptableColumns[ nConsPos ] ) ) continue;

                     aSpeciesInAcceptableColumns[ nConsPos ] |=
                        ucFlankedSpeciesAtThisColumn;


                  } //   if ( bAllNeededSpecies(
               } //  for( int nConsPos = nConsPosOfStartingColumn + 1;
            } // 
         }
      } // if ( bAllNeededSpecies( aMaskOfSpeciesAgreeingWithHuman[ nConsPosOfStartingColumn ] ) ) {
   } //    for( int nConsPosOfStartingColumn = nGetStartConsensusIndex(); 


   int nArraySize = nGetEndConsensusIndex() - nGetStartConsensusIndex() + 1;

   aArrayOfOneTreeAtEachConsPos.reshape( nArraySize + 1, 
                                         "" ); // initial value
          // + 1so we can use nConsPos (1-based) directly as an index

   aArrayOfBasesAtEachConsPos.reshape( nArraySize + 1, 
          // + 1so we can use nConsPos (1-based) directly as an index
                                       "" ); // initial value

   aArrayOfLenientlyFilteredBasesAtEachConsPos.reshape( 
         nArraySize + 1,
         "" ); // initial value

   typedef RWTValVector<bool> typeArrayOfBool;

   RWTPtrVector<typeArrayOfBool> 
      aArrayOfChosenTreesAtEachConsPos( (size_t) nArraySize + 1,
                                              0, // initial values
                                              "aArrayOfChosenTreesAtEachConsPos" );

   RWTPtrVector<arrayOfRWCString>
      aArrayOfTreesAtEachConsPos( (size_t) nArraySize + 1,
                                  0, // initial values
                                  "aArrayofTreesAtEachConsPos" );


   int nConsPos;
   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      // Jan 18 changed so that bases are returned even for
      // columns that do not pass all filters
      //      if ( ! aSpeciesInAcceptableColumns[ nConsPos ] ) continue;


      RWCString soColumnOfBases( (size_t) 6 ); 
      RWCString soColumnOfBadBases( (size_t) 6 ); 

      
      soColumnOfBases = ntGetCons( nConsPos ).cGetBase(); // start with human
      soColumnOfBadBases = soColumnOfBases;

      for( int nSpecies = 0; nSpecies <
              pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

         RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];
         unsigned char ucSpeciesMask = 
            ucGetMaskForSpecies( soSpecies );

         LocatedFragment* pCombinedRead =
            aCombinedReads[ nSpecies ];

         assert( pCombinedRead );

         char cBadBase = pCombinedRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
         char cBase;


         if ( ucSpeciesMask & aSpeciesInAcceptableColumns[ nConsPos ] ) {

            // if reached here, soSpecies is a flanked species
            // which, at this columns, meets the criteria
                        
            // so print out the base

            cBase = cBadBase;

         }
         else {
            // always the same number of bases on each line
            cBase = 'n';

         }


         // soColumnOfBases in in order HSap PPan PTro ...
         if ( soSpecies != "HSap" ) {
            soColumnOfBases += cBase;
            soColumnOfBadBases += cBadBase;
         }

      }  //  for( int nSpecies = 0; nSpecies <


      assert( soColumnOfBadBases.length() == 
              soColumnOfBases.length() );


      aArrayOfBasesAtEachConsPos[ nConsPos ] = soColumnOfBases;
      aArrayOfLenientlyFilteredBasesAtEachConsPos[ nConsPos ] = soColumnOfBadBases;


      RWTValOrderedVector<RWCString> aTrees;


      findAncestralTrees( soColumnOfBases, aTrees );


      arrayOfRWCString* pTrees = new
         RWTValOrderedVector<RWCString>( aTrees );

      aArrayOfTreesAtEachConsPos[ nConsPos ] = pTrees;

      aArrayOfChosenTreesAtEachConsPos[ nConsPos ] =
         new RWTValVector<bool>( (size_t) aTrees.length(),  
                                 true, // initial value
                                 "aArrayOfChosenTreesAtEachConsPos( " +
                                 RWCString( (long) nConsPos ) + ")" );


      if ( aTrees.length() > 1 &&
           pCP->bAutoReportChooseTreesUsingBadData_ ) {

         chooseTreesUsingBadData( soColumnOfBadBases, 
                                  *pTrees,
                                  aArrayOfChosenTreesAtEachConsPos[ nConsPos ] );
      }


   } //    for( nConsPos = nGetStartConsensusIndex(); 


   // if we are only interested in single parsimonious tree sites, delete
   // those that are not

   if ( pCP->bAutoReportIgnoreMultipleTrees_ ) {

      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         if ( aArrayOfTreesAtEachConsPos[ nConsPos ] ) {
            if ( aArrayOfTreesAtEachConsPos[ nConsPos ]->length() > 1 ) {
               aSpeciesInAcceptableColumns[ nConsPos ] = 0;

               aArrayOfTreesAtEachConsPos[ nConsPos ] = 0;
            }
         }
      }
   }


   // choosen trees based on deamination mutations


   for( nConsPos = nGetStartConsensusIndex() + 1; 
        nConsPos <= nGetEndConsensusIndex(); 
        ++nConsPos ) {

      if ( !aArrayOfTreesAtEachConsPos[ nConsPos - 1 ] ) continue;
      if ( !aArrayOfTreesAtEachConsPos[ nConsPos ] ) continue;


      checkForDeaminationMutationsAmongMultipleTrees( 
                       aArrayOfTreesAtEachConsPos[ nConsPos - 1 ],
                       aArrayOfTreesAtEachConsPos[ nConsPos ],
                       nConsPos - 1,
                       aArrayOfChosenTreesAtEachConsPos[ nConsPos - 1 ],
                       aArrayOfChosenTreesAtEachConsPos[ nConsPos ] );

   }


   if ( pCP->bAutoReportChooseTreesUsingKimura_ ) {


      // choose most likely tree at each position

      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex();
           ++nConsPos ) {

         arrayOfRWCString* pArrayOfTrees = aArrayOfTreesAtEachConsPos[ nConsPos ];

         if ( pArrayOfTrees ) {

            RWTValVector<bool>* pChosenTrees = 
               aArrayOfChosenTreesAtEachConsPos[ nConsPos ];

            assert( pChosenTrees );

            assert( pChosenTrees->length() == pArrayOfTrees->length() );

            if ( pArrayOfTrees->length() == 1 ) {
               (*pChosenTrees)[0] = true;
            }
            else {


               float fHighestProbability = 0.0;
               int nIndexOfBestTree = -666;
               RWCString soBestTree;

               
               int nTree;
               for( nTree = 0; nTree < pArrayOfTrees->length(); ++nTree ) {


                  if ( !(*pChosenTrees)[ nTree ] ) continue;
      
                  RWCString soTree = (*pArrayOfTrees)[ nTree ];


                  float fProbOfOneTree = fGetProbabilityOfTree( soTree );


                  if ( fProbOfOneTree > fHighestProbability ) {
                     fHighestProbability = fProbOfOneTree;
                     soBestTree = soTree;
                     nIndexOfBestTree = nTree;
                  }
               }

               for( nTree = 0; nTree < pArrayOfTrees->length(); ++nTree ) {

                  if ( nTree == nIndexOfBestTree ) 
                     (*pChosenTrees)[ nTree ] = true;
                  else
                     (*pChosenTrees)[ nTree ] = false;

               }

            } //  if ( pArrayOfTrees->length() == 1 ) { else
         } //  if ( pArrayOfTrees ) {

      } //   for( nConsPos = nGetStartConsensusIndex();
   } //    if ( pCP->bAutoReportChooseTreesUsingKimura_ ) {


   // now shouldn't I only return the trees that are chosen?

   for( nConsPos = nGetStartConsensusIndex();
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

      RWTValVector<bool>* pChosenTrees =
         aArrayOfChosenTreesAtEachConsPos[ nConsPos ];

      arrayOfRWCString* pArrayOfTrees = aArrayOfTreesAtEachConsPos[ nConsPos ];

      if ( pArrayOfTrees ) {


         int nNumberOfChosenTrees = 0;

         for( int nTree = 0; nTree < pChosenTrees->length(); ++nTree ) {

            if ( (*pChosenTrees)[ nTree ] ) {

               ++nNumberOfChosenTrees;

               if ( nNumberOfChosenTrees == 1 ){

                  RWCString soTree = (*pArrayOfTrees)[ nTree ];

                  aArrayOfOneTreeAtEachConsPos[ nConsPos ] = soTree;
               }
            }
         }

         if ( nNumberOfChosenTrees != 1 ) {
            aArrayOfOneTreeAtEachConsPos[ nConsPos ] = "";
            aSpeciesInAcceptableColumns[ nConsPos ] = 0;
         }  
      }

   }


   // at this point, I need to go through the chosen trees and check
   // for deamination mutations (again), now that I have chosen the trees.

   // now look at mutations in which the ancestral sequence is a CpG
   // and it clearly is a deamination mutation.  Mark them as such.

   aDeaminationMutationsAtEachConsPos.reshape( nArraySize + 1, "" );


   // if a CpG occurs, both
   // columns are marked, rather than just the first column

   aAncestralCpGAtEachConsPos2.reshape( nArraySize + 1, 
                                        cNoCpG ); // initial value


   checkForDeaminationMutationsAmongUniqueTrees( aArrayOfOneTreeAtEachConsPos,
                                                 aDeaminationMutationsAtEachConsPos,
                                                 aAncestralCpGAtEachConsPos2 );


   if ( pCP->bAutoReportOnlyAllowSitesThatAreBetweenAcceptableSites_ ) {

      RWTValVector<bool> aSitesBetweenTwoGoodSites(  
         nGetPaddedConsLength() + 1,
         false, // initial value
         "aSitesBetweenTwoGoodSites" );


      for( nConsPos = nGetStartConsensusIndex() + 1;
           nConsPos <= nGetEndConsensusIndex() - 1; ++nConsPos ) {

         if ( bAllNeededSpecies( aSpeciesInAcceptableColumns[ nConsPos - 1 ] )
              &&
              bAllNeededSpecies( aSpeciesInAcceptableColumns[ nConsPos + 1 ] ) ) {
            aSitesBetweenTwoGoodSites[ nConsPos ] = true;
         }
      }

      // now use this to zero the sites that are not between two good sites

      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         if ( !aSitesBetweenTwoGoodSites[ nConsPos ] ) {
            aSpeciesInAcceptableColumns[ nConsPos ] = 0;
            aArrayOfOneTreeAtEachConsPos[ nConsPos ] = "";
         }
      }
   } //  if ( pCP->bAutoReportOnlyAllowSitesThatAreBetweenAcceptableSites_ ) {


} // applyFilters
                             

void Contig :: checkForDeaminationMutationsAmongUniqueTrees( 
        RWTValVector<RWCString>& aArrayOfOneTreeAtEachConsPos,
        RWTValVector<RWCString>& aDeaminationMutationsAtEachConsPos,
        RWTValVector<char>& aAncestralCpGAtEachConsPos2 ) {

   for( int nConsPos = nGetStartConsensusIndex() + 1;
        nConsPos <= nGetEndConsensusIndex();
        ++nConsPos ) {

      RWCString soTreeA = aArrayOfOneTreeAtEachConsPos[ nConsPos - 1];
      RWCString soTreeB = aArrayOfOneTreeAtEachConsPos[ nConsPos ];

      if ( soTreeA.isNull() || soTreeB.isNull() ) continue;


      soTreeA.removeSingleCharacterAllOccurrences('?');
      soTreeA.removeSingleCharacterAllOccurrences('(');
      soTreeA.removeSingleCharacterAllOccurrences(')');
      soTreeA.removeSingleCharacterAllOccurrences(',');

      soTreeB.removeSingleCharacterAllOccurrences('?');
      soTreeB.removeSingleCharacterAllOccurrences('(');
      soTreeB.removeSingleCharacterAllOccurrences(')');
      soTreeB.removeSingleCharacterAllOccurrences(',');

      // abcdef
      // ^ top internal node, leading to PPyg and MMul 0
      //  ^ next internal node, leading to GGor 1
      //   ^ next internal node, leading to HSap, I hope! 2
      //    ^ next internal node, leading to PTro 3
      //     ^ PPan
      //      ^ PTro
      //       ^ GGor
      //    etc.

      // so there are 4 internal nodes to be concerned about

      bool bSomeInternalNodeIsACpG = false;

      RWTValOrderedVector<RWCString> aDeaminationMutationsPositionA( (size_t) 3 );
      
      RWTValOrderedVector<RWCString> aDeaminationMutationsPositionB( (size_t) 3 );

      
      for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode;
           ++nInternalNode ) {

         RWCString soAncestralSequence =
            RWCString( soTreeA[ nInternalNode ] ) +
            RWCString( soTreeB[ nInternalNode ] );

         // special case of MMul in which the 
         // deamination mutation occurs on the short part of the 
         // branch that goes down to the right to the top
         // internal node.  In such a case, the MMul present day
         // base is actually older than the top internal node.
         // Thus we look for a CpG at the MMul present day bases
         // and some TpG or CpA at the top internal node.

         if ( nInternalNode == 0 ) {
            int nIndexOfPresentDayBases = 9;
            
            RWCString soPresentDayBases =
               RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
               RWCString( soTreeB[ nIndexOfPresentDayBases ] );

            if ( soPresentDayBases == "cg" ) {
               
               bSomeInternalNodeIsACpG = true;

               if ( soAncestralSequence == "ca" ) {
                  
                  aDeaminationMutationsPositionB.insert(
                     soGetBranchNameWithoutTree( -1, nDownToRight )
                     );

               }
               else if ( soAncestralSequence == "tg" ) {

                  aDeaminationMutationsPositionA.insert(
                     soGetBranchNameWithoutTree( -1, nDownToRight )
                     );
               }
            }
         } //  if ( nInternalNode == 0 ) {

         if ( soAncestralSequence != "cg" ) continue;

         bSomeInternalNodeIsACpG = true;

         // if reached here, ancestral sequence is a CpG
         // is there a mutation from this position?

         // check down-right to the corresponding present-day node
         // then check down-left to next internal node

         int nIndexOfPresentDayBases = 8 - nInternalNode;

         RWCString soPresentDayBases =
            RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
            RWCString( soTreeB[ nIndexOfPresentDayBases ] );

         if ( soPresentDayBases == "ca" ) {

         
            aDeaminationMutationsPositionB.insert(
               soGetBranchNameWithoutTree( nInternalNode, nDownToRight )
               );

         }
         else if ( soPresentDayBases == "tg" ) {
            

            aDeaminationMutationsPositionA.insert(
               soGetBranchNameWithoutTree( nInternalNode, nDownToRight)
               );
         }

         // case of top internal node which has the additional
         // external branch leading down to MMul

         if ( nInternalNode == 0 ) {

            nIndexOfPresentDayBases = nMMulPresentDayNode;

            soPresentDayBases =
               RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
               RWCString( soTreeB[ nIndexOfPresentDayBases ] );

            if ( soPresentDayBases == "ca" ) {

               aDeaminationMutationsPositionB.insert( 
                     soGetBranchNameWithoutTree( -1, nDownToRight ) );
            }
            else if ( soPresentDayBases == "tg" ) {
               aDeaminationMutationsPositionA.insert(
                     soGetBranchNameWithoutTree( -1, nDownToRight ) );

            }
         }

         // if nInternalNode is 0-2 (PPyg, GGor, HSap), the next node
         // is the descendant internal node.  If nInternalNode is 3,
         // the next node is PPan.
         
         RWCString soDescendantInternalNode =
            RWCString( soTreeA[ nInternalNode + 1 ] ) +
            RWCString( soTreeB[ nInternalNode + 1 ] );

         if ( soDescendantInternalNode == "ca" ) {
            aDeaminationMutationsPositionB.insert( 
                  soGetBranchNameWithoutTree( nInternalNode, nOnBackbone ) );
         }
         else if ( soDescendantInternalNode == "tg" ) {
            aDeaminationMutationsPositionA.insert(
                  soGetBranchNameWithoutTree( nInternalNode, nOnBackbone ) );
         }

         
      } //       for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode;

      
      if ( bSomeInternalNodeIsACpG ) {
         aAncestralCpGAtEachConsPos2[ nConsPos - 1 ] = cDefinitelyCpG;
         aAncestralCpGAtEachConsPos2[ nConsPos     ] = cDefinitelyCpG;
      }


      int nDeam;
      for( nDeam = 0; nDeam < aDeaminationMutationsPositionA.length();
           ++nDeam ) {
         if ( !aDeaminationMutationsAtEachConsPos[ nConsPos - 1 ].isNull() )
            aDeaminationMutationsAtEachConsPos[ nConsPos - 1 ] += " ";

         aDeaminationMutationsAtEachConsPos[ nConsPos - 1 ] +=
            aDeaminationMutationsPositionA[ nDeam ];
      }

      for( nDeam = 0; nDeam < aDeaminationMutationsPositionB.length();
           ++nDeam ) {
         if ( !aDeaminationMutationsAtEachConsPos[nConsPos].isNull() )
            aDeaminationMutationsAtEachConsPos[ nConsPos ] += " ";

         aDeaminationMutationsAtEachConsPos[ nConsPos ] +=
            aDeaminationMutationsPositionB[ nDeam ];
      }

   } //    for( int nConsPos = nGetStartConsensusIndex() + 1;

}


float Contig :: fGetProbabilityOfTree( const RWCString& soTreeConst ) {

   RWCString soTree = soTreeConst;

   if ( !pBranchProbabilities_ ) {
      pBranchProbabilities_ = new kimuraBranchProb();
   }

   soTree.removeSingleCharacterAllOccurrences('?');
   soTree.removeSingleCharacterAllOccurrences('(');
   soTree.removeSingleCharacterAllOccurrences(')');
   soTree.removeSingleCharacterAllOccurrences(',');

   // for each branch, figure out its probability

   // notice that this use of internal node labels them from top
   // of the tree down:  0 is about PPyg and MMul, 1 is above GGor,
   // 2 is above HSap, and 3 is above PPan and PTro

   float fTotalProb = 1.0;

   for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode;
        ++nInternalNode ) {

      char cAncestralBase = soTree[ nInternalNode ];

      if ( nInternalNode == 0 ) {
         // special case of MMul,

         int nIndexOfPresentDayBase = 9;
         char cPresentDayBase = soTree[ nIndexOfPresentDayBase ];

         fTotalProb *= 
            pBranchProbabilities_->fGetBranchProbability( nInternalNode, 
                                                          nIndexOfPresentDayBase, 
                                   cAncestralBase, cPresentDayBase );
   
      }

      int nIndexOfPresentDayBase = 8 - nInternalNode;
      char cPresentDayBase = soTree[ nIndexOfPresentDayBase ];
      
      fTotalProb *=
         pBranchProbabilities_->fGetBranchProbability( nInternalNode, 
                                nIndexOfPresentDayBase, 
                                cAncestralBase, cPresentDayBase );

      // if nInternalNode is 0-3, the next node is the
      // descendant internal node.  if nInternalNode is
      // 4, the next node is PPan

      char cDescendantInternalBase = soTree[ nInternalNode + 1 ];

      fTotalProb *=
         pBranchProbabilities_->fGetBranchProbability( nInternalNode,
                                nInternalNode + 1, 
                                cAncestralBase, 
                                cDescendantInternalBase );
   }

   return( fTotalProb );


}


void Contig :: printFlankedColumns4( autoReport* pAutoReport ) {


   RWTPtrVector<LocatedFragment> aCombinedReads( (size_t) pAutoReport->aArrayOfSpecies_.length(),
                                                 NULL, // initial value
                                                 "aCombinedReads" );

   mbtValVector<unsigned char> aSpeciesInAcceptableColumns( 1, // will be changed
                                                    1, // starting position
                                                    0, // initial value
                                                    "aSpeciesInAcceptableColumns" );


   RWTValVector<RWCString> aArrayOfOneTreeAtEachConsPos( 
          1, // size--will be changed
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfOneTreeAtEachConsPos" );

   RWTValVector<RWCString> aArrayOfBasesAtEachConsPos( 
          1, // size--will be changed
          // + 1so we can use nConsPos (1-based) directly as an index
          "",
          "aArrayOfBasesAtEachConsPos" );

   RWTValVector<RWCString> aArrayOfLenientlyFilteredBasesAtEachConsPos(
          1, // size--will be changed
          // + 1 so we can use nConsPos (1-based) directly as an index
          "", // initial value
          "aArrayOfLenientlyFilteredBasesAtEachConsPos" );


   RWTValVector<RWCString> aDeaminationMutationsAtEachConsPos( 
          1, // size--will be changed
          "", // initial value
          "aDeaminationMutationsAtEachConsPos" );

   // this is different than above in that if a CpG occurs, both
   // columns are marked, rather than just the first column

   RWTValVector<char> aAncestralCpGAtEachConsPos2(
          1,// size--will be changed
          cNoCpG, // initial value
          "aAncestralCpGAtEachConsPos" );


   LocatedFragment* pHumanRead;


   applyFilters( pAutoReport,
                 aCombinedReads,
                 aSpeciesInAcceptableColumns,
                 aArrayOfOneTreeAtEachConsPos,
                 aArrayOfBasesAtEachConsPos,
                 aArrayOfLenientlyFilteredBasesAtEachConsPos,
                 aDeaminationMutationsAtEachConsPos,
                 aAncestralCpGAtEachConsPos2,
                 pHumanRead );


   if ( pCP->bAutoReportUseAnnotationFormat_ ) {

      // figure out chromosome

      RWCString soTemp =
         ConsEd::pGetAssembly()->filGetAceFileFullPathname().soGetProjectBeforeEditDir();
      
      // this isn't quite the chromosome, rather it is something like:
      // hs1-10143913

      // split on '-' and take the first token

      RWCTokenizer tok( soTemp );

      soTemp = tok('-');

      // now it is like: hs1
      // replace the "hs" with "chr"

      assert( soTemp.bStartsWithAndRemove("hs") );

      RWCString soChromosome = "chr" + soTemp;


      fprintf( pAO, "printCladeInformativeColumns {\n" );

      int nConsPos;
      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         if ( ! aSpeciesInAcceptableColumns[ nConsPos ] ) continue;

         RWCString soColumnOfBases = aArrayOfBasesAtEachConsPos[ nConsPos ];

         char cChimp = cGetAgreeingBasesNoNs( soColumnOfBases[nPPan],
                                              soColumnOfBases[nPTro] );

         // check if they disagree
         if ( cChimp == 0 ) continue;

         if ( soColumnOfBases[nHSap] == cChimp &&
              cChimp == soColumnOfBases[nGGor] ) continue;

         char cPPygMMul = cGetAgreeingBasesNoNs( soColumnOfBases[nPPyg],
                                                 soColumnOfBases[nMMul ] );

         // they disagree
         if ( cPPygMMul == 0 ) continue;

         // if reached here, PTro and PPan agree, MMul and PPyg agree

         char cHSap = soColumnOfBases[nHSap];
         char cGGor = soColumnOfBases[nGGor];

         RWCString soClade;
         char cNonHumanBase = ' ';
         if ( ( cHSap == cChimp ) && ( cGGor == cPPygMMul ) ) {
            soClade = soHCClade;
            cNonHumanBase = cGGor;
         }
         else if ( ( cHSap == cGGor ) && ( cChimp == cPPygMMul ) ) {
            soClade = soHGClade;
            cNonHumanBase = cChimp;
         }
         else if ( ( cChimp == cGGor ) && ( cHSap == cPPygMMul ) ) {
            soClade = soCGClade;
            cNonHumanBase = cChimp;
         }
         else {
            // this includes cases in which only one species is different
            // or there are 3 species that are all different
            continue;
         }

         RWCString soDeamination;


         if ( aDeaminationMutationsAtEachConsPos[ nConsPos ].isNull() ) {
            soDeamination = "nodeam";
         }
         else {
            soDeamination = "deam  ";
         }

         char cAncestralCpG = aAncestralCpGAtEachConsPos2[ nConsPos ];

         fprintf( pAO, "clade: %2s hum: %c other: %c %s chr: %11s %5s unpad: %4d %s cpg: %c\n",
                  soClade.data(),
                  cHSap,
                  cNonHumanBase,
                  soDeamination.data(),
                  soAddCommas( nUnpaddedIndexStartingAtUserDefinedPosition2( nConsPos ) ).data(),
                  soChromosome.data(),
                  nUnpaddedIndex( nConsPos ),
                  soGetName().data(), // contig name
                  cAncestralCpG );


      } //  for( nConsPos = nGetStartConsensusIndex();

      fprintf( pAO, "} printCladeInformativeColumns\n" );

   }
   else if ( pCP->bAutoReportPrintCpGMutations_ ) {
      fprintf( pAO, "CpG mutations {\n" );

      int nConsPos;
      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
         if ( !aDeaminationMutationsAtEachConsPos[ nConsPos ].isNull() ) {
            fprintf( pAO, "%d %s\n",
                     nUnpaddedIndex( nConsPos ),
                     soGetName().data() );
         }
      }

      fprintf( pAO, "} CpG mutations\n" );

   }
   else if ( pCP->bAutoReportPrintAncestralCpGs_ ) {
      fprintf( pAO, "Ancestral CpGs {\n" );
      int nConsPos;
      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         fprintf( pAO, "%d %c %s\n",
                  nUnpaddedIndex( nConsPos ),
                  aAncestralCpGAtEachConsPos2[ nConsPos ],
                  soGetName().data() );
      }
      fprintf( pAO, "} Ancestral CpGs\n" );
   }
   else if ( pCP->bAutoReportPrintPositionsForGraham_ ) {
      fprintf( pAO, "forGraham {\n" );
      
      int nConsPos;
      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); 
           ++nConsPos ) {

         if ( ! ( aSpeciesInAcceptableColumns[ nConsPos ]
                  & ucPTro ) ) continue;
      
         if ( ! ( aSpeciesInAcceptableColumns[ nConsPos ]
                  & ucMMul ) ) continue;
         
         if ( aAncestralCpGAtEachConsPos2[ nConsPos ] != cNoCpG ) 
            continue;

         fprintf( pAO, "%3d %s\n",
                  nUnpaddedIndex( nConsPos ),
                  aArrayOfBasesAtEachConsPos[ nConsPos ].data() );
      }
      
      fprintf( pAO, "} forGraham\n" );
      

   }
   else {


      // finally print everything out

      fprintf( pAO, "printTrees {\n" );

      int nConsPos;
      for( nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); 
           ++nConsPos ) {

         if ( ! aSpeciesInAcceptableColumns[ nConsPos ] ) continue;


         // now that we are less aggressive about deleting columns of pads,
         // there are some columns that consist of all pads (only some
         // of the pads are multiple signal because there is a multiple
         // signal base next to it, such as E01_hs1-10143913_PPan_050413.r
         // at 654.  We don't want to print these since there is no information.
         // I still think we shouldn't delete them, since we can't be sure
         // there really isn't a base and thus by deleting them we would
         // be making columns flanked that might not really be.

         if ( bIsAllPadsOrNs( aArrayOfBasesAtEachConsPos[ nConsPos ] ) ) {
            continue;
         }

         fprintf( pAO, "%3d %s", 
                  nUnpaddedIndex( nConsPos ),
                  aArrayOfBasesAtEachConsPos[ nConsPos ].data() );

         if ( !aArrayOfOneTreeAtEachConsPos[ nConsPos ].isNull() ) {
            fprintf( pAO, " %28s",
                     aArrayOfOneTreeAtEachConsPos[ nConsPos ].data() );

         }

         fprintf( pAO, " %s",
                     aDeaminationMutationsAtEachConsPos[ nConsPos ].data() );

         //         fprintf( pAO, " %c",
         //                  aAncestralCpGAtEachConsPos[ nConsPos ] );

      
         // contig name
         fprintf( pAO, " %s\n",
                  soGetName().data() );
      }

      fprintf( pAO, "} printTrees\n" );

   }
   

}


void Contig :: checkForDeaminationMutationsAmongMultipleTrees( 
                  RWTValOrderedVector<RWCString>* pTreesPositionA,
                  RWTValOrderedVector<RWCString>* pTreesPositionB,
                  const int nConsPos,
                  RWTValVector<bool>* pChosenTreesPositionA,
                  RWTValVector<bool>* pChosenTreesPositionB ) {


   // now the trees look like this:

   //         4  5  6  7  8 9
   // c(c(c(c(c?,c),c),c),c,c?)
   // 0 1 2 3 PPan  HSap  PPyg
   //            PTro  GGor MMul

   // all numbers below are string positions:
   // so internal node n goes to present day species 8-n and internal node
   // n+1.  2 exceptions:  internal node 0 goes to present day species
   // 8 and 9.  Internal node 3 goes to present day species 4 and 5.


   int nPairsInWhichAncestralIsCpG = 0;
   int nPairsInWhichDeaminationMutation = 0;

   int nGreatestNumberOfDeaminationMutationsInAnyPair = 0;

   RWTValOrderedVector<int> aPairsOfTreesWithGreatestNumberPositionA( (size_t) 2);
   RWTValOrderedVector<int> aPairsOfTreesWithGreatestNumberPositionB( (size_t) 2);


   for( int nTreeA = 0; nTreeA < pTreesPositionA->length(); ++nTreeA ) {

      if ( !(*pChosenTreesPositionA)[ nTreeA ] ) continue;
      
      RWCString soTreeA = (*pTreesPositionA)[ nTreeA ];

      // get rid of ? for now

      soTreeA.removeSingleCharacterAllOccurrences('?');
      soTreeA.removeSingleCharacterAllOccurrences('(');
      soTreeA.removeSingleCharacterAllOccurrences(')');
      soTreeA.removeSingleCharacterAllOccurrences(',');
      
      for( int nTreeB = 0; nTreeB < pTreesPositionB->length(); ++nTreeB ) {

         if ( !(*pChosenTreesPositionB)[ nTreeB ] ) continue;

         RWCString soTreeB = (*pTreesPositionB)[ nTreeB ];

         soTreeB.removeSingleCharacterAllOccurrences( '?' );
         soTreeB.removeSingleCharacterAllOccurrences( '(' );
         soTreeB.removeSingleCharacterAllOccurrences( ')' );
         soTreeB.removeSingleCharacterAllOccurrences( ',' );


         // now we have a pair of trees, soTreeA and soTreeB

         // abcdef
         // ^ top internal node, leading to PPyg and MMul 0
         //  ^ next internal node, leading to GGor 1
         //   ^ next internal node, leading to HSap, I hope! 2
         //    ^ next internal node, leading to PTro 3
         //     ^ PPan
         //      ^ PTro
         //       ^ GGor
         //    etc.

         // so there are 4 internal nodes to be concerned about

         bool bSomeInternalNodeIsACpG = false;
         bool bDeaminationMutationThisPairOfTrees = false;
         int nNumberOfDeaminationMutationsThisPairOfTrees = 0;
         

         for( int nInternalNode = 0; nInternalNode <= nMaxInternalNode; 
              ++nInternalNode ) {

            RWCString soAncestralSequence = 
               RWCString( soTreeA[ nInternalNode ] ) + 
               RWCString( soTreeB[ nInternalNode ] );

            
            // special case of MMul in which the 
            // deamination mutation occurs on the short part of the
            // branch that goes down to the right to the top 
            // internal node.  In such a case, the MMul present day
            // base is actually older than the top internal node.  
            // Thus we look for a CpG at the MMul present day bases
            // and some deamination mutation at the top internal node

            if ( nInternalNode == 0 ) {
               int nIndexOfPresentDayBases = 9;

               RWCString soPresentDayBases =
                  RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
                  RWCString( soTreeB[ nIndexOfPresentDayBases ] );

               if ( soPresentDayBases == "cg" ) {

                  bSomeInternalNodeIsACpG = true;

                  if ( soAncestralSequence == "ca" ) {

                     bDeaminationMutationThisPairOfTrees = true;
                     ++nNumberOfDeaminationMutationsThisPairOfTrees;

                  }
                  else if ( soAncestralSequence == "tg" ) {

                     bDeaminationMutationThisPairOfTrees = true;
                     ++nNumberOfDeaminationMutationsThisPairOfTrees;
                  }
               }
            }
               

            if ( soAncestralSequence != "cg" ) continue;


            bSomeInternalNodeIsACpG = true;


            // if reached here, ancestral sequence is a CpG
            // is there a mutation from this position?

            // first, check if there is a mutation down-right to
            // the corresponding present-day node


            int nIndexOfPresentDayBases =
               8 - nInternalNode;

            RWCString soPresentDayBases =
               RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
               RWCString( soTreeB[ nIndexOfPresentDayBases ] );

            if ( soPresentDayBases == "ca" ) {
               
               bDeaminationMutationThisPairOfTrees = true;

               ++nNumberOfDeaminationMutationsThisPairOfTrees;


            }
            else if ( soPresentDayBases == "tg" ) {


               bDeaminationMutationThisPairOfTrees = true;

               ++nNumberOfDeaminationMutationsThisPairOfTrees;

            }


            // case of top internal node
            if ( nInternalNode == 0 ) {
               // consider additionally deamination mutations on branch to
               // MMul

               nIndexOfPresentDayBases = 9;

               soPresentDayBases =
                  RWCString( soTreeA[ nIndexOfPresentDayBases ] ) +
                  RWCString( soTreeB[ nIndexOfPresentDayBases ] );

               if ( soPresentDayBases == "ca" ) {

                  bDeaminationMutationThisPairOfTrees = true;

                  ++nNumberOfDeaminationMutationsThisPairOfTrees;


               }
               else if ( soPresentDayBases == "tg" ) {
                  
                  bDeaminationMutationThisPairOfTrees = true;

                  ++nNumberOfDeaminationMutationsThisPairOfTrees;

               }
            }


            // if nInternalNode is 0-3, the next node is the
            // descendant internal node.  if nInternalNode is
            // 4, the next node is PPan

            RWCString soDescendantInternalNode =
               RWCString( soTreeA[ nInternalNode + 1 ] ) +
               RWCString( soTreeB[ nInternalNode + 1 ] );

            if ( soDescendantInternalNode == "ca" ) {

               bDeaminationMutationThisPairOfTrees = true;

               ++nNumberOfDeaminationMutationsThisPairOfTrees;

            }
            else if ( soDescendantInternalNode == "tg" ) {

               bDeaminationMutationThisPairOfTrees = true;

               ++nNumberOfDeaminationMutationsThisPairOfTrees;

            }

         } //         for( int nInternalNode = 0; nInternalNode < 5; 

         if ( bSomeInternalNodeIsACpG ) {
            ++nPairsInWhichAncestralIsCpG;
         }

         if ( bDeaminationMutationThisPairOfTrees ) {
            ++nPairsInWhichDeaminationMutation;
         }


         if ( nNumberOfDeaminationMutationsThisPairOfTrees >=
              nGreatestNumberOfDeaminationMutationsInAnyPair ) {

            if ( nNumberOfDeaminationMutationsThisPairOfTrees > 
                 nGreatestNumberOfDeaminationMutationsInAnyPair ) {
               nGreatestNumberOfDeaminationMutationsInAnyPair =
                  nNumberOfDeaminationMutationsThisPairOfTrees;
               aPairsOfTreesWithGreatestNumberPositionA.clear();
               aPairsOfTreesWithGreatestNumberPositionB.clear();
            }

            aPairsOfTreesWithGreatestNumberPositionA.insert( nTreeA );
            aPairsOfTreesWithGreatestNumberPositionB.insert( nTreeB );

         }

      } //      for( int nTreeB = 0; nTreeB < pTreesPositionB->length
   } // for( int nTreeA = 0; nTreeA < pTreesPositionA->length()


   if ( pCP->bAutoReportPrintCpGMutations_ ) {

      if ( nPairsInWhichAncestralIsCpG != 0 ) {

         fprintf( pAO, "cpg_mutations %d trees with ancestral CpG %d trees with deamination mutation %d pairs of trees %d %s\n",
                  nPairsInWhichAncestralIsCpG,
                  nPairsInWhichDeaminationMutation,
                  pTreesPositionB->length() *
                  pTreesPositionA->length(),
                  nUnpaddedIndex( nConsPos ),
                  soGetName().data() );
      }


      if ( pTreesPositionA->length() * pTreesPositionB->length() > 1 ) {
         fprintf( pAO, "multiple trees %d equal CpG trees: %d\n",
                  pTreesPositionA->length() * pTreesPositionB->length(),
                  aPairsOfTreesWithGreatestNumberPositionA.length() );
      }


      if ( nGreatestNumberOfDeaminationMutationsInAnyPair > 0 ) {
         fprintf( pAO, "trees with greatest number of deamination mutations %d at %d of %s is %d",
                  nGreatestNumberOfDeaminationMutationsInAnyPair,
                  nUnpaddedIndex( nConsPos ),
                  soGetName().data(),
                  aPairsOfTreesWithGreatestNumberPositionA.length()
                  );
         
         if ( aPairsOfTreesWithGreatestNumberPositionA.length() > 1 ) {
            // multiple pairs with same # of deamination mutations

            for( int nPair = 0; 
                 nPair < aPairsOfTreesWithGreatestNumberPositionA.length();
                 ++nPair ) {

               int nTreePositionA = aPairsOfTreesWithGreatestNumberPositionA[ nPair ];
               int nTreePositionB = aPairsOfTreesWithGreatestNumberPositionB[ nPair ];

               RWCString soTreePositionA = (*pTreesPositionA)[ nTreePositionA ];
               RWCString soTreePositionB = (*pTreesPositionB)[ nTreePositionB ];

               fprintf( pAO, " pair %s %s",
                        soTreePositionA.data(),
                        soTreePositionB.data() );

            }
         }

         fprintf( pAO, "\n" );


      }

                  
   } //     if ( pCP->bAutoReportPrintCpGMutations_ ) {


   if ( pCP->bAutoReportChooseTreesByCountingDeaminationMutations_ ) {

      int nNumberOfChosenTreesPositionA = 0;
      for( int nTreeA = 0; nTreeA < pChosenTreesPositionA->length(); ++nTreeA ) {
         if ( (*pChosenTreesPositionA)[ nTreeA ] ) {
            ++nNumberOfChosenTreesPositionA;

         }
      }

      int nNumberOfChosenTreesPositionB = 0;
      for( int nTreeB = 0; nTreeB < pChosenTreesPositionB->length(); ++nTreeB ) {
         if ( (*pChosenTreesPositionB)[ nTreeB ] ) {
            ++nNumberOfChosenTreesPositionB;

         }
      }


      if ( aPairsOfTreesWithGreatestNumberPositionA.length() <
           nNumberOfChosenTreesPositionA * nNumberOfChosenTreesPositionB ) {

         // we can reduce the number of chosen trees by counting
         // deamination mutations

         // But does it actually reduce the number of trees?  Or just
         // the number of pairs of trees?

         RWTValVector<bool> aATrees( (size_t) pTreesPositionA->length(),
                                     false,
                                     "aATrees" );

         int nPair;
         for( nPair = 0; 
              nPair < aPairsOfTreesWithGreatestNumberPositionA.length();
              ++nPair ) {

            int nAIndex = aPairsOfTreesWithGreatestNumberPositionA[ nPair ];
            aATrees[nAIndex] = true;
         }

         
         // rather than counting, let's directly compare to 
         // the pChosenTreesPositionA

         int nNumberOfTreesEliminatedPositionA = 0;
         for( int nTreeA = 0; nTreeA < aATrees.length(); ++nTreeA ) {
            if ( (*pChosenTreesPositionA)[ nTreeA ] &&
                 !aATrees[nTreeA] ) {
               ++nNumberOfTreesEliminatedPositionA;

               (*pChosenTreesPositionA)[ nTreeA ] = false;
            }
         }

         // do same with B

         RWTValVector<bool> aBTrees( (size_t) pTreesPositionB->length(),
                                     false,
                                     "aBTrees" );

         for( nPair = 0;
              nPair < aPairsOfTreesWithGreatestNumberPositionB.length();
              ++nPair ) {
            int nBIndex = aPairsOfTreesWithGreatestNumberPositionB[ nPair ];
            aBTrees[ nBIndex ] = true;
         }

         int nNumberOfTreesEliminatedPositionB = 0;
         for( int nTreeB = 0; nTreeB < aBTrees.length(); ++nTreeB ) {
            if ( (*pChosenTreesPositionB)[ nTreeB ] &&
                 !aBTrees[nTreeB] ) {
               ++nNumberOfTreesEliminatedPositionB;

               (*pChosenTreesPositionB)[ nTreeB ] = false;
            }
         }

      } //       if ( aPairsOfTreesWithGreatestNumberPositionA.length() <
   } //    if ( pCP->bAutoReportChooseTreesByCountingDeaminationMutations_ ) {

}


// this differs from bGetCombinedReadAtBase in that it guesses at the
// base when the data is bad.  It also does NOT add quality values--it
// assumes that is already done.
   
bool Contig :: bGetCombinedReadAtBase3( 
            const bool bAddQualityValuesOfStrands,
            LocatedFragment* pForwardRead,
            MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray,
            LocatedFragment* pReverseRead,
            MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray,
            const int nConsPos,
            char& cCombinedBase,
            int& nCombinedQuality,
            bool& bBaseALittleOK ) {


   bool bBaseOK = true;
   bBaseALittleOK = true; // will be changed if necessary


   bool bForwardReadBaseOK =
      ( pForwardRead && 
        pForwardRead->bIsInAlignedPartOfRead( nConsPos ) &&
        pForwardRead->bIsInHighQualitySegmentOfRead( nConsPos ) &&
        ( (*pForwardReadSingleSignalArray)[ nConsPos ] == cSingleSignal )
        ) ? true : false;

   bool bReverseReadBaseOK =
      ( pReverseRead && 
        pReverseRead->bIsInAlignedPartOfRead( nConsPos ) &&
        pReverseRead->bIsInHighQualitySegmentOfRead( nConsPos ) &&
        ( (*pReverseReadSingleSignalArray)[ nConsPos ] == cSingleSignal )
        ) ? true : false;

   if ( bForwardReadBaseOK && bReverseReadBaseOK ) {

      if (  pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
            pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {

         cCombinedBase = pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase();

         if ( bAddQualityValuesOfStrands ) {
            nCombinedQuality = 
               pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() 
               +
               pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
         }
         else {
            nCombinedQuality =
               MAX( 
               pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality(),
               pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() 
               );
         }

      }
      else {
         // reads disagree.  Which one should we use?
         // will just use one read.  Which is it?
               
         if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
              ==
              pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
            // I don't know what to do in this case so let's ignore
            // such data points
            bBaseOK = false;
         }
         else if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() >
                   pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
            nCombinedQuality = pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

            cCombinedBase = pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
         }
         else {

            nCombinedQuality = pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
            cCombinedBase = pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
         }
      }
   } //  if ( bForwardReadBaseOK && bReverseReadBaseOK ) {
   else if ( bForwardReadBaseOK ) {
      nCombinedQuality = pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

      cCombinedBase = pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
   }
   else if ( bReverseReadBaseOK ) {
      nCombinedQuality = pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
      cCombinedBase = pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
   }
   else {
      // neither read's base is ok
      bBaseOK = false;
   }


   if ( nCombinedQuality <  pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ )
      bBaseOK = false;

   if ( !bBaseOK ) {

      // now go to work trying to guess correct base

      bool bForwardReadBaseOK2 =
         ( pForwardRead && 
           pForwardRead->bIsInAlignedPartOfRead( nConsPos ) ) ? true : false;


      bool bReverseReadBaseOK2 =
         ( pReverseRead && 
           pReverseRead->bIsInAlignedPartOfRead( nConsPos ) ) ? true : false;

      if ( pCP->bAutoReportFlankingBasesMustBeSingleSignal_ && 
           pForwardReadSingleSignalArray ) 
         bForwardReadBaseOK2 = bForwardReadBaseOK2 &&
            ( (*pForwardReadSingleSignalArray)[ nConsPos ] == cSingleSignal );


      if ( pCP->bAutoReportFlankingBasesMustBeSingleSignal_ && 
           pReverseReadSingleSignalArray )
         bReverseReadBaseOK2 =  bReverseReadBaseOK2 &&
            ( (*pReverseReadSingleSignalArray)[ nConsPos ] == cSingleSignal );


      if ( pCP->bAutoReportFlankingBasesMustBeInHighQualitySegment_ &&
           pForwardRead )
         bForwardReadBaseOK2 = bForwardReadBaseOK2 &&
            pForwardRead->bIsInHighQualitySegmentOfRead( nConsPos );

      if ( pCP->bAutoReportFlankingBasesMustBeInHighQualitySegment_ &&
           pReverseRead )
         bReverseReadBaseOK2 = bReverseReadBaseOK2 
            && pReverseRead->bIsInHighQualitySegmentOfRead( nConsPos );

      
      if ( bForwardReadBaseOK2 && bReverseReadBaseOK2 ) {
         
         if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
              pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {

            cCombinedBase = pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
            if ( bAddQualityValuesOfStrands ) {
               nCombinedQuality = 
                  pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() 
                  +
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
            }
            else {
               nCombinedQuality =
                  MAX( 
                      pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality(),
                      pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() 
                      );
            }
         }
         else {
            // reads disagree.  Which one should we use?
            // will just use one read. 

            if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
                 >=   // sic.  Arbitrarily choose forward read if they disagree

                 pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
               cCombinedBase = 
                  pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase();

               nCombinedQuality = 
                  pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
            }
            else {
               cCombinedBase =
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase();

               nCombinedQuality =
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
            }
         }
      }
      else if ( bForwardReadBaseOK2 ) {
         cCombinedBase = pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
         nCombinedQuality = pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
      }
      else if ( bReverseReadBaseOK2 ) {
         cCombinedBase = pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase();
         nCombinedQuality = pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
      }
      else {
         // neither base is even aligned or exists

         bBaseALittleOK = false;

      }


      if ( nCombinedQuality < 
           pCP->nAutoReportMinimumQualityOfFlankingBases_ ) {
         bBaseALittleOK = false;
      }
   } //    if ( !bBaseOK ) {


   return bBaseOK;
}