/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "contig.h"
#include    "consedParameters.h"
#include    "locatedFragment.h"
#include    "bIntersect.h"
#include    "mbtValVectorOfBool.h"
#include    "mbtValVector.h"
#include    "rwtptrorderedvector.h"
#include    "ntide.h"
#include    "fileDefines.h"
#include    "soAddCommas.h"
#include    "rwctokenizer.h"
#include    "autoReport.h"
#include    "consed.h"
#include    "assembly.h"
#include    "nNumberFromBase.h"
#include    "setErrorRateFromQuality.h"
#include    "dErrorRateFromQuality.h"
#include    "singletInfo.h"
#include    "readPHDAgainForHighQualitySegment.h"
#include    <stdlib.h>
#include    <stdio.h>
#include    <dirent.h>
#include    "rwcregexp.h"
#include    "getAllSinglets.h"
#include    "mbt_val_ord_offset_vec.h"
#include    "soLine.h"
#include    "max.h"
#include    <math.h>


static bool bDebug = false;

unsigned char cCpGMatrix[256][256];

static void setCpGMatrix() {

   for( int n1 = 0; n1 < 256; ++n1 ) {
      for( int n2 = 0; n2 < 256; ++n2 ) {
         cCpGMatrix[n1][n2] = 0;
      }
   }

   cCpGMatrix['c']['g'] = 1;
   cCpGMatrix['t']['g'] = 2;
   cCpGMatrix['c']['a'] = 4;

}




void Contig :: printDiscrepantRegions() {

   setCpGMatrix();

   
   int nBases = nGetNumberOfBasesInContigNotBackbone( "HSap" );

   fprintf( pAO, "\n\nAutoReportPrintAmbiguouslyAlignedRegions {\n" );
   fprintf( pAO, "CONTIG: %s %d\n\n", 
            soGetName().data(),
            nBases );

   


   int nContigLengthPlusSentinels = nGetPaddedConsLength() +
      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;

   int nContigLengthWithoutSentinels = nGetPaddedConsLength();

   mbtValVectorOfBool aDiscrepantPositions( nContigLengthPlusSentinels, 
                                             1, // starts at 1
                                             "Contig::aDiscrepantPositions" );

   mbtValVectorOfBool aDiscrepantPositionsHighQuality( nContigLengthPlusSentinels,
                                                       1, // starts at 1
                                                       "Contig::aDiscrepantPositionsHighQuality" );


   mbtValVectorOfBool aPadPositions( nContigLengthPlusSentinels,
                                     1, // starts at 1
                                     "Contig::aPadPositions" );


   mbtValVector<unsigned char> aLeftSideOfCpG( nContigLengthPlusSentinels,
                                               1, // starts at 1
                                               0 ); // initial value

   aLeftSideOfCpG.soName_ = "Contig::aLeftSideOfCpG";

   mbtValVector<Quality> aWorseQualityAtDiscrepantColumn( nContigLengthPlusSentinels,
                                                          1, // starts at 1
                                                          99 ); // initial value
   aWorseQualityAtDiscrepantColumn.soName_ = "Contig::aWorseQualityAtDiscrepantColumn";

   mbtValVector<Quality> aBestQuality( nContigLengthPlusSentinels,
                                       1, // starts at 1
                                       0 ); // initial value

   typedef RWTPtrOrderedVector<LocatedFragment> arrayOfPointersToLocatedFragments;

   mbtPtrVector<arrayOfPointersToLocatedFragments> 
      aArraysOfDiscrepantReads( nContigLengthPlusSentinels,
                                1); // starts at 1

   aArraysOfDiscrepantReads.soName_ = "Contig::aArraysOfDiscrepantReads";


//    int nRead;
//    for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
//       LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );
      
//       for( int n
//    for( int nConsPos = nGetStartConsensusIndex(); 
//         nConsPos <= ( nGetEndConsensusIndex() - 1 ); ++nConsPos ) {
      
//       char cBaseA = ntGetCons( nConsPos ).cGetBase();
//       char cBaseB = ntGetCons( nConsPos + 1 ).cGetBase();

// //       if ( ( cBaseA == 'c' && cBaseB == 'g' ) ||
// //            ( cBaseA == 'c' && cBaseB == 'a' ) ||
// //            ( cBaseA == 't' && cBaseB == 'g' ) ) {
// //          aLeftSideOfCpG.setValue( nConsPos, true );
// //       }

//       if ( cCpGMatrix[cBaseA][cBaseB] ) {
//          aLeftSideOfCpG |= cCpGMatrix[cBaseA][cBaseB];
//       }


//    }


   mbtValVectorOfBool aAlignedReadsBesidesHuman( nContigLengthPlusSentinels,
                                                 1, // starts at 1
                                                 "Contig::aAlignedReadsBesidesHuman" );

   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportBackboneReadHasThisStringInIt_ ) ) continue;

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {
         aAlignedReadsBesidesHuman.setValue( nConsPos, true );
         
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() 
              > aBestQuality[ nConsPos ] ) {
            aBestQuality[ nConsPos ] = 
               pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99();
         }
      }
   }
      


   // record CpG's (and mutant forms), discrepancies, and pads.
   // Do this for all reads, including human backbone, except
   // for specifically ignored reads

   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {

      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );


      bool bIgnoreThisRead = false;
      for( int nPattern = 0; nPattern < pCP->aAutoReportPatternsOfReadsToIgnore_.length();
           ++nPattern ) {
         if ( pLocFrag->soGetName().bContains( pCP->aAutoReportPatternsOfReadsToIgnore_[ nPattern ] ) ) {
            bIgnoreThisRead = true;
            break;
         }
      }

      if ( bIgnoreThisRead ) continue;


      // we are only interested in the part of the read that is
      // high quality segment, aligned, and on the consensus

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;


      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos; 
              ++nConsPos ) {

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ||
              ntGetCons( nConsPos ).bIsPad() ) {

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
                 ntGetCons( nConsPos ).cGetBase() ) {



               aPadPositions.setValue( nConsPos, true );
            }
         }

         // check for CpG -- this needs to include human. 
         // Note that this means that if there is a CpG in which only
         // one of the forms is above the quality threshold, it will
         // not be considered a CpG

         if ( nConsPos < nRightConsPos ) {
            char cBaseA = pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase();
            char cBaseB = pLocFrag->ntGetFragFromConsPos( nConsPos + 1 ).cGetBase();
            if ( cCpGMatrix[cBaseA][cBaseB] ) {
               aLeftSideOfCpG[ nConsPos ] |= cCpGMatrix[cBaseA][cBaseB];
            }
         }


         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase()
              != ntGetCons( nConsPos ).cGetBase() ) {
            aDiscrepantPositions.setValue( nConsPos, true );

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() 
                >=
                 pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) {
               aDiscrepantPositionsHighQuality.setValue( nConsPos, true );
            }


            arrayOfPointersToLocatedFragments* pArrayOfDiscrepantReads
               = aArraysOfDiscrepantReads[ nConsPos ];

            if ( !pArrayOfDiscrepantReads ) {
               RWCString soName = "RWTPtrOrderedVector<LocatedFragment> at nConsPos = ";
               soName += RWCString( (long) nConsPos );

               pArrayOfDiscrepantReads = 
                  new RWTPtrOrderedVector<LocatedFragment>( 
                         3, // initial capacity
                         soName );

               aArraysOfDiscrepantReads[ nConsPos ] =
                  pArrayOfDiscrepantReads;
            }

            pArrayOfDiscrepantReads->insert( pLocFrag );

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() 
                 < aWorseQualityAtDiscrepantColumn[ nConsPos ] ) {
               aWorseQualityAtDiscrepantColumn[ nConsPos ] =
                  pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99();
            } 
               
         }

      }
   }




   // scan again looking for a discrepancy preceeded by at least
   // nAutoReportNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_



   // mark all locations for which there are at least
   // pCP->nAutoReportNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_
   // non-discrepant positions up to and including that base

   // start this out by being 1 wherever there are high quality segment
   // aligned reads (not the ones to ignore).  Thus this
   // array will be 0 at the beginning and end of the consensus
   // where there are no reads other than the human backbone

   mbtValVectorOfBool aStretchesOfAgreeingBases( nContigLengthPlusSentinels,
                                                 1, // starts at 1
                                                 "Contig::aStretchesOfAgreeingBases" );



   bool bInAgreeingRegion = false;
   int nPositionsThatAgree = 0;
   int nConsPos;
   for( nConsPos = 1; nConsPos <= aDiscrepantPositions.length(); ++nConsPos ) {

      // there must be aligned bases besides human that have
      // no discrepancies for it to be considered 
      // aStretchesOfAgreeingBases true.  Thus if we hit
      // a region of no such aligned reads, 
      // this region is not aStretchesOfAgreeingBases.  Furthermore,
      // further to the right where there are aligned reads, there must 
      // be several columns of agreement before we consider it
      // a aStretchesOfAgreeingBases.  Note that there is a potential
      // problem:
      //    aaaaaaaaaaaaaaaaaaaaaaaaaaaa one read
      //        daaaaaaaaaaaaaaaaaaaaaaa another read
      //        ^ this would be considered an isolated discrepancy.
      //  However, since there are no aligned agreeing bases in the
      //  same read, we probably don't want it.  This could be
      //  fixed by not allowing discrepancies if they occur 
      //  immediately at the beginning or end of a read.

      if ( !aAlignedReadsBesidesHuman[ nConsPos ] ) {
         bInAgreeingRegion = false;
         continue;
      }


      if ( aDiscrepantPositions[ nConsPos ] ) {
         bInAgreeingRegion = false;
      }
      else {
         if ( bInAgreeingRegion ) {
            ++nPositionsThatAgree;
         }
         else {
            bInAgreeingRegion = true;
            nPositionsThatAgree = 1;
         }

         if ( nPositionsThatAgree >= 
                 pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
            aStretchesOfAgreeingBases.setValue( nConsPos, true );
         }
      }

   }


   // now let's calculate the number of "potential" sites for each
   // needed size of discrepant region






   // now print out minimal regions that are flanked by at
   // least pCP->nAutoReportNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_

   for( nConsPos = 1; nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

      // -----DDDD-----
      //     ^ must be true

      if ( !aStretchesOfAgreeingBases[nConsPos] ) continue;

      // now want to see a range of high quality discrepancies

      // -----DDDD-----
      //      ^ must be true

      if ( !aDiscrepantPositionsHighQuality[ nConsPos + 1 ] ) continue;

      // find first position that doesn't have a hqd.  It must
      // start a stretch of agreeing bases.

      bool bFoundEnd = false;
      int nFirstNonHQD;
      int nConsPos2;
      for( nConsPos2 = nConsPos + 2; nConsPos2 <= nGetEndConsensusIndex();
           ++nConsPos2 ) {
         if ( !aDiscrepantPositionsHighQuality[ nConsPos2 ] ) {
            bFoundEnd = true;
            nFirstNonHQD = nConsPos2;
            break;
         }
      }

      // this would signal that high quality discrepancies went to the
      // end of the consensus--a condition we aren't interested in
      if ( !bFoundEnd ) continue;
         
      // -----DDDD-----
      //              ^ must be true

      if ( 
          !aStretchesOfAgreeingBases[ 
              nFirstNonHQD + 
              pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - 
              1 ] 
          ) continue;

      int nLeftDiscrepantRegion = nConsPos + 1;
      int nSizeOfDiscrepantRegion = nFirstNonHQD - nLeftDiscrepantRegion;
      int nRightDiscrepantRegion = nFirstNonHQD - 1;

      // if made it here, this is a high quality isolated discrepant
      // region.  However, it might have pads or CpG mutations in it.
      
      char cPadsInDiscrepantRegion = 'b';

      for( nConsPos2 = nLeftDiscrepantRegion; 
           nConsPos2 <= nRightDiscrepantRegion; ++nConsPos2 ) {

         if ( aPadPositions[ nConsPos2 ] )
            cPadsInDiscrepantRegion = '*';
         
      }

      RWCString soCpG = "no-";

      for( nConsPos2 = nLeftDiscrepantRegion - 1;
           nConsPos2 <= nRightDiscrepantRegion; ++nConsPos2 ) {

         // check with any 2 of CpG mutation's are found here
         // must have 2 or else all 3

         if ( aLeftSideOfCpG[ nConsPos2 ] == 3 ||
              aLeftSideOfCpG[ nConsPos2 ] == 6 ||
              aLeftSideOfCpG[ nConsPos2 ] == 5 ||
              aLeftSideOfCpG[ nConsPos2 ] == 7 ) {
            soCpG = "cpg";
         }
      }
         
         

      // the 1 and 1 on the end are just for the perl script
      fprintf( pAO, "%s %d to %d (%d) %c %s 1 1\n", 
               soGetName().data(),
               nUnpaddedIndexStartingAtUserDefinedPosition( nLeftDiscrepantRegion ),
               nUnpaddedIndexStartingAtUserDefinedPosition( nRightDiscrepantRegion ),
               nSizeOfDiscrepantRegion,
               cPadsInDiscrepantRegion,
               soCpG.data() );

   }

   fprintf( pAO, "} AutoReportPrintAmbiguouslyAlignedRegions\n" );

   fprintf( pAO, "\nPOTENTIAL SITES (size of site, number of sites) {\n" );

   // now print the # of potential sites for discrepant regions
   // of various sizes
   for( int nSizeOfDiscrepantRegion = 1; nSizeOfDiscrepantRegion <= 5; 
        ++nSizeOfDiscrepantRegion ) {
      int nNumberOfPotentialSitesNoPads = -666;

      //      fprintf( pAO, "accurate:\n" );
      int nNumberOfPotentialSites =
         nGetNumberOfPotentialDiscrepantRegions( nSizeOfDiscrepantRegion,
                                                 aStretchesOfAgreeingBases,
                                                 aBestQuality,
                                                 aPadPositions,
                                                 nNumberOfPotentialSitesNoPads );
      int nRoughNumberOfPotentialSites =
         nGetNumberOfPotentialDiscrepantRegions2( nSizeOfDiscrepantRegion );

      // fprintf( pAO, "not quite as rough:\n" );
      int nPadsToo =
         nGetNumberOfPotentialDiscrepantRegions3( nSizeOfDiscrepantRegion );

      int nQualityToo = nGetNumberOfPotentialDiscrepantRegions4( nSizeOfDiscrepantRegion );

      fprintf( pAO, "%d %d %d %d %d %d\n", 
               nSizeOfDiscrepantRegion,
               nNumberOfPotentialSites, 
               nNumberOfPotentialSitesNoPads,
               nRoughNumberOfPotentialSites,
               nPadsToo,
               nQualityToo );

   }
   fprintf( pAO, "} POTENTIAL SITES\n\n" );

}



int Contig :: nGetNumberOfBasesInContigNotBackbone( 
                         const RWCString& soPatternInNameOfBackbone ) {

   int nContigLength = nGetPaddedConsLength();


   mbtValVectorOfBool aAlignedHQSPositions( nContigLength, 
                                             1, // starts at 1
                                             "Contig::aAlignedHQSPositions" );



   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {

      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( soPatternInNameOfBackbone ) ) {
         continue;
      }


      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;

      // we are only interested in the part of the read that is
      // high quality segment, aligned, and on the consensus

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      // will only count non-pads.  There must be at least 1 hqs aligned
      // read that is not a pad to count it.

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos; ++nConsPos ) {
         if ( !pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aAlignedHQSPositions.setValue( nConsPos, true );
         }
      }

//       for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos; ++nConsPos ) {
//          if ( pCP->bAutoReportPrintDiscrepantRegionsButOnlyIfAboveQualityThreshold_ &&
//               ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() <
//                 pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) )
//               continue;

//          if ( !pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {

//             aAlignedHQSPositions.setValue( nConsPos, true );
//          }
//       }
   }

   int nTotalAlignedHQSPositions = 0;
   for( int nConsPos = aAlignedHQSPositions.nGetStartIndex();
        nConsPos <= aAlignedHQSPositions.nGetEndIndex();
        ++nConsPos ) {
      if ( aAlignedHQSPositions[ nConsPos ] ) {
         ++nTotalAlignedHQSPositions;

      }
   }
   



   return( nTotalAlignedHQSPositions );
}


      

int Contig :: nGetNumberOfPotentialDiscrepantRegions( 
    const int nSizeOfPotentialDiscrepantRegion,
    const mbtValVectorOfBool& aStretchesOfAgreeingBases,
    const mbtValVector<Quality>& aBestQuality,
    const mbtValVectorOfBool& aPadPositions,
    int& nPotentialSitesNoPads ) {


   
   // -----xxxxx-----
   // ^ this is pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_
   // bases long
   //      ^ this is nSizeOfPotentialDiscrepantRegion long
   //           ^ this is pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_
   // bases long again

   // so if at position nConsPos, there must be a stretch of 
   // pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ 
   // agreeing bases starting at nConsPos 
   // and then there must be another stretch of similar agreeing bases
   // at nConsPos + pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + nSizeOfPotentialDiscrepantRegion

   int nLengthOfPattern = 
      2 * pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ +
      nSizeOfPotentialDiscrepantRegion;


   // -----xxxxx-----
   //     ^ here aStretchesOfAgreeingBases should be 1 
   //               ^ here aStretchesOfAgreeingBases should be 1

   int nOffset = nSizeOfPotentialDiscrepantRegion +
      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;

   // this allows nConsPos + nOffset to go to 
   // aStretchesOfAgreeingBases.length()
   // which, since this is a 1-based array, is the maximum subscript

   int nLastConsPos = aStretchesOfAgreeingBases.length() - nOffset;

   int nPotentialSites = 0;
   nPotentialSitesNoPads = 0;



   for( int nConsPos = 1; nConsPos <= nLastConsPos; ++nConsPos ) {
      if ( aStretchesOfAgreeingBases[ nConsPos ] &&
           aStretchesOfAgreeingBases[ nConsPos + nOffset ] ) {


         // so this region is flanked by aligned agreeing bases

         // are there any pads within the region?

         bool bPadsInDiscrepantRegion = false;
         for( int nConsPos2 = nConsPos + 1; 
              nConsPos2 <= nConsPos + nSizeOfPotentialDiscrepantRegion;
              ++nConsPos2 ) {
            if ( aPadPositions[ nConsPos2 ] ) {
               bPadsInDiscrepantRegion = true;
               break;
            }
         }


         if ( pCP->bAutoReportPrintDiscrepantRegionsButOnlyIfAboveQualityThreshold_ ) {
            // now require that there be a high quality base at
            // each position within the potentially discrepant region
            // That is from nConsPos + 1 to 
            // nConsPos + nSizeOfPotentialDiscrepantRegion

            // -----xxxx-----
            //      ^ here to
            //         ^ here

            bool bAllHighQuality = true;
            for( int nConsPos2 = nConsPos + 1; 
                 nConsPos2 <= nConsPos + nSizeOfPotentialDiscrepantRegion;
                 ++nConsPos2 ) {
               if ( aBestQuality[ nConsPos2 ] < pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) {
                  bAllHighQuality = false;
                  break;
               }
            }

            if ( bAllHighQuality ) {
               ++nPotentialSites;
               if ( !bPadsInDiscrepantRegion ) {


                  ++nPotentialSitesNoPads;
               }
            }
         }
         else {
            ++nPotentialSites;
            if ( !bPadsInDiscrepantRegion )
               ++nPotentialSitesNoPads;
         }
      } //    if ( aStretchesOfAgreeingBases[ nConsPos ] &&
   } //    for( int nConsPos = 1; nConsPos <= nLastConsPos; ++nConsPos ) 

   return( nPotentialSites );
}


      


int Contig :: nGetNumberOfPotentialDiscrepantRegions2(const int nSizeOfDiscrepantRegion ) {

   
   int nContigLengthPlusSentinels = nGetPaddedConsLength() + 
      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;


   mbtValVectorOfBool aDiscrepantPositions( nContigLengthPlusSentinels, 
                                             1, // starts at 1
                                             "Contig::aDiscrepantPositions" );

   mbtValVectorOfBool aAlignedReadsBesidesHuman( nContigLengthPlusSentinels,
                                                 1,
                                                 "Contig::aAlignedReadsBesiesHuman" );

   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportBackboneReadHasThisStringInIt_ ) ) continue;

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;


      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {
         aAlignedReadsBesidesHuman.setValue( nConsPos, true );
         
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
              ntGetCons( nConsPos ).cGetBase() ) {
            aDiscrepantPositions.setValue( nConsPos, true );
         }
      }
   }
      

   // see if we have a 12-base region with no discrepancies, except maybe
   // bbbbbxxbbbbb
   // b, no discrepancy
   // x, possible discrepancy
   
   int nPotentialSites = 0;
   for( int nConsPos = pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + 1; 
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
      //   for( int nConsPos = nStart; nConsPos <= nEnd; ++nConsPos ) {
      int nConsPos2;
      bool bRegionOK = true;
      for( nConsPos2 = nConsPos -  pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_; 
           bRegionOK &&
              nConsPos2 <= nConsPos - 1; ++nConsPos2 ) {
         if ( aDiscrepantPositions[ nConsPos2 ] ||
              !aAlignedReadsBesidesHuman[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      for( nConsPos2 = nConsPos + nSizeOfDiscrepantRegion; 
           bRegionOK &&
              ( nConsPos2 <= 
              ( nConsPos + nSizeOfDiscrepantRegion + 
              pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - 1 ) ); 
           ++nConsPos2 ) {
         if ( aDiscrepantPositions[ nConsPos2 ] ||
            !aAlignedReadsBesidesHuman[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      if ( bRegionOK ) {
         ++nPotentialSites;
      }
   }
      
   return nPotentialSites;
}

         

   


// differs from nGetNumberOfPotentialDiscrepantRegions2 in that it
// excludes pads from the "discrepant" region

int Contig :: nGetNumberOfPotentialDiscrepantRegions3( const int nSizeOfDiscrepantRegion ) {

   
   int nContigLengthPlusSentinels = nGetPaddedConsLength() + 
      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;


   mbtValVectorOfBool aDiscrepantPositions( nContigLengthPlusSentinels, 
                                             1, // starts at 1
                                             "Contig::aDiscrepantPositions" );

   mbtValVectorOfBool aAlignedReadsBesidesHuman( nContigLengthPlusSentinels,
                                                 1,
                                                 "Contig::aAlignedReadsBesiesHuman" );

   mbtValVectorOfBool aPadPositions( nContigLengthPlusSentinels,
                                     1,
                                     "Contig::aPadPositions" );

   // set aPadPositions
   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      // all reads, including human
      //if ( pLocFrag->soGetName().bContains( "HSap" ) ) continue;

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;


      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ||
              ntGetCons( nConsPos ).bIsPad()  ) {

            // notice that we have this additional check which makes
            // sure that there are not agreeing pads in all of the
            // reads that we are considering due to an insertion in a 
            // read that is excluded.

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
                 ntGetCons( nConsPos ).cGetBase() ) {
               aPadPositions.setValue( nConsPos, true );
            }
         }
      }
   }



   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportBackboneReadHasThisStringInIt_ ) ) continue;

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;


      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {
         aAlignedReadsBesidesHuman.setValue( nConsPos, true );
         
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
              ntGetCons( nConsPos ).cGetBase() ) {
            aDiscrepantPositions.setValue( nConsPos, true );
         }
      }
   }
      

   // see if we have a 12-base region with no discrepancies, except maybe
   // bbbbbxxbbbbb
   // b, no discrepancy
   // x, possible discrepancy
   
   int nPotentialSites = 0;
   for( int nConsPos = pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + 1; nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
      //   for( int nConsPos = nStart; nConsPos <= nEnd; ++nConsPos ) {
      int nConsPos2;
      bool bRegionOK = true;
      for( nConsPos2 = nConsPos - 
              pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_; 
           bRegionOK &&
              nConsPos2 <= nConsPos - 1; ++nConsPos2 ) {
         if ( aDiscrepantPositions[ nConsPos2 ] ||
              !aAlignedReadsBesidesHuman[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      for( nConsPos2 = nConsPos + nSizeOfDiscrepantRegion; bRegionOK &&
              nConsPos2 <= nConsPos + nSizeOfDiscrepantRegion +
              pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - 1; 
           ++nConsPos2 ) {
         if ( aDiscrepantPositions[ nConsPos2 ] ||
            !aAlignedReadsBesidesHuman[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      for( nConsPos2 = nConsPos; bRegionOK &&
              nConsPos2 <= nConsPos + nSizeOfDiscrepantRegion - 1; 
           ++nConsPos2 ) {
         if ( aPadPositions[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      if ( bRegionOK ) {


         ++nPotentialSites;
      }
   }
      
   return nPotentialSites;
}

         

// differs from nGetNumberOfPotentialDiscrepantRegions3 in that it
// uses quality to exclude discrepant regions

int Contig :: nGetNumberOfPotentialDiscrepantRegions4( const int nSizeOfDiscrepantRegion ) {

   
   int nContigLengthPlusSentinels = nGetPaddedConsLength() + 
      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;


   mbtValVectorOfBool aDiscrepantPositions( nContigLengthPlusSentinels, 
                                             1, // starts at 1
                                             "Contig::aDiscrepantPositions" );

   mbtValVectorOfBool aAlignedReadsBesidesHuman( nContigLengthPlusSentinels,
                                                 1,
                                                 "Contig::aAlignedReadsBesiesHuman" );

   mbtValVectorOfBool aPadPositions( nContigLengthPlusSentinels,
                                     1,
                                     "Contig::aPadPositions" );

   mbtValVector<Quality> aBestQuality( nContigLengthPlusSentinels,
                                       1, // starts at 1
                                       0 ); // initial value

   aBestQuality.soName_ = "Contig::aBestQuality";


   // set aPadPositions
   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      // all reads, including human
      //if ( pLocFrag->soGetName().bContains( "HSap" ) ) continue;

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;


      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ||
              ntGetCons( nConsPos ).bIsPad() ) {

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
                 ntGetCons( nConsPos ).cGetBase() ) {

               aPadPositions.setValue( nConsPos, true );
            }
         }
      }
   }



   // all reads except human

   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportBackboneReadHasThisStringInIt_ ) ) continue;

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {
         aAlignedReadsBesidesHuman.setValue( nConsPos, true );
         
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
              ntGetCons( nConsPos ).cGetBase() ) {
            aDiscrepantPositions.setValue( nConsPos, true );
         }

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() > 
              aBestQuality[ nConsPos ] ) {
            aBestQuality[ nConsPos ] = 
               pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99();
         }
      }
   }
      

   // see if we have a 12-base region with no discrepancies, except maybe
   // bbbbbxxbbbbb
   // b, no discrepancy
   // x, possible discrepancy
   
   int nPotentialSites = 0;
   for( int nConsPos = pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + 1; nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
      //   for( int nConsPos = nStart; nConsPos <= nEnd; ++nConsPos ) {
      int nConsPos2;
      bool bRegionOK = true;
      for( nConsPos2 = 
              nConsPos - pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_; 
           bRegionOK && nConsPos2 <= nConsPos - 1; ++nConsPos2 ) {
         if ( aDiscrepantPositions[ nConsPos2 ] ||
              !aAlignedReadsBesidesHuman[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      // e.g., if discrepant region is 2, since nConsPos is at the beginning
      // of the discrepant region, then nConsPos + 6 is the end of 
      // the flanking region

      for( nConsPos2 = nConsPos + nSizeOfDiscrepantRegion; 
           bRegionOK &&
              nConsPos2 <= 
              ( nConsPos + nSizeOfDiscrepantRegion +
                pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - 1 );  
           ++nConsPos2 ) {
         if ( aDiscrepantPositions[ nConsPos2 ] ||
            !aAlignedReadsBesidesHuman[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      for( nConsPos2 = nConsPos; bRegionOK &&
              nConsPos2 <= nConsPos + nSizeOfDiscrepantRegion - 1; 
           ++nConsPos2 ) {
         if ( aPadPositions[ nConsPos2 ] ) {
            bRegionOK = false;
         }
      }

      for( nConsPos2 = nConsPos; bRegionOK &&
              nConsPos2 <= nConsPos + nSizeOfDiscrepantRegion - 1; 
           ++nConsPos2 ) {
         if ( aBestQuality[ nConsPos2 ] < 
              pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) {
            bRegionOK = false;
         }
      }


      if ( bRegionOK ) {


         ++nPotentialSites;
      }
   }
      
   return nPotentialSites;
}

         

   
void Contig :: addToMinimumQualityHistogram( 
        RWTValVector<int>& aMinimumQualityHistogram ) {


   // make an array of this contig and an array of bool telling whether
   // a particular location is ok to use or not.  Run through all reads
   // (not human) figuring out the minimum of each column.  After all
   // reads have been read, then run through the quality array and
   // for each column (that should be used), add 1 to the corresponding
   // position in aMinimumQualityHistogram


   int nContigLength = nGetPaddedConsLength();

   mbtValVectorOfBool aAlignedReadsBesidesHuman( nContigLength,
                                                 1, // starts at 1
                                                 "Contig::aAlignedReadsBesidesHuman" );

   mbtValVector<Quality> aWorstQuality( nContigLength,
                                        1, // starts at 1
                                        100 ); // initial value

   aWorstQuality.soName_ = "Contig::aWorstQuality";

   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;

      // also do not include human read

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportBackboneReadHasThisStringInIt_ ) ) continue;

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos; 
           ++nConsPos ) {

         aAlignedReadsBesidesHuman.setValue( nConsPos, true );

         // pads are not allowed to contribute to the minimum quality

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() <
              aWorstQuality[ nConsPos ] &&
              !pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            
            aWorstQuality[ nConsPos ] =
               pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
         }
      }
   }      


   for( int nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

      if ( aAlignedReadsBesidesHuman[ nConsPos ] ) {
         ++aMinimumQualityHistogram[ aWorstQuality[ nConsPos ] ];
      }
   }
}


class humanPrimatePad {
public:
   int nConsPos_;
   bool bPrimatePad_;
   bool bHumanPad_;
   bool bPrimateAlignedBase_;
   Quality qual_;
public:
   humanPrimatePad(const int nConsPos ) : 
      nConsPos_( nConsPos ),
      bPrimatePad_( false ),
      bHumanPad_( false ),
      bPrimateAlignedBase_( false ),
      qual_( 0 ) {}

   humanPrimatePad() : nConsPos_( -666 ),
                     bPrimatePad_( false ),
                     bHumanPad_( false ),
                     bPrimateAlignedBase_( false ),
                     qual_( 0 ) {}

   bool operator==( const humanPrimatePad& hum ) const {
      return( ( nConsPos_ == hum.nConsPos_ ) &&
              ( bPrimatePad_ == hum.bPrimatePad_ ) &&
              ( bHumanPad_ == hum.bHumanPad_ ) &&
              ( bPrimateAlignedBase_ == hum.bPrimateAlignedBase_ ) &&
              ( qual_ == hum.qual_ ) );
   }

};




void Contig :: addNumberOfIsolatedPads( mbtValVector<int>& aIsolatedPrimatePads,
                                        mbtValVector<int>& aIsolatedHumanPads ) {

   int nContigLength = nGetPaddedConsLength();



   RWTValOrderedVector<humanPrimatePad> 
      aHumanPrimatePads( (size_t) nContigLength );

   int nConsPos;
   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {
      aHumanPrimatePads.insert( humanPrimatePad( nConsPos ) );
   }
   

   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportIsolatedPadsOfReadsWithThisPattern_ ) ) {


         if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

         int nLeftConsPos = pLocFrag->nGetHighQualityStart();
         int nRightConsPos = pLocFrag->nGetHighQualityEnd();

         if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

         if ( !bIntersect( nLeftConsPos,
                           nRightConsPos,
                           pLocFrag->nGetAlignClipStart(),
                           pLocFrag->nGetAlignClipEnd(),
                           nLeftConsPos,
                           nRightConsPos ) ) continue;

         if ( !bIntersect( nLeftConsPos,
                           nRightConsPos,
                           nGetStartConsensusIndex(),
                           nGetEndConsensusIndex(),
                           nLeftConsPos,
                           nRightConsPos ) ) continue;

         for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
              ++nConsPos ) {

            aHumanPrimatePads[ nConsPos ].bPrimateAlignedBase_ = true;

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
                 > aHumanPrimatePads[ nConsPos ].qual_ ) {
               aHumanPrimatePads[ nConsPos ].qual_ =
                  pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
            }

            
            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
               aHumanPrimatePads[ nConsPos ].bPrimatePad_ = true;
            }
         }
      }
   }


   // now go through again and look for human pads.  

   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

      if ( ntGetCons( nConsPos ).bIsPad() ) {
         aHumanPrimatePads[ nConsPos ].bHumanPad_ = true;
      }
   }
            

   
   // now go through and look for double pads and remove those columns.

   int nIndex;
   for( nIndex = aHumanPrimatePads.length() - 1; nIndex >= 0; --nIndex ) {
      if ( aHumanPrimatePads[ nIndex ].bPrimatePad_ &&
           aHumanPrimatePads[ nIndex ].bHumanPad_ ) {
         aHumanPrimatePads.removeAt( nIndex );
      }
   }


   // now look for isolated bases


   for( nIndex = 1; nIndex <= ( aHumanPrimatePads.length() - 2 ); 
        ++nIndex ) {

      if ( !aHumanPrimatePads[ nIndex - 1 ].bHumanPad_ &&
           aHumanPrimatePads[ nIndex ].bHumanPad_ &&
           !aHumanPrimatePads[ nIndex + 1 ].bHumanPad_ &&
           aHumanPrimatePads[ nIndex ].bPrimateAlignedBase_ ) {
         ++aIsolatedHumanPads[ aHumanPrimatePads[nIndex].qual_ ];


         // this is just temporary so we can navigate by high
         // quality pads
         if ( aHumanPrimatePads[nIndex].qual_ >=
              pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) {

            fprintf( pAO, "human pad %s %d %d\n",
                     soGetName().data(),
                     nUnpaddedIndex( aHumanPrimatePads[nIndex].nConsPos_ ),
                     (int) aHumanPrimatePads[nIndex].qual_ );

         }
         // end temp


      }

      if ( !aHumanPrimatePads[ nIndex - 1 ].bPrimatePad_ &&
           aHumanPrimatePads[ nIndex ].bPrimatePad_ &&
           !aHumanPrimatePads[ nIndex + 1 ].bPrimatePad_ &&
           aHumanPrimatePads[ nIndex - 1 ].bPrimateAlignedBase_ &&
           aHumanPrimatePads[ nIndex ].bPrimateAlignedBase_ &&
           aHumanPrimatePads[ nIndex + 1 ].bPrimateAlignedBase_ ) {
         ++aIsolatedPrimatePads[ aHumanPrimatePads[nIndex].qual_ ];

         // temporary so we can navigate by high quality pads
         if ( aHumanPrimatePads[nIndex].qual_ >=
              pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) {

            fprintf( pAO, "primate pad %s %d %d\n",
                     soGetName().data(),
                     nUnpaddedIndex( aHumanPrimatePads[nIndex].nConsPos_ ),
                     (int) aHumanPrimatePads[nIndex].qual_ );
         }
         // end temporary

      }
   }
}



typedef mbtValVector<Quality> arrayOfQuality;

void Contig :: countPadsForEachSpecies( 
                    const RWTValOrderedVector<RWCString>& aSpecies,
                    RWTValOrderedVector<int>& aNumberOfPadsInPrimateButNotInHuman,
                    RWTValOrderedVector<int>& aNumberOfPadsInHumanButNotInPrimate,
                    int& nPossibleSites ) {



   RWTPtrOrderedVector<mbtValVectorOfBool> aArrayForEachSpeciesWhetherAlignedHQS;

   RWTPtrOrderedVector<arrayOfQuality> aArrayForEachSpeciesOfHighestQuality;
   RWTPtrOrderedVector<mbtValVectorOfBool> aArrayForEachSpeciesWhetherPad;

   int nSpecies;
   for( nSpecies = 0; nSpecies < aSpecies.length(); ++nSpecies ) {
      
      RWCString soName = "Contig::countPadsForEachSpecies ";
      soName += aSpecies[ nSpecies];

      mbtValVectorOfBool* pArrayOfAlignedHQS = 
         new mbtValVectorOfBool( nGetPaddedConsLength(),
                                 1,
                                 soName + "pArrayOfAligned" );

      arrayOfQuality* pArrayOfQuality = 
         new mbtValVector<Quality>( nGetPaddedConsLength(),
                                    1, // offset
                                    0 ); // initial quality

      mbtValVectorOfBool* pArrayOfWhetherPad =
         new mbtValVectorOfBool( nGetPaddedConsLength(),
                                 1,
                                 soName + "pArrayOfWhetherPad" );

      aArrayForEachSpeciesWhetherAlignedHQS.insert( pArrayOfAlignedHQS );
      aArrayForEachSpeciesOfHighestQuality.insert( pArrayOfQuality );
      aArrayForEachSpeciesWhetherPad.insert( pArrayOfWhetherPad );
   }

   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;
      
      int nLeftConsPos = pLocFrag->nGetHighQualityStart();
      int nRightConsPos = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      if ( !bIntersect( nLeftConsPos,
                        nRightConsPos,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nLeftConsPos,
                        nRightConsPos ) ) continue;

      // let's figure out the species of the read

      int nSpeciesOfRead = -1;
      for( int nSpecies = 0; nSpecies < aSpecies.length(); ++nSpecies ) {
         if ( pLocFrag->soGetName().bContains( aSpecies[ nSpecies ] ) ) {
            nSpeciesOfRead = nSpecies;
            break;
         }
      }

      if ( nSpeciesOfRead == -1 ) continue;

      mbtValVectorOfBool* pArrayOfAlignedHQS = 
         aArrayForEachSpeciesWhetherAlignedHQS[ nSpeciesOfRead ];
      arrayOfQuality* pArrayOfQuality = 
         aArrayForEachSpeciesOfHighestQuality[ nSpeciesOfRead ];
      mbtValVectorOfBool* pArrayOfWhetherPad =
         aArrayForEachSpeciesWhetherPad[ nSpeciesOfRead ];

      for( int nConsPos = nLeftConsPos; nConsPos <= nRightConsPos;
           ++nConsPos ) {

         pArrayOfAlignedHQS->setValue( nConsPos, true );
         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
              > (*pArrayOfQuality)[ nConsPos ] ) {
            (*pArrayOfQuality)[ nConsPos ] = 
               pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

            // we are only interested if the highest quality read has
            // a pad

            bool bIsPad = 
               pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad();

            pArrayOfWhetherPad->setValue( nConsPos, bIsPad );
         }
      }
   }


   // now let's see which positions have pads in all species

   mbtValVectorOfBool 
      aArrayAllSpeciesAlignedHQS( 
         nGetPaddedConsLength(),
         1,
         "Contig::countPadsForEachSpecies aArrayForAllSpeciesAlignedHQS" );

   // start out by setting this array true at each consensus
   // position.  If any species is not aligned, set it false.

   int nConsPos;
   for( nConsPos = nGetStartConsensusIndex();
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

      aArrayAllSpeciesAlignedHQS.setValue( nConsPos, true );
   }

   mbtValVectorOfBool
      aArrayAnySpeciesPad( 
         nGetPaddedConsLength(),
         1,
         "Contig::countPadsForEachSpecies aArrayAnySpeciesPad" );

   mbtValVectorOfBool
      aArrayAnySpeciesNotPad(
         nGetPaddedConsLength(),
         1,
         "Contig::countPadsForEachSpecies aArrayAnySpeciesNotPad" );

   mbtValVector<Quality>
      aArrayOfWorstSpeciesQuality( nGetPaddedConsLength(),
                                   1,
                                   100 );
   

   for( nSpecies = 0; nSpecies < 
           aArrayForEachSpeciesWhetherAlignedHQS.length();
        ++nSpecies ) {

      mbtValVectorOfBool* pArrayOfAlignedHQS =
         aArrayForEachSpeciesWhetherAlignedHQS[ nSpecies];
      
      arrayOfQuality* pArrayOfQuality = 
         aArrayForEachSpeciesOfHighestQuality[ nSpecies ];

      mbtValVectorOfBool* pArrayOfWhetherPad =
         aArrayForEachSpeciesWhetherPad[ nSpecies ];

      for( int nConsPos = nGetStartConsensusIndex();
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         if ( !(*pArrayOfAlignedHQS)[nConsPos] ) {
            aArrayAllSpeciesAlignedHQS.setValue( nConsPos, false );
         }

         // special case for human--quality is all 20, but
         // really should be considered 50 or so.

         if ( aSpecies[ nSpecies ] != "HSap" ) {

            if ( aArrayOfWorstSpeciesQuality[ nConsPos ] >
                 (*pArrayOfQuality)[ nConsPos ] ) {
               aArrayOfWorstSpeciesQuality[ nConsPos ] =
                  (*pArrayOfQuality)[ nConsPos ];
            }
         }

         if ( (*pArrayOfWhetherPad)[ nConsPos ] ) {
            aArrayAnySpeciesPad.setValue( nConsPos, true );
         }
         else {
            aArrayAnySpeciesNotPad.setValue( nConsPos, true );
         }
      }
   }



   // find human pad array.  Our rule is that human must have no
   // pad

   mbtValVectorOfBool* pHumanArrayOfWhetherPad = NULL;
      
   for( nSpecies = 0; nSpecies < aSpecies.length(); ++nSpecies ) {
      if ( aSpecies[ nSpecies ].bContains( "HSap" ) ) {
         pHumanArrayOfWhetherPad = aArrayForEachSpeciesWhetherPad[ nSpecies ];
         break;
      }
   }

   assert( pHumanArrayOfWhetherPad );

   
   // now look for a particular motif:
   // b-b where b is no pad, the - can be a pad or not a pad, but must be 
   // of quality above a particular threshold.

   for( nConsPos = nGetStartConsensusIndex() + 1;
        nConsPos <= ( nGetEndConsensusIndex() - 1); ++nConsPos ) {

      if ( aArrayAllSpeciesAlignedHQS[ nConsPos - 1 ] &&
           aArrayAllSpeciesAlignedHQS[ nConsPos ] &&
           aArrayAllSpeciesAlignedHQS[ nConsPos + 1 ] &&
           !aArrayAnySpeciesPad[ nConsPos - 1 ] &&
           //           aArrayAnySpeciesPad[ nConsPos ] &&
           !aArrayAnySpeciesPad[ nConsPos + 1 ] &&
           //           !(*pHumanArrayOfWhetherPad)[ nConsPos ] && 
           ( aArrayOfWorstSpeciesQuality[ nConsPos ] >=
             pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) ) {

         ++nPossibleSites;
      }



      if ( aArrayAllSpeciesAlignedHQS[ nConsPos - 1 ] &&
           aArrayAllSpeciesAlignedHQS[ nConsPos ] &&
           aArrayAllSpeciesAlignedHQS[ nConsPos + 1 ] &&
           !aArrayAnySpeciesPad[ nConsPos - 1 ] &&
           aArrayAnySpeciesPad[ nConsPos ] &&
           !aArrayAnySpeciesPad[ nConsPos + 1 ] &&
           !(*pHumanArrayOfWhetherPad)[ nConsPos ] && 
           ( aArrayOfWorstSpeciesQuality[ nConsPos ] >=
             pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) ) {

         // some species has a pad at this position.  Figure out which
         // it is (may be more than one) and increment its counter.

         for( nSpecies = 0; nSpecies < aArrayForEachSpeciesWhetherPad.length();
              ++nSpecies ) {
            
            mbtValVectorOfBool* pArrayOfWhetherPad =
               aArrayForEachSpeciesWhetherPad[ nSpecies ];

            if ( (*pArrayOfWhetherPad)[ nConsPos ] ) {
               ++aNumberOfPadsInPrimateButNotInHuman[ nSpecies ];


               // temporary--make a script looking just
               // for cases in which pad in human, not in chimp
               if ( aSpecies[ nSpecies ] == "PTro" ) {
                  // help in testing

                  fprintf( pAO, "pad %s %d\n",
                           soGetName().data(),
                           nUnpaddedIndex( nConsPos ) );
               }


            }
         }


      }
   } // for( nConsPos 


   for( nConsPos = nGetStartConsensusIndex() + 1;
        nConsPos <= ( nGetEndConsensusIndex() - 1 ); ++nConsPos ) {

      if ( aArrayAllSpeciesAlignedHQS[ nConsPos - 1 ] &&
           aArrayAllSpeciesAlignedHQS[ nConsPos ] &&
           aArrayAllSpeciesAlignedHQS[ nConsPos + 1 ] &&
           !aArrayAnySpeciesPad[ nConsPos - 1 ] &&
           !aArrayAnySpeciesPad[ nConsPos + 1 ] &&
           (*pHumanArrayOfWhetherPad)[ nConsPos ] &&
           aArrayAnySpeciesNotPad[ nConsPos ] &&
           ( aArrayOfWorstSpeciesQuality[ nConsPos ] >=
             pCP->nQualityThresholdForFindingHighQualityDiscrepancies_ ) ) {
         

         // b*b human
         // bbb some primate
         // b*b (others might still have pads)
         //  ^ all are good quality
         // all primates present with hqs and aligned

         for( nSpecies = 0; nSpecies < aSpecies.length(); ++nSpecies ) {
            mbtValVectorOfBool* pArrayOfWhetherPad =
               aArrayForEachSpeciesWhetherPad[ nSpecies ];

            if ( !(*pArrayOfWhetherPad)[ nConsPos ] ) {
               ++aNumberOfPadsInHumanButNotInPrimate[ nSpecies ];


               // temporary--make a script looking just
               // for cases in which pad in human, not in chimp
               if ( aSpecies[ nSpecies ] == "PTro" ) {
                  // help in testing

                  fprintf( pAO, "pad %s %d\n",
                           soGetName().data(),
                           nUnpaddedIndex( nConsPos ) );
               }

            }
         }

      }
   }
}
         

      
      
   
                                        
      
void Contig :: printBasesInDiscrepantRegions( autoReport* pAutoReport ) {

   setCpGMatrix();

   
   int nBases = nGetNumberOfBasesInContigNotBackbone( "HSap" );

   fprintf( pAO, "\n\nAutoReportPrintDiscrepantRegionsAndBases {\n" );
   fprintf( pAO, "CONTIG: %s %d\n\n", 
            soGetName().data(),
            nBases );

   int nContigLengthPlusSentinels = nGetPaddedConsLength() +
      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;

   int nContigLengthWithoutSentinels = nGetPaddedConsLength();

   mbtValVectorOfBool aDiscrepantPositions( nContigLengthPlusSentinels, 
                                             1, // starts at 1
                                             "Contig::aDiscrepantPositions" );

   mbtValVectorOfBool aDiscrepantPositionsNotNearEndsOfReads( nContigLengthPlusSentinels, 
                                             1, // starts at 1
                                             "Contig::aDiscrepantPositionsNotNearEndsOfReads" );




//    mbtValVectorOfBool aDiscrepantPositionsHighQuality( nContigLengthPlusSentinels,
//                                                        1, // starts at 1
//                                                        "Contig::aDiscrepantPositionsHighQuality" );



   mbtValVector<unsigned char> aLeftSideOfCpG( nContigLengthPlusSentinels,
                                               1, // starts at 1
                                               0 ); // initial value

   aLeftSideOfCpG.soName_ = "Contig::aLeftSideOfCpG";


   typedef RWTPtrOrderedVector<LocatedFragment> arrayOfPointersToLocatedFragments;

   mbtPtrVector<arrayOfPointersToLocatedFragments> 
      aArraysOfDiscrepantReads( nContigLengthPlusSentinels,
                                1); // starts at 1

   aArraysOfDiscrepantReads.soName_ = "Contig::aArraysOfDiscrepantReads";


   mbtValVectorOfBool aAlignedReadsBesidesHuman( nContigLengthPlusSentinels,
                                                 1, // starts at 1
                                                 "Contig::aAlignedReadsBesidesHuman" );


   // this loop takes the non-human reads and set aAlignedReadsBesidesHuman
   // and aBestQuality

   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;

      if ( pLocFrag->soGetName().bContains( pCP->soAutoReportBackboneReadHasThisStringInIt_ ) ) continue;

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nHighQualityAlignedConsPosLeft = pLocFrag->nGetHighQualityStart();
      int nHighQualityAlignedConsPosRight = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight ) ) continue;

      if ( !bIntersect( nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight ) ) continue;

      for( int nConsPos = nHighQualityAlignedConsPosLeft; nConsPos <= nHighQualityAlignedConsPosRight;
           ++nConsPos ) {
         aAlignedReadsBesidesHuman.setValue( nConsPos, true );
         
      }
   }
      


   // record CpG's (and mutant forms), discrepancies, and pads.
   // Do this for all reads, including human backbone, except
   // for specifically ignored reads

   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {

      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );


      if ( pCP->aAutoReportPatternsOfReadsToIgnore_.bIsContainedBy( pLocFrag->soGetName() ) )
         continue;

      // we are only interested in the part of the read that is
      // high quality segment, aligned, and on the consensus

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      int nHighQualityAlignedConsPosLeft = pLocFrag->nGetHighQualityStart();
      int nHighQualityAlignedConsPosRight = pLocFrag->nGetHighQualityEnd();

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( !bIntersect( nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight ) ) continue;

      if ( !bIntersect( nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight,
                        nGetStartConsensusIndex(),
                        nGetEndConsensusIndex(),
                        nHighQualityAlignedConsPosLeft,
                        nHighQualityAlignedConsPosRight ) ) continue;

      //     <------------------------->
      //       high quality and aligned
      //     ^nHighQualityAlignedConsPosLeft             ^nHighQualityAlignedConsPosRight
      //         ^nHighQualityAlignedNotNearEndConsPosLeft
      //                             ^
      //               nHighQualityAlignedNotNearEndConsPosRight

      
      int nHighQualityAlignedNotNearEndConsPosLeft =
         nHighQualityAlignedConsPosLeft + pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;

      int nHighQualityAlignedNotNearEndConsPosRight =
         nHighQualityAlignedConsPosRight - pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;


      for( int nConsPos = nHighQualityAlignedConsPosLeft; nConsPos <= nHighQualityAlignedConsPosRight; 
              ++nConsPos ) {


         // check for CpG -- this needs to include human. 
         // Note that this means that if there is a CpG in which only
         // one of the forms is above the quality threshold, it will
         // not be considered a CpG

         if ( nConsPos < nHighQualityAlignedConsPosRight ) {
            char cBaseA = pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase();
            char cBaseB = pLocFrag->ntGetFragFromConsPos( nConsPos + 1 ).cGetBase();
            if ( cCpGMatrix[cBaseA][cBaseB] ) {
               aLeftSideOfCpG[ nConsPos ] |= cCpGMatrix[cBaseA][cBaseB];
            }
         }


         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase()
              != ntGetCons( nConsPos ).cGetBase() ) {
            aDiscrepantPositions.setValue( nConsPos, true );

            if ( nHighQualityAlignedNotNearEndConsPosLeft <= nConsPos &&
                 nConsPos <= nHighQualityAlignedNotNearEndConsPosRight ) 
               aDiscrepantPositionsNotNearEndsOfReads.setValue( nConsPos, true );
               

            

            arrayOfPointersToLocatedFragments* pArrayOfDiscrepantReads
               = aArraysOfDiscrepantReads[ nConsPos ];

            if ( !pArrayOfDiscrepantReads ) {
               RWCString soName = "RWTPtrOrderedVector<LocatedFragment> at nConsPos = ";
               soName += RWCString( (long) nConsPos );

               pArrayOfDiscrepantReads = 
                  new RWTPtrOrderedVector<LocatedFragment>( 
                         3, // initial capacity
                         soName );

               aArraysOfDiscrepantReads[ nConsPos ] =
                  pArrayOfDiscrepantReads;
            }

            pArrayOfDiscrepantReads->insert( pLocFrag );

         }

      }
   }




   // scan again looking for a discrepancy preceeded by at least
   // nAutoReportNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_



   // mark all locations for which there are at least
   // pCP->nAutoReportNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_
   // non-discrepant positions up to and including that base

   // start this out by being 1 wherever there are high quality segment
   // aligned reads (not the ones to ignore).  Thus this
   // array will be 0 at the beginning and end of the consensus
   // where there are no reads other than the human backbone

   mbtValVectorOfBool aStretchesOfAgreeingBases( nContigLengthPlusSentinels,
                                                 1, // starts at 1
                                                 "Contig::aStretchesOfAgreeingBases" );



   bool bInAgreeingRegion = false;
   int nPositionsThatAgree = 0;
   int nConsPos;
   for( nConsPos = 1; nConsPos <= aDiscrepantPositions.length(); ++nConsPos ) {

      // there must be aligned bases besides human that have
      // no discrepancies for it to be considered 
      // aStretchesOfAgreeingBases true.  Thus if we hit
      // a region of no such aligned reads, 
      // this region is not aStretchesOfAgreeingBases.  Furthermore,
      // further to the right where there are aligned reads, there must 
      // be several columns of agreement before we consider it
      // a aStretchesOfAgreeingBases.  Note that there is a potential
      // problem:
      //    aaaaaaaaaaaaaaaaaaaaaaaaaaaa one read
      //        daaaaaaaaaaaaaaaaaaaaaaa another read
      //        ^ this would be considered an isolated discrepancy.
      //  However, since there are no aligned agreeing bases in the
      //  same read, we probably don't want it.  This could be
      //  fixed by not allowing discrepancies if they occur 
      //  immediately at the beginning or end of a read.

      if ( !aAlignedReadsBesidesHuman[ nConsPos ] ) {
         bInAgreeingRegion = false;
         continue;
      }


      if ( aDiscrepantPositions[ nConsPos ] ) {
         bInAgreeingRegion = false;
      }
      else {
         if ( bInAgreeingRegion ) {
            ++nPositionsThatAgree;
         }
         else {
            bInAgreeingRegion = true;
            nPositionsThatAgree = 1;
         }

         if ( nPositionsThatAgree >= 
                 pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
            aStretchesOfAgreeingBases.setValue( nConsPos, true );
         }
      }

   }


   // now let's calculate the number of "potential" sites for each
   // needed size of discrepant region






   // now print out minimal regions that are flanked by at
   // least pCP->nAutoReportNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_

   for( nConsPos = 1; nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {


      // -----DDDD-----
      //     ^ must be true (aStretchesOfAgreeingBases)

      if ( !aStretchesOfAgreeingBases[nConsPos] ) continue;


      // now want to see a range of high quality discrepancies

      // -----DDDD-----
      //      ^ must be true (aDiscrepantPositions)

      // I used to just look at the discrepant positions.  However,
      // there were cases like this:
      // aaaaaaaaaa
      //     DDaaaaaaaa where the DD was at the beginning of the 
      // high quality aligned portion of the read

      if ( !aDiscrepantPositionsNotNearEndsOfReads[ nConsPos + 1 ] ) continue;


      // now look for another stretch of of agreeing bases on the right side

      // -----D-----
      //     ^nConsPos
      //           ^nFirstConsPosToCheck
      // -----DD-----
      // -----DDD-----
      // -----DDDD-----
      //              ^nLastConsPosToCheck


      int nFirstConsPosToCheck = 
         nConsPos + 1 + 
         pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;

      if ( nFirstConsPosToCheck > nGetEndConsensusIndex() ) continue;

      int nLastConsPosToCheck = 
         MIN( nGetEndConsensusIndex(),
              nConsPos + 
              pCP->nAutoReportMaxSizeOfDiscrepantRegion_ 
              +
              pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ );



      int nLeftDiscrepantRegion = nConsPos + 1;
      int nRightDiscrepantRegion;
      int nSizeOfDiscrepantRegion;


      bool bFoundStretchOfAgreeingBases = false;
      for( int nConsPos2 = nFirstConsPosToCheck; 
           nConsPos2 <= nLastConsPosToCheck && !bFoundStretchOfAgreeingBases; 
           ++nConsPos2 ) {
         if ( aStretchesOfAgreeingBases[ nConsPos2 ] ) {
            bFoundStretchOfAgreeingBases = true;
            nSizeOfDiscrepantRegion = nConsPos2 - nFirstConsPosToCheck + 1;
            nRightDiscrepantRegion = 
               nLeftDiscrepantRegion + nSizeOfDiscrepantRegion - 1;
         }
      }


      if ( !bFoundStretchOfAgreeingBases ) continue;


      // if made it here, this is an isolated discrepant
      // region.  However, it might have CpG mutations in it.

      bool bCpG = false;

      for( int nConsPos4 = nLeftDiscrepantRegion - 1;
           nConsPos4 <= nRightDiscrepantRegion; ++nConsPos4 ) {

         // check with any 2 of CpG mutation's are found here
         // must have 2 or else all 3

         if ( aLeftSideOfCpG[ nConsPos4 ] == 3 ||
              aLeftSideOfCpG[ nConsPos4 ] == 6 ||
              aLeftSideOfCpG[ nConsPos4 ] == 5 ||
              aLeftSideOfCpG[ nConsPos4 ] == 7 ) {
            bCpG = true;
         }
      }

      if ( bCpG ) continue;


      // We must make sure that the agreeing region has at least 
      // pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ 
      // non-pads.  So far we have just counted that many padded positions.

      int nFlankingRegionLeft;
      int nNumberOfNonPads = 0;
      bool bFoundEnoughNonPads = false;
      for( int nConsPos5 = nLeftDiscrepantRegion - 1;
           nConsPos5 >= nGetStartConsensusIndex() && !bFoundEnoughNonPads;
           --nConsPos5 ) {

         if ( !ntGetCons( nConsPos5 ).bIsPad() ) {
            ++nNumberOfNonPads;
            if ( nNumberOfNonPads >=
                 pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
               nFlankingRegionLeft = nConsPos5;
               bFoundEnoughNonPads = true;
            }
          }
      }

      // sorry--can't be sure of the alignment
      if ( !bFoundEnoughNonPads ) continue;


      // now look at the right flanking region

      int nFlankingRegionRight;
      nNumberOfNonPads = 0;
      bFoundEnoughNonPads = false;
      for( int nConsPos6 = nRightDiscrepantRegion + 1;
           nConsPos6 <= nGetEndConsensusIndex() && !bFoundEnoughNonPads;
           ++nConsPos6 ) {
         
         if ( !ntGetCons( nConsPos6 ).bIsPad() ) {
            ++nNumberOfNonPads;
            if ( nNumberOfNonPads >=
                 pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) { 
               nFlankingRegionRight = nConsPos6;
               bFoundEnoughNonPads = true;
            }
         }
      }

      if ( !bFoundEnoughNonPads ) continue;


      // now check that these regions are agreeing.  the left flanking
      // region goes from nFlankingRegionLeft to nLeftDiscrepantRegion - 1.
      // Since aStretchOfAgreeingBases is indexed by the right
      // base of the stretch of 
      // pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_
      // bases, we need to check this range:

      bool bOK = true;
      for( int nConsPos7 = nFlankingRegionLeft + pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_  - 1;
           nConsPos7 <= nLeftDiscrepantRegion - 1; ++nConsPos7 ) {

         if ( !aStretchesOfAgreeingBases[ nConsPos7 ] ) {
            bOK = false;
            break;
         }
      }

      if ( !bOK ) {
         continue;
      }

      // check the right region that it agrees
      // the right region is from nRightDiscrepantRegion + 1 to
      // nFlankingRegionRight

      for( int nConsPos8 = nRightDiscrepantRegion + pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_;
           nConsPos8 <= nFlankingRegionRight;
           ++nConsPos8 ) {
         if ( !aStretchesOfAgreeingBases[ nConsPos8 ] ) {
            bOK = false;
            break;
         }
      }

      if ( !bOK ) {
         continue;
      } 


      // now we want to print out the bases for all species.  
      // If there is no read for a species, print out a "?"

      int nLowestQuality = 200;
      int nHighestQuality = -1;

      for( int nConsPos3 = nLeftDiscrepantRegion;
           nConsPos3 <= nRightDiscrepantRegion; ++nConsPos3 ) {

         // this can happen as follows:

         // aaaaaaDaDaaaaaaaa
         //        ^ in middle of discrepant region

         if ( !aDiscrepantPositions[ nConsPos3 ] ) continue;
         
         
         int nNumberOfSpecies = pAutoReport->aArrayOfSpecies_.length();

         RWTPtrVector<LocatedFragment> aBestReadForSpecies( 
                          (size_t) nNumberOfSpecies,   
                           0 );
         // initialize to 0

         RWTValVector<char> aBaseOfBestReadForSpecies(
                                    (size_t) nNumberOfSpecies,
                                    '?' ); // initialize to ?


         for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
            LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

            if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

            if ( ! ( pLocFrag->nGetHighQualityStart() <= nFlankingRegionLeft 
                     &&
                     nFlankingRegionRight <= pLocFrag->nGetHighQualityEnd() ) )
               continue;

            if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

            if ( ! ( pLocFrag->nGetAlignClipStart() <= nFlankingRegionLeft
                     &&
                     nFlankingRegionRight <= pLocFrag->nGetAlignClipEnd() ) )
               continue;


            // figure out what species this is.


            int nSpecies = 
               pAutoReport->aArrayOfSpecies_.nFindIndexOfFirstContainedBy( pLocFrag->soGetName() );

            if ( nSpecies == RW_NPOS ) {
               cerr << "couldn't find species for: " << pLocFrag->soGetName() << endl;
            }


            assert( nSpecies != RW_NPOS );


            if ( !aBestReadForSpecies[nSpecies] ) {
               aBestReadForSpecies[nSpecies] = pLocFrag;
               aBaseOfBestReadForSpecies[ nSpecies ] = 
                  pLocFrag->ntGetFragFromConsPos( nConsPos3 ).cGetBase();
            }
            else {
               // there already is a read for this species.  Let's see
               // which is better

               LocatedFragment* pOldRead = aBestReadForSpecies[nSpecies];
               
               const int nWindowSize = 11;

               float fOldReadQuality = 
                  pOldRead->fGetAverageQualityInWindow( nConsPos3, 
                                                        nWindowSize );

               float fNewReadQuality =
                  pLocFrag->fGetAverageQualityInWindow( nConsPos3,
                                                        nWindowSize );


               if ( fNewReadQuality > fOldReadQuality ) {
                  aBestReadForSpecies[nSpecies] = pLocFrag;
                  aBaseOfBestReadForSpecies[ nSpecies ] =
                     pLocFrag->ntGetFragFromConsPos( nConsPos3 ).cGetBase();
               }
            }
         } //  for( int nRead = 0; nRead < nGetNumberOfFragsInContig();


         // at this point, we've collected info on all the species at this
         // nConsPos3.  So let's print them out.

         for( int nSpecies = 0; nSpecies < 
                 pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

            putc( aBaseOfBestReadForSpecies[ nSpecies ], pAO );
            putc( ' ', pAO );
         }

         // let's add a little information in case we want to find this
         // location to study it further, such as look at the traces

         fprintf( pAO, "%s %d %s\n",
                  soGetName().data(), // contig name
                  nUnpaddedIndex( nConsPos3 ),
                  ConsEd::pGetAssembly()->soGetAceFileName().data() ); // do we really want to write this so many times?


         // START--looking for informative trees.  At least 2 species
         // for each of 2 different bases.


         bool bFoundTreeWith2Groups = false;
         for( int nQualityCutoff = 70; 
              nQualityCutoff >= 5 && !bFoundTreeWith2Groups;
              nQualityCutoff -= 5 ) {

            int nNumberOfSpeciesForBase[256];

            for( int n = 0; n < 256; ++n ) 
               nNumberOfSpeciesForBase[n] = 0;

            bool bMMulAboveCutoff = false;
            bool bPPygAboveCutoff = false;

            char cMMul = '?';
            char cPPyg = '!';

            for( int nSpecies = 0; nSpecies < 
                    pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

               RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];


               int nQuality = -1;
               if ( soSpecies == "HSap" ) 
                  nQuality = 50;
               else if ( aBestReadForSpecies[ nSpecies ] ) {
                  nQuality = aBestReadForSpecies[ nSpecies ]->ntGetFragFromConsPos( nConsPos3 ).qualGetQuality();
               }

               if ( nQuality >= nQualityCutoff ) {
                  ++nNumberOfSpeciesForBase[ aBestReadForSpecies[ nSpecies ]->ntGetFragFromConsPos( nConsPos3 ).cGetBase() ];

                  if ( soSpecies == "MMul" ) {
                     bMMulAboveCutoff = true;
                     cMMul = aBestReadForSpecies[ nSpecies ]->ntGetFragFromConsPos( nConsPos3 ).cGetBase();
                  }
                  else if ( soSpecies == "PPyg" ) {
                     bPPygAboveCutoff = true;
                     cPPyg = aBestReadForSpecies[ nSpecies ]->ntGetFragFromConsPos( nConsPos3 ).cGetBase();
                  }
               }
            }

            if ( bMMulAboveCutoff && bPPygAboveCutoff ) {
               // check that there are at least 2 groups of species
               // and each group has at least 2 species in it.

               int nNumberOfGroupsWithAtLeast2Species = 0;
               for( int n = 0; n < 256; ++n ) {
                  if ( nNumberOfSpeciesForBase[n] >= 2 ) {
                     ++nNumberOfGroupsWithAtLeast2Species;
                  }
               }


               if ( nNumberOfGroupsWithAtLeast2Species >= 2 ) {
                  // we have found a location with at least 2 bases.
                  // Each of these 2 bases is in common with at least
                  // 2 different species, both with at least quality nQualityCutoff
                  // Furthermore, MMul and PPyg are both above the cutoff.

                  RWCString soPossible = "possible";
                  if ( cMMul != cPPyg &&
                       nNumberOfSpeciesForBase[cMMul] >= 2 &&
                       nNumberOfSpeciesForBase[cPPyg] >= 2 ) {
                     soPossible = "impossible";
                  }



                  fprintf( pAO, "not necessarily problem tree: " );

                  fprintf( pAO, "%s %d %s %d %s\n",
                           soGetName().data(), // contig name
                           nUnpaddedIndex( nConsPos3 ),
                           ConsEd::pGetAssembly()->soGetAceFileName().data(),
                           nQualityCutoff,
                           soPossible.data() );

                  bFoundTreeWith2Groups = true;
               }
            }

         }
         

         // END--looking for informative trees

      } //       for( int nConsPos3 = nLeftDiscrepantRegion;

      int nQualityDifference = nHighestQuality - nLowestQuality;
      fprintf( pAO, "qual difference: %d high: %d low: %d size: %d %d to %d %s\n",
               nQualityDifference,
               nHighestQuality,
               nLowestQuality,
               nSizeOfDiscrepantRegion,
               nUnpaddedIndex( nLeftDiscrepantRegion ),
               nUnpaddedIndex( nRightDiscrepantRegion ),
               soGetName().data() );


      // START--code looking for ambiguously aligned regions
      // look for ambiguous alignments by seeing if a shift can 
      // be done 

      
      bool bAmbiguousOnLeft = false;
      bool bAmbiguousOnRight = false;

      
      for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );


         if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

         if ( ! ( pLocFrag->nGetHighQualityStart() <= nFlankingRegionLeft &&
                  nFlankingRegionRight <= pLocFrag->nGetHighQualityEnd() ) )
            continue;

         if ( pLocFrag->bIsWholeReadUnaligned() ) continue;


         if ( ! ( pLocFrag->nGetAlignClipStart() <= nFlankingRegionLeft &&
               nFlankingRegionRight <= pLocFrag->nGetAlignClipEnd() ) )
            continue;

         RWCString soBases( (size_t) pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ );

         for( int nConsPos4 = nFlankingRegionLeft; nConsPos4 <= ( nLeftDiscrepantRegion - 1); 
              ++nConsPos4 ) {
            if ( !pLocFrag->ntGetFragFromConsPos( nConsPos4 ).bIsPad() ) {
               soBases += pLocFrag->ntGetFragFromConsPos( nConsPos4 ).cGetBase();
            }
         }

         
         // now see if there is any ambiguity of the alignment


         if ( soBases.length() > 2 ) {

            int n1Shorter = soBases.length() - 1;
            int n2Shorter = soBases.length() - 2;
            
            if ( soBases.soGetRestOfString( 1 ) == soBases( 0, n1Shorter ) ) {
               // problem--match like this:
               // aaaaaxxxaaaaa
               //  aaaaaxxxaaaaa

               bAmbiguousOnLeft = true;


            }
            else if ( soBases.soGetRestOfString( 2 ) == soBases( 0, n2Shorter ) ) {
               // problem.  Match like this:
               // aaaaaxxxaaaaa
               //   aaaaaxxxaaaaa


               bAmbiguousOnLeft = true;

            }

         }

         soBases = "";
         for( int nConsPos9 = nRightDiscrepantRegion + 1;
              nConsPos9 <= nFlankingRegionRight; ++nConsPos9 ) {
            if ( !pLocFrag->ntGetFragFromConsPos( nConsPos9 ).bIsPad() ) {
               soBases += pLocFrag->ntGetFragFromConsPos( nConsPos9 ).cGetBase();
            }
         }


         if ( soBases.length() > 2 ) {
            
            int n1Shorter = soBases.length() - 1;
            int n2Shorter = soBases.length() - 2;
            
            if ( soBases.soGetRestOfString( 1 ) == soBases( 0, n1Shorter ) ) {

               bAmbiguousOnRight = true;
            }
            else if ( soBases.soGetRestOfString( 2 ) == soBases( 0, n2Shorter ) ) {
               bAmbiguousOnRight = true;

            }
         }

      }//   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); 



      if ( bAmbiguousOnLeft && bAmbiguousOnRight ) {
         fprintf( pAO, "ambiguous alignment left/right unpadded %d to %d %s\n",
                        nUnpaddedIndex( nFlankingRegionLeft ),
                        nUnpaddedIndex( nFlankingRegionRight ),
                        soGetName().data() );

      }
            

      // END--code looking for ambiguously aligned regions
         



   } //    for( nConsPos = 1; nConsPos <= nGetEndConsensusIndex(); 


   fprintf( pAO, "} AutoReportPrintDiscrepantRegionsAndBases\n" );

}


static bool bIsCpG( const char cBaseA1,
                    const char cBaseA2,
                    const char cBaseA3,
                    const char cBaseB1,
                    const char cBaseB2,
                    const char cBaseB3 ) {
   
   
   int nCpGALeft = cCpGMatrix[ cBaseA1 ][ cBaseA2 ];
   int nCpGBLeft = cCpGMatrix[ cBaseB1 ][ cBaseB2 ];

   if ( nCpGALeft && nCpGBLeft && ( nCpGALeft != nCpGBLeft ) )
        return true;

   int nCpGARight = cCpGMatrix[ cBaseA2 ][ cBaseA3 ];
   int nCpGBRight = cCpGMatrix[ cBaseB2 ][ cBaseB3 ];

   if ( nCpGARight && nCpGBRight && ( nCpGARight != nCpGBRight ) )
      return true;


   return false;
}



const int nWindowLow = 10;
const int nWindowHigh = 15;
           

void Contig :: printAgreeDisagreeBetweenPairsOfSpecies( autoReport* pAutoReport ) {

   double dErrors = 0;
   setErrorRateFromQuality();
   int nDiscrepancies = 0;


   setCpGMatrix();


   int nContigLengthPlusSentinel = nGetPaddedConsLength() + 1;
 
   RWTValVector<char> aBaseA( (size_t) nContigLengthPlusSentinel,
                               '?',
                               "aBasesA" );

   RWTValVector<char> aBaseB( (size_t) nContigLengthPlusSentinel,
                               '?',
                               "aBasesB" );

   RWTPtrVector<LocatedFragment> aReadA( (size_t) nContigLengthPlusSentinel,
                                         NULL,
                                         "aReadA" );

   RWTPtrVector<LocatedFragment> aReadB( (size_t) nContigLengthPlusSentinel,
                                         NULL,
                                         "aReadB" );

   RWTValVector<int> aQualityA( (size_t) nContigLengthPlusSentinel,
                                0,
                                "aQualityA" );

   RWTValVector<int> aQualityB( (size_t) nContigLengthPlusSentinel,
                                0,
                                "aQualityB" );

   


   for( int nSpeciesA = 0; nSpeciesA < 
           pAutoReport->aArrayOfSpecies_.length() - 1; ++nSpeciesA ) {

      RWCString soSpeciesA = pAutoReport->aArrayOfSpecies_[ nSpeciesA ];

      for( int nSpeciesB = nSpeciesA + 1; 
           nSpeciesB < pAutoReport->aArrayOfSpecies_.length();
           ++nSpeciesB ) {

         RWCString soSpeciesB = pAutoReport->aArrayOfSpecies_[ nSpeciesB ];


         // clear arrays

         for( int n = 0; n < aBaseA.length(); ++n ) {
            aBaseA[n] = '?';
            aBaseB[n] = '?';
            aReadA[n] = NULL;
            aReadB[n] = NULL;
            aQualityA[n] = 0;
            aQualityB[n] = 0;
         }



         for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
            LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

            // is this one of the species we are interested in?

            if ( !pLocFrag->soGetName().bContains( soSpeciesA ) &&
                 !pLocFrag->soGetName().bContains( soSpeciesB ) )
               continue;

            // yes.

            // get range of high quality aligned sequence

            if ( pLocFrag->bIsWholeReadLowQuality() ) continue;
            if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

            int nAlignedHighQualityLeft = nGetStartConsensusIndex();
            int nAlignedHighQualityRight = nGetEndConsensusIndex();

            if ( !bIntersect( nAlignedHighQualityLeft,
                              nAlignedHighQualityRight,
                              pLocFrag->nGetAlignClipStart(),
                              pLocFrag->nGetAlignClipEnd(),
                              nAlignedHighQualityLeft,
                              nAlignedHighQualityRight ) ) continue;

            if ( !bIntersect( nAlignedHighQualityLeft,
                              nAlignedHighQualityRight,
                              pLocFrag->nGetHighQualityStart(),
                              pLocFrag->nGetHighQualityEnd(),
                              nAlignedHighQualityLeft,
                              nAlignedHighQualityRight ) ) continue;


            RWTValVector<int>* pQualityArray = 0;
            RWTPtrVector<LocatedFragment>* pReadArray = 0;
            RWTValVector<char>* pBaseArray = 0;


            if ( pLocFrag->soGetName().bContains( soSpeciesA ) ) {
               pQualityArray = &aQualityA;
               pReadArray = &aReadA;
               pBaseArray = &aBaseA;
            }
            else {

               pQualityArray = &aQualityB;
               pReadArray = &aReadB;
               pBaseArray = &aBaseB;
            }


            for( int nConsPos = nAlignedHighQualityLeft;
                 nConsPos <= nAlignedHighQualityRight;
                 ++nConsPos ) {


               
               if ( ! ( (*pReadArray)[ nConsPos ] ) ) {
                  (*pReadArray)[ nConsPos ] = pLocFrag;
                  (*pQualityArray)[ nConsPos ] = pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
                  (*pBaseArray)[ nConsPos ] = pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase();


               }
               else {

                  // already a read from the other strand
                  if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
                       (*pBaseArray)[ nConsPos ] ) {
                     // what to do?  Let's pretend there was never a base
                     // here on either strand--can't use the data

                     (*pReadArray)[ nConsPos ] = 0;
                     (*pQualityArray)[ nConsPos ] = 0;
                     (*pBaseArray)[ nConsPos ] = '?';

                  }
                  else {
                     // so the bases agree.  Since they are on opposite
                     // strands, let's add the quality values.

                     (*pQualityArray)[ nConsPos ] +=
                        pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
                  }
               }
            } //  for( int nConsPos = nAlignedHighQualityLeft;

         } //  for( int nRead = 0; nRead < nGetNumberOfFragsInContig();

         // now we can do the analysis

         const int nMaxQualityPlus1 = 100;
         int nFrequencyOfAgreeingBases[nMaxQualityPlus1][nMaxQualityPlus1];
         int nFrequencyOfDisagreeingBases[nMaxQualityPlus1][nMaxQualityPlus1];
         int nFrequencyOfCpGDisagreeingBases[nMaxQualityPlus1][nMaxQualityPlus1];

         // zero the arrays

         for( int n1 = 0; n1 < nMaxQualityPlus1; ++n1 ) {
            for( int n2 = 0; n2 < nMaxQualityPlus1; ++n2 ) {
               nFrequencyOfAgreeingBases[n1][n2] = 0;
               nFrequencyOfDisagreeingBases[n1][n2] = 0;
               nFrequencyOfCpGDisagreeingBases[n1][n2] = 0;
            }
         }
               
         bool bHaveAtLeastOneValue = false;

         // Let's look for a motif like this:
         // aaaaa?aaaaa 
         // (5 agreeing bases, a base pair not pad, 5 agreeing bases)


         for( int nConsPos = nGetStartConsensusIndex() + pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - 1 ;
              nConsPos <= nGetEndConsensusIndex() - pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + 1; ++nConsPos ) {

            bool bAgreeSoFar = true;

            // look to the left of the pair of aligned bases
            int nConsPos2 = nConsPos; // first -- will make it start
            // to the left
            int nNumberOfNonPadAgreeingBases = 0;
            while( nNumberOfNonPadAgreeingBases <
                   pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
               --nConsPos2;
               if ( nConsPos2 < nGetStartConsensusIndex() ) {
                  bAgreeSoFar = false;
                  break;
               }


               // in the following case:
               //  *
               //  b
               // (a pad aligned against a base)
               // this is counted the same as a disagreeing pair of non-pads
               

               if ( ! ( 
                       aReadA[ nConsPos2 ] && aReadB[ nConsPos2 ] &&
                       ( aBaseA[ nConsPos2 ] == aBaseB[ nConsPos2 ] ) ) ) {
                  bAgreeSoFar = false;
                  break;
               }

               // if made it here, there are reads from both species
               // and they agree
               if ( aBaseA[ nConsPos2 ] != '*' ) {
                  ++nNumberOfNonPadAgreeingBases;
               }
            }

            if ( !bAgreeSoFar ) continue;

            // now look to the right of the pair of aligned bases

            nConsPos2 = nConsPos; // first ++ will make it start
            // to the right
            nNumberOfNonPadAgreeingBases = 0;
            while( nNumberOfNonPadAgreeingBases <
                   pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
               ++nConsPos2;
               if ( nConsPos2 > nGetEndConsensusIndex() ) {
                  bAgreeSoFar = false;
                  break;
               }

               if ( ! ( 
                           aReadA[ nConsPos2 ] && aReadB[ nConsPos2 ] &&
                           ( aBaseA[ nConsPos2 ] == aBaseB[ nConsPos2 ] ) ) ) {
                  bAgreeSoFar = false;
                  break;
               }
                   
               // if made it here, there are reads from both species
               // and they agree

               if ( aBaseA[ nConsPos2] != '*' ) {
                  ++nNumberOfNonPadAgreeingBases;
               }
            }

            if ( !bAgreeSoFar ) continue;

            // we have found such a motif.

            // how could this fail( below) ? (4/29/05 DG)
            // i.e. how could there a read from species A and B at nConsPos?
            // It could fail if there are 2 reads from species A and
            // they disagree with each other and there is no way to
            // reconcile them.


            // should also add that neither is a pad.
            // We are not interested in either of these cases:

            //   bbbbb*bbbbb
            //   bbbbbbbbbbb
            //        ^
            //   We aren't interested in this case because such an indel
            //   has a different probability (hence branch length) than
            //   a substitution mutation.  

            // 
            //   bbbbb*bbbbb
            //   bbbbb*bbbbb
            //        ^
            //   We aren't interested in this case because this isn't
            //   either a mutation or a non-mutation.  It is simply an
            //   artifact of our aligning.

            if ( aReadA[ nConsPos ] && aReadB[ nConsPos ] ) {

               const bool bDisallowIndels = true;

               if ( bDisallowIndels ) {
                  if ( aBaseA[ nConsPos ] == '*' ||
                       aBaseB[ nConsPos ] == '*' )
                     continue;
               }
               else {
                  if ( aBaseA[ nConsPos ] == '*' &&
                       aBaseB[ nConsPos ] == '*' ) continue;
               }
               bHaveAtLeastOneValue = true;

               if ( aBaseA[ nConsPos ] == aBaseB[ nConsPos ] ) {
                  ++nFrequencyOfAgreeingBases[ aQualityA[ nConsPos ] ][ aQualityB[ nConsPos ] ];

               }
               else {
                  ++nFrequencyOfDisagreeingBases[ aQualityA[ nConsPos ] ][ aQualityB[ nConsPos ] ];

                  if ( bIsCpG( aBaseA[ nConsPos - 1 ], 
                               aBaseA[ nConsPos ], 
                               aBaseA[ nConsPos + 1 ],
                               aBaseB[ nConsPos - 1 ],
                               aBaseB[ nConsPos ],
                               aBaseB[ nConsPos + 1 ] ) ) {
                     ++nFrequencyOfCpGDisagreeingBases[ aQualityA[ nConsPos ] ][ aQualityB[ nConsPos ] ];
                  }

               }

               // testing

               if ( soSpeciesA == "PPan" && soSpeciesB == "PTro" &&
                    nWindowLow <= aQualityA[ nConsPos ] &&
                    nWindowLow <= aQualityB[ nConsPos ] &&
                    ! ( nWindowHigh <= aQualityA[ nConsPos ] &&
                        nWindowHigh <= aQualityB[ nConsPos ] ) ) {

                  dErrors += 
                     ( dErrorRateFromQuality[ aQualityA[ nConsPos ] ] *
                       (1.0 - dErrorRateFromQuality[ aQualityB[ nConsPos ] ] ));
                  dErrors += 
                       ( dErrorRateFromQuality[ aQualityB[ nConsPos ] ] *
                         ( 1.0 - dErrorRateFromQuality[ aQualityA[ nConsPos ]] ));

                  dErrors += 2.0 / 3.0 * 
                     dErrorRateFromQuality[ aQualityA[ nConsPos ] ] *
                     dErrorRateFromQuality[ aQualityB[ nConsPos ] ];


                  if ( aBaseA[ nConsPos ] != aBaseB[ nConsPos ] ) 
                     ++nDiscrepancies;

                  fprintf( pAO, "nav %s %s %d %c %c %d %d %.4g %d %s\n",
                           soSpeciesA.data(),
                           soSpeciesB.data(),
                           nUnpaddedIndex( nConsPos ),
                           aBaseA[nConsPos],
                           aBaseB[nConsPos],
                           aQualityA[ nConsPos ],
                           aQualityB[ nConsPos ],
                           dErrors,
                           nDiscrepancies,
                           soGetName().data() );
               }
                        


               // end testing




            }
             
         }


         if ( bHaveAtLeastOneValue ) {
               

            // now print out these arrays

            fprintf( pAO, "Agreeing and Disagreeing Bases %s %s {\n",
                  soSpeciesA.data(),
                  soSpeciesB.data() );

            for( int nQualityA = 0; nQualityA < 100; ++nQualityA ) {
               for( int nQualityB = 0; nQualityB < 100; ++nQualityB ) {
                  if ( nFrequencyOfAgreeingBases[nQualityA][nQualityB] ||
                       nFrequencyOfDisagreeingBases[nQualityA][nQualityB] ) {
                     fprintf( pAO, "%d %d %d %d %d\n",
                              nQualityA,
                              nQualityB,
                              nFrequencyOfAgreeingBases[nQualityA][nQualityB],
                              nFrequencyOfDisagreeingBases[nQualityA][nQualityB],
                              nFrequencyOfCpGDisagreeingBases[nQualityA][nQualityB]);

                  }
               }
            }


            fprintf( pAO, "} Agreeing and Disagreeing Bases %s %s\n",
                  soSpeciesA.data(),
                  soSpeciesB.data() );

         } // bHaveAtLeastOneValue

      } //       for( int nSpeciesB = nSpeciesA + 1; 
   } //    for( int nSpeciesA = 0; nSpeciesA < 

}




void Contig :: calculateErrorProbabilitiesByComparingPTroPPan( autoReport* pAutoReport ) {

   setErrorRateFromQuality();


   int nContigLengthPlusOne = nGetPaddedConsLength() + 1;
 
   RWTValVector<char> aBaseOfHighQualityRead( (size_t) nContigLengthPlusOne,
                               '?',
                               "aBaseOfHighQualityReads" );

   RWTPtrVector<LocatedFragment> aHighQualityRead( (size_t) nContigLengthPlusOne,
                                         NULL,
                                         "aHighQualityRead" );

   RWTValVector<int> aQualityOfHighQualityRead( (size_t) nContigLengthPlusOne,
                                0,
                                "aQualityOfHighQualityRead" );

   RWTValVector<bool> aAmbiguousBase( (size_t) nContigLengthPlusOne,
                                      false,
                                      "aAmbiguousBase" );



   int nTotalDiscrepancies = 0;

   int nTotalBasesFlanked = 0;

   double dTotalPredictedMeanErrors = 0.0;

   const int nQualityCutoff = 45;

   int nRead;
   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      // is this one of the species we are interested in?

      if ( !pLocFrag->soGetName().bContains( "PPan" ) &&
                 !pLocFrag->soGetName().bContains( "PTro" ) )
         continue;


      // get range of high quality aligned sequence

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;
      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      int nAlignedHighQualityLeft = nGetStartConsensusIndex();
      int nAlignedHighQualityRight = nGetEndConsensusIndex();

      if ( !bIntersect( nAlignedHighQualityLeft,
                        nAlignedHighQualityRight,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nAlignedHighQualityLeft,
                        nAlignedHighQualityRight ) ) continue;

      if ( !bIntersect( nAlignedHighQualityLeft,
                        nAlignedHighQualityRight,
                        pLocFrag->nGetHighQualityStart(),
                        pLocFrag->nGetHighQualityEnd(),
                        nAlignedHighQualityLeft,
                        nAlignedHighQualityRight ) ) continue;


      for( int nConsPos = nAlignedHighQualityLeft;
           nConsPos <= nAlignedHighQualityRight; ++nConsPos ) {

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() >=
              nQualityCutoff ) {

            if ( !aHighQualityRead[ nConsPos ] ) {
               aHighQualityRead[nConsPos ] = pLocFrag;
               aQualityOfHighQualityRead[ nConsPos ] = 
                  pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
               aBaseOfHighQualityRead[ nConsPos ] = 
                  pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase();
            }
            else {
               // there is already a base there.  What is it?

               if ( aBaseOfHighQualityRead[ nConsPos ] != 
                    pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {
                  aAmbiguousBase[ nConsPos ] = true;
               }
            }
         }
      } //  for( int nConsPos = nAlignedHighQualityLeft;
   } // for( int nRead = 0; nRead < nGetNumberOfFragsInContig()


   for( nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      // is this one of the species we are interested in?

      if ( !pLocFrag->soGetName().bContains( "PPan" ) &&
                 !pLocFrag->soGetName().bContains( "PTro" ) )
         continue;


      // get range of high quality aligned sequence

      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;
      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      int nAlignedHighQualityLeft = nGetStartConsensusIndex();
      int nAlignedHighQualityRight = nGetEndConsensusIndex();

      if ( !bIntersect( nAlignedHighQualityLeft,
                        nAlignedHighQualityRight,
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nAlignedHighQualityLeft,
                        nAlignedHighQualityRight ) ) continue;

      if ( !bIntersect( nAlignedHighQualityLeft,
                        nAlignedHighQualityRight,
                        pLocFrag->nGetHighQualityStart(),
                        pLocFrag->nGetHighQualityEnd(),
                        nAlignedHighQualityLeft,
                        nAlignedHighQualityRight ) ) continue;


      for( int nConsPos = nAlignedHighQualityLeft;
           nConsPos <= nAlignedHighQualityRight; ++nConsPos ) {


         if ( !aHighQualityRead[ nConsPos ] ) continue;
         if ( aHighQualityRead[ nConsPos ] == pLocFrag ) continue;
         if ( aAmbiguousBase[ nConsPos ] ) continue;
         if ( aBaseOfHighQualityRead[ nConsPos ] == '*' ) continue;



         // testing--restrict to low quality reads in the nWindowLow to
         // nWindowHigh - 1
         // window

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
              < nWindowLow ) continue;

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
              >= nWindowHigh &&
              aQualityOfHighQualityRead[ nConsPos ] >= nWindowHigh ) continue;
         // end testing

         // now look left and right and see if there are flanking agreeing bases
         // from the 2 reads


         LocatedFragment* pHighQualityRead = aHighQualityRead[ nConsPos ];
         if ( pHighQualityRead->bIsWholeReadLowQuality() ) continue;
         if ( pHighQualityRead->bIsWholeReadUnaligned() ) continue;

         int nAlignedHighQualityLeft2 = nGetStartConsensusIndex();
         int nAlignedHighQualityRight2 = nGetEndConsensusIndex();
         
         if ( !bIntersect( nAlignedHighQualityLeft2,
                           nAlignedHighQualityRight2,
                           pHighQualityRead->nGetAlignClipStart(),
                           pHighQualityRead->nGetAlignClipEnd(),
                           nAlignedHighQualityLeft2,
                           nAlignedHighQualityRight2 ) ) continue;

         if ( !bIntersect( nAlignedHighQualityLeft2,
                           nAlignedHighQualityRight2,
                           pHighQualityRead->nGetHighQualityStart(),
                           pHighQualityRead->nGetHighQualityEnd(),
                           nAlignedHighQualityLeft2,
                           nAlignedHighQualityRight2 ) ) continue;

         


         int nAlignedHighQualityLeftBoth;
         int nAlignedHighQualityRightBoth;

         assert( bIntersect( nAlignedHighQualityLeft,
                             nAlignedHighQualityRight,
                             nAlignedHighQualityLeft2,
                             nAlignedHighQualityRight2,
                             nAlignedHighQualityLeftBoth,
                             nAlignedHighQualityRightBoth ) );



         bool bAgreeSoFar = true;

         int nConsPos2 = nConsPos; // first -- will make it start to the left

         int nNumberOfNonPadAgreeingBases = 0;
         while( nNumberOfNonPadAgreeingBases <
                pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
            --nConsPos2;
            if ( nConsPos2 < nAlignedHighQualityLeftBoth ) {
               bAgreeSoFar = false;


               break;
            }

            // the following case:
            // *
            // b
            // (a pad aligned against a base)
            // this is counted the same as a disagreeing pair of non-pads

            char cBaseOfHighQualityRead = aBaseOfHighQualityRead[ nConsPos2 ];
            char cBase2 = 
               pLocFrag->ntGetFragFromConsPos( nConsPos2 ).cGetBase();

            if ( cBaseOfHighQualityRead != cBase2 ) {
               bAgreeSoFar = false;


               break;
            }




            // since the bases are equal, this checks that neither is a pad
            if ( cBaseOfHighQualityRead != '*' ) {
               ++nNumberOfNonPadAgreeingBases;
            }
         }

         if ( !bAgreeSoFar ) continue;




         // now look to the right

         nConsPos2 = nConsPos; // first ++ will make it start to the right

         nNumberOfNonPadAgreeingBases = 0;
         while( nNumberOfNonPadAgreeingBases <
                pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
            ++nConsPos2;
            if ( nConsPos2 > nAlignedHighQualityRightBoth ) {
               // like this:
               // B---- (end of hqs aligned part of read)
               // B-------
               // ^ base considered

               // Thus not flanked on right by enough agreeing bases.

               bAgreeSoFar = false;
               break;
            }
            
            char cBaseOfHighQualityRead = aBaseOfHighQualityRead[ nConsPos2 ];
            char cBase2 =
               pLocFrag->ntGetFragFromConsPos( nConsPos2 ).cGetBase();
            
            if ( cBaseOfHighQualityRead != cBase2 ) {


               bAgreeSoFar = false;
               break;
            }

            if ( cBaseOfHighQualityRead != '*' ) {
               ++nNumberOfNonPadAgreeingBases;
            }
         }

         if ( !bAgreeSoFar ) continue;


         // so we've found a base that lies within a motif like this:
         
         // -----B-----
         // -----B----- where the '-----' are agreeing bases

         // the reads are pLocFrag and pHighQualityRead

         ++nTotalBasesFlanked;

         if ( aBaseOfHighQualityRead[ nConsPos ] !=
              pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {
            ++nTotalDiscrepancies;
         }

         // calculate error probabilities

         double dErr1 = 
            dErrorRateFromQuality[ pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ];

         double dErr2 =
            dErrorRateFromQuality[ pHighQualityRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ];

         double dProbabilityOfADiscrepancy =
            dErr1 * ( 1.0 - dErr2 ) + dErr2 * ( 1.0 - dErr1 ) +
            2.0/3.0 * dErr1 * dErr2;

         dTotalPredictedMeanErrors += dProbabilityOfADiscrepancy;

         fprintf( pAO, "nav: %s %d %s %d %s %d %d %d\n",
                     soGetName().data(),
                     nUnpaddedIndex( nConsPos ),
                     pHighQualityRead->soGetName().data(),
                     pHighQualityRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality(),
                     pLocFrag->soGetName().data(),
                     pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality(),
                     nTotalDiscrepancies,
                     nTotalBasesFlanked );




      } //  for( int nConsPos = nAlignedHighQualityLeft;
   } // for( int nRead = 0; nRead < nGetNumberOfFragsInContig()


                  
   fprintf( pAO, "bases: %d discrepancies: %d predicted: %.6f\n",
            nTotalBasesFlanked,
            nTotalDiscrepancies,
            dTotalPredictedMeanErrors );
}



 
void Contig :: printAgreeDisagreeBetweenPairsOfSpecies2( autoReport* pAutoReport ) {

   double dErrors = 0;
   setErrorRateFromQuality();
   int nDiscrepancies = 0;


   setCpGMatrix();


   int nContigLengthPlusSentinel = nGetPaddedConsLength() + 1;
 
   RWTValVector<char> aBaseA( (size_t) nContigLengthPlusSentinel,
                               '?',
                               "aBasesA" );

   RWTValVector<char> aBaseB( (size_t) nContigLengthPlusSentinel,
                               '?',
                               "aBasesB" );

   RWTPtrVector<LocatedFragment> aReadA( (size_t) nContigLengthPlusSentinel,
                                         NULL,
                                         "aReadA" );

   RWTPtrVector<LocatedFragment> aReadB( (size_t) nContigLengthPlusSentinel,
                                         NULL,
                                         "aReadB" );

   RWTValVector<int> aQualityA( (size_t) nContigLengthPlusSentinel,
                                0,
                                "aQualityA" );

   RWTValVector<int> aQualityB( (size_t) nContigLengthPlusSentinel,
                                0,
                                "aQualityB" );

   



   for( int nSpeciesA = 0; nSpeciesA < 
           pAutoReport->aArrayOfSpecies_.length() - 1; ++nSpeciesA ) {

      RWCString soSpeciesA = pAutoReport->aArrayOfSpecies_[ nSpeciesA ];

      for( int nSpeciesB = nSpeciesA + 1; 
           nSpeciesB < pAutoReport->aArrayOfSpecies_.length();
           ++nSpeciesB ) {

         RWCString soSpeciesB = pAutoReport->aArrayOfSpecies_[ nSpeciesB ];


         // now we can do the analysis

         const int nMaxQualityPlus1 = 100;
         int nFrequencyOfAgreeingBases[nMaxQualityPlus1][nMaxQualityPlus1];
         int nFrequencyOfDisagreeingBases[nMaxQualityPlus1][nMaxQualityPlus1];
         int nFrequencyOfCpGDisagreeingBases[nMaxQualityPlus1][nMaxQualityPlus1];

         // zero the arrays

         for( int n1 = 0; n1 < nMaxQualityPlus1; ++n1 ) {
            for( int n2 = 0; n2 < nMaxQualityPlus1; ++n2 ) {
               nFrequencyOfAgreeingBases[n1][n2] = 0;
               nFrequencyOfDisagreeingBases[n1][n2] = 0;
               nFrequencyOfCpGDisagreeingBases[n1][n2] = 0;
            }
         }


         bool bHaveAtLeastOneValue = false;


         for( int nBothStrandsAreWhichStrand = 0; nBothStrandsAreWhichStrand <= 1;
              ++nBothStrandsAreWhichStrand ) {

            bool bTopStrandNotBottomStrand = true;
            if ( nBothStrandsAreWhichStrand == 1 )
               bTopStrandNotBottomStrand = false;
      

            // clear arrays

            for( int n = 0; n < aBaseA.length(); ++n ) {
               aBaseA[n] = '?';
               aBaseB[n] = '?';
               aReadA[n] = NULL;
               aReadB[n] = NULL;
               aQualityA[n] = 0;
               aQualityB[n] = 0;
            }



            for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
               LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

               if ( pLocFrag->bComp() == bTopStrandNotBottomStrand ) continue;

               // is this one of the species we are interested in?

               if ( !pLocFrag->soGetName().bContains( soSpeciesA ) &&
                    !pLocFrag->soGetName().bContains( soSpeciesB ) )
                  continue;

               // yes.

               // get range of high quality aligned sequence

               if ( pLocFrag->bIsWholeReadLowQuality() ) continue;
               if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

               int nAlignedHighQualityLeft = nGetStartConsensusIndex();
               int nAlignedHighQualityRight = nGetEndConsensusIndex();

               if ( !bIntersect( nAlignedHighQualityLeft,
                                 nAlignedHighQualityRight,
                                 pLocFrag->nGetAlignClipStart(),
                                 pLocFrag->nGetAlignClipEnd(),
                                 nAlignedHighQualityLeft,
                                 nAlignedHighQualityRight ) ) continue;

               if ( !bIntersect( nAlignedHighQualityLeft,
                                 nAlignedHighQualityRight,
                                 pLocFrag->nGetHighQualityStart(),
                                 pLocFrag->nGetHighQualityEnd(),
                                 nAlignedHighQualityLeft,
                                 nAlignedHighQualityRight ) ) continue;


               RWTValVector<int>* pQualityArray = 0;
               RWTPtrVector<LocatedFragment>* pReadArray = 0;
               RWTValVector<char>* pBaseArray = 0;


               if ( pLocFrag->soGetName().bContains( soSpeciesA ) ) {
                  pQualityArray = &aQualityA;
                  pReadArray = &aReadA;
                  pBaseArray = &aBaseA;
               }
               else {

                  pQualityArray = &aQualityB;
                  pReadArray = &aReadB;
                  pBaseArray = &aBaseB;
               }


               for( int nConsPos = nAlignedHighQualityLeft;
                    nConsPos <= nAlignedHighQualityRight;
                    ++nConsPos ) {


               
                  if ( ! ( (*pReadArray)[ nConsPos ] ) ) {
                     (*pReadArray)[ nConsPos ] = pLocFrag;
                     (*pQualityArray)[ nConsPos ] = pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
                     (*pBaseArray)[ nConsPos ] = pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase();


                  }
                  else {

                     cerr << "existing read is " <<
                        (*pReadArray)[ nConsPos ]->soGetName() << " and other read is " <<
                        pLocFrag->soGetName() << endl;
                     assert( false );

                     // already a read from the other strand
                     if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
                          (*pBaseArray)[ nConsPos ] ) {
                        // what to do?  Let's pretend there was never a base
                        // here on either strand--can't use the data

                        (*pReadArray)[ nConsPos ] = 0;
                        (*pQualityArray)[ nConsPos ] = 0;
                        (*pBaseArray)[ nConsPos ] = '?';

                     }
                     else {
                        // so the bases agree.  Since they are on opposite
                        // strands, let's add the quality values.

                        (*pQualityArray)[ nConsPos ] +=
                           pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
                        assert( false ); // this shouldn't happen
                        // since different reads, same species, are
                        // on different strands
                     }
                  }
               } //  for( int nConsPos = nAlignedHighQualityLeft;

            } //  for( int nRead = 0; nRead < nGetNumberOfFragsInContig();

               

            // Let's look for a motif like this:
            // aaaaa?aaaaa 
            // (5 agreeing bases, a base pair not pad, 5 agreeing bases)


            for( int nConsPos = nGetStartConsensusIndex() + pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - 1 ;
                 nConsPos <= nGetEndConsensusIndex() - pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + 1; ++nConsPos ) {

               bool bAgreeSoFar = true;

               // look to the left of the pair of aligned bases
               int nConsPos2 = nConsPos; // first -- will make it start
               // to the left
               int nNumberOfNonPadAgreeingBases = 0;
               while( nNumberOfNonPadAgreeingBases <
                      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
                  --nConsPos2;
                  if ( nConsPos2 < nGetStartConsensusIndex() ) {
                     bAgreeSoFar = false;
                     break;
                  }


                  // in the following case:
                  //  *
                  //  b
                  // (a pad aligned against a base)
                  // this is counted the same as a disagreeing pair of non-pads
               

                  if ( ! ( 
                          aReadA[ nConsPos2 ] && aReadB[ nConsPos2 ] &&
                          ( aBaseA[ nConsPos2 ] == aBaseB[ nConsPos2 ] ) ) ) {
                     bAgreeSoFar = false;
                     break;
                  }

                  // if made it here, there are reads from both species
                  // and they agree
                  if ( aBaseA[ nConsPos2 ] != '*' ) {
                     ++nNumberOfNonPadAgreeingBases;
                  }
               }

               if ( !bAgreeSoFar ) continue;

               // now look to the right of the pair of aligned bases

               nConsPos2 = nConsPos; // first ++ will make it start
               // to the right
               nNumberOfNonPadAgreeingBases = 0;
               while( nNumberOfNonPadAgreeingBases <
                      pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ ) {
                  ++nConsPos2;
                  if ( nConsPos2 > nGetEndConsensusIndex() ) {
                     bAgreeSoFar = false;
                     break;
                  }

                  if ( ! ( 
                          aReadA[ nConsPos2 ] && aReadB[ nConsPos2 ] &&
                          ( aBaseA[ nConsPos2 ] == aBaseB[ nConsPos2 ] ) ) ) {
                     bAgreeSoFar = false;
                     break;
                  }
                   
                  // if made it here, there are reads from both species
                  // and they agree

                  if ( aBaseA[ nConsPos2] != '*' ) {
                     ++nNumberOfNonPadAgreeingBases;
                  }
               }

               if ( !bAgreeSoFar ) continue;

               // we have found such a motif.

               // how could this fail( below) ? (4/29/05 DG)
               // i.e. how could there a read from species A and B at nConsPos?
               // It could fail if there are 2 reads from species A and
               // they disagree with each other and there is no way to
               // reconcile them.


               // should also add that neither is a pad.
               // We are not interested in either of these cases:

               //   bbbbb*bbbbb
               //   bbbbbbbbbbb
               //        ^
               //   We aren't interested in this case because such an indel
               //   has a different probability (hence branch length) than
               //   a substitution mutation.  

               // 
               //   bbbbb*bbbbb
               //   bbbbb*bbbbb
               //        ^
               //   We aren't interested in this case because this isn't
               //   either a mutation or a non-mutation.  It is simply an
               //   artifact of our aligning.

               if ( aReadA[ nConsPos ] && aReadB[ nConsPos ] ) {

                  const bool bDisallowIndels = true;

                  if ( bDisallowIndels ) {
                     if ( aBaseA[ nConsPos ] == '*' ||
                          aBaseB[ nConsPos ] == '*' )
                        continue;
                  }
                  else {
                     if ( aBaseA[ nConsPos ] == '*' &&
                          aBaseB[ nConsPos ] == '*' ) continue;
                  }
                  bHaveAtLeastOneValue = true;

                  if ( aBaseA[ nConsPos ] == aBaseB[ nConsPos ] ) {
                     ++nFrequencyOfAgreeingBases[ aQualityA[ nConsPos ] ][ aQualityB[ nConsPos ] ];

                  }
                  else {
                     ++nFrequencyOfDisagreeingBases[ aQualityA[ nConsPos ] ][ aQualityB[ nConsPos ] ];

                     if ( bIsCpG( aBaseA[ nConsPos - 1 ], 
                                  aBaseA[ nConsPos ], 
                                  aBaseA[ nConsPos + 1 ],
                                  aBaseB[ nConsPos - 1 ],
                                  aBaseB[ nConsPos ],
                                  aBaseB[ nConsPos + 1 ] ) ) {
                        ++nFrequencyOfCpGDisagreeingBases[ aQualityA[ nConsPos ] ][ aQualityB[ nConsPos ] ];
                     }

                  }


                  const int n45 = 45;


                  if ( soSpeciesA == "PPan" && soSpeciesB == "PTro" &&
                       pCP->nAutoReportQualityWindowLow_ 
                       <= aQualityA[ nConsPos ] &&
                       pCP->nAutoReportQualityWindowLow_ 
                       <= aQualityB[ nConsPos ] &&
                       ( ! ( pCP->nAutoReportQualityWindowHigh_ <= 
                             aQualityA[ nConsPos ] &&
                             pCP->nAutoReportQualityWindowHigh_ <= 
                             aQualityB[ nConsPos ] ) )
                       &&
                       ( n45 <= aQualityA[ nConsPos ] ||
                         n45 <= aQualityB[ nConsPos ] ) ) {


                     dErrors += 
                        ( dErrorRateFromQuality[ aQualityA[ nConsPos ] ] *
                          (1.0 - dErrorRateFromQuality[ aQualityB[ nConsPos ] ] ));
                     dErrors += 
                        ( dErrorRateFromQuality[ aQualityB[ nConsPos ] ] *
                          ( 1.0 - dErrorRateFromQuality[ aQualityA[ nConsPos ]] ));

                     dErrors += 2.0 / 3.0 * 
                        dErrorRateFromQuality[ aQualityA[ nConsPos ] ] *
                        dErrorRateFromQuality[ aQualityB[ nConsPos ] ];


                     if ( aBaseA[ nConsPos ] != aBaseB[ nConsPos ] ) 
                        ++nDiscrepancies;

                     fprintf( pAO, "nav %s %s %d %c %c %d %d %.4g %d %s\n",
                              soSpeciesA.data(),
                              soSpeciesB.data(),
                              nUnpaddedIndex( nConsPos ),
                              aBaseA[nConsPos],
                              aBaseB[nConsPos],
                              aQualityA[ nConsPos ],
                              aQualityB[ nConsPos ],
                              dErrors,
                              nDiscrepancies,
                              soGetName().data() );
                  }
                        

               } //                if ( aReadA[ nConsPos ] && aReadB[ nConsPos
            } //             for( int nConsPos = nGetStartConsensusIndex() 

         } //  for( int nBothStrandsAreWhichStrand = 0; nBothStrandsAreWhichStrand <= 1;

         if ( bHaveAtLeastOneValue ) {

            // now print out these arrays

            fprintf( pAO, "Agreeing and Disagreeing Bases %s %s {\n",
                  soSpeciesA.data(),
                  soSpeciesB.data() );

            for( int nQualityA = 0; nQualityA < 100; ++nQualityA ) {
               for( int nQualityB = 0; nQualityB < 100; ++nQualityB ) {
                  if ( nFrequencyOfAgreeingBases[nQualityA][nQualityB] ||
                       nFrequencyOfDisagreeingBases[nQualityA][nQualityB] ) {
                     fprintf( pAO, "%d %d %d %d %d\n",
                              nQualityA,
                              nQualityB,
                              nFrequencyOfAgreeingBases[nQualityA][nQualityB],
                              nFrequencyOfDisagreeingBases[nQualityA][nQualityB],
                              nFrequencyOfCpGDisagreeingBases[nQualityA][nQualityB]);

                  }
               }
            }


            fprintf( pAO, "} Agreeing and Disagreeing Bases %s %s\n",
                  soSpeciesA.data(),
                  soSpeciesB.data() );

         } // bHaveAtLeastOneValue

      } //       for( int nSpeciesB = nSpeciesA + 1; 
   } //    for( int nSpeciesA = 0; nSpeciesA < 



}




void Contig :: areReadsCorrectlyAligned( autoReport* pAutoReport ) {

   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( "HSap" ) ) continue;

      if ( pLocFrag->soGetName().bEndsWith( "f" ) ) {
         if ( pLocFrag->bComp() ) {
            fprintf( pAO, "Wrong strand: %s is on bottom strand\n",
                  pLocFrag->soGetName().data() );
         }
         else {
            fprintf( pAO, "Right strand: %s is on top strand\n",
                     pLocFrag->soGetName().data() );
         }
      }
      else if ( pLocFrag->soGetName().bEndsWith( "r" ) ) {
         if ( !pLocFrag->bComp() ) {
            fprintf( pAO, "Wrong strand: %s is on top strand\n",
                     pLocFrag->soGetName().data() );
         }
         else {
            fprintf( pAO, "Right strand: %s is on bottom strand\n",
                     pLocFrag->soGetName().data() );
         }
      }
      else {
         assert( false );
      }
   }

}


void Contig :: printLengthsOfUnalignedHighQualitySegments() {



   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( "HSap" ) ) continue;

      gotoList myGotoList;

      pLocFrag->addUnalignedHighQualityRegionsToList( &myGotoList,
                                                      true ); // bIncludeTotallyUnalignedReads

      if ( myGotoList.nGetNumGotoItems() > 0 ) {
         // I can't see how the difference of 2 line segments could
         // have any more than 2 line segments

         assert( myGotoList.nGetNumGotoItems() <= 2 );
         int nLengthOfUnalignedHighQualityRegion = 0;
         for( int nGotoItem = 0; nGotoItem < myGotoList.nGetNumGotoItems();
              ++nGotoItem ) {
            gotoItem* pGotoItem = myGotoList.pGetGotoItem( nGotoItem );

            int nUnpaddedStart = 
               pLocFrag->nConsPosToUnpaddedFragPos( pGotoItem->nGotoItemStart_ );
            int nUnpaddedEnd =
               pLocFrag->nConsPosToUnpaddedFragPos( pGotoItem->nGotoItemEnd_ );

            // we don't know which is larger, nUnpaddedEnd or nUnpaddedStart
            int nLengthOfOneRegion = ABS( nUnpaddedEnd - nUnpaddedStart ) + 1;

            nLengthOfUnalignedHighQualityRegion += nLengthOfOneRegion;

            // print this region

            fprintf( pAO, "unaligned_hqs_bases{\n" );

            fprintf( pAO, ">%s cons pos %d-%d\n",
                     pLocFrag->soGetName().data(),
                     pGotoItem->nUnpaddedGotoItemStart_,
                     pGotoItem->nUnpaddedGotoItemEnd_ );

            int nBasesOnLine = 0;
            for( int nConsPos = pGotoItem->nGotoItemStart_; 
                 nConsPos <= pGotoItem->nGotoItemEnd_; ++nConsPos ) {

               if ( !pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
                  if ( nBasesOnLine == 50 ) {
                     putc( '\n', pAO );
                     nBasesOnLine = 0;
                  }
                  putc( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase(),
                        pAO );
                  ++nBasesOnLine;
               }
            }
            fprintf( pAO, "\n}unaligned_hqs_bases\n" );

         } //  for( int nGotoItem = 0;

         fprintf( pAO, "unaligned hqs bases: %d of read %s\n",
                  nLengthOfUnalignedHighQualityRegion,
                  pLocFrag->soGetName().data() );

      }
      else {
         fprintf( pAO, "unaligned hqs bases: 0 of read %s\n",
                  pLocFrag->soGetName().data() );
      }
   }


   // we also need to look at reads that are totally unaligned
   // how can we even know about them?  We must read the ../phd_dir

   DIR *pDirFile;
   struct dirent *pDirEntry;

   if ( ( pDirFile = opendir( ConsEd::pGetConsEd()->filGetPHDDir() )) == NULL ) {
      THROW_FILE_ERROR( ConsEd::pGetConsEd()->filGetPHDDir() );
   }

   RWCRegexp regPattern( "\\.phd\\.[0-9]+$" );

   RWTValOrderedVector<FileName> aSingletPhdFiles;
   RWTValOrderedVector<RWCString> aSingletReads;
   while( ( pDirEntry = readdir( pDirFile ) ) != NULL ) {

      FileName filPHDFile( pDirEntry->d_name );

      // could be "." or ".."
      if ( !filPHDFile.bContains( regPattern ) ) continue;

      RWCString soReadName = filPHDFile;

      // chop off the .phd.#
      soReadName( regPattern ) = "";

      // is this read already in the assembly?

      if ( ConsEd::pGetAssembly()->pGetLocatedFragmentByName( soReadName ) ) 
         continue;

      // if reached here, the read is not in the assembly
      
      aSingletPhdFiles.insert( filPHDFile );
      aSingletReads.insert( soReadName );
   }


   for( int nSinglet = 0; nSinglet < aSingletPhdFiles.length(); ++nSinglet ) {
      FileName filSinglet = aSingletPhdFiles[ nSinglet ];

      FileName filPHDFullPath = ConsEd::pGetConsEd()->filGetPHDDir() +
         "/" + filSinglet;

      FILE* pFil = fopen( ( char*) filPHDFullPath.data(), "r" );

      if ( !pFil ) {
         THROW_FILE_ERROR( filPHDFullPath );
      }

      singletInfo mySingletInfo;

      readPHDAgainForHighQualitySegment( &mySingletInfo, pFil );

      int nNumberOfHighQualityBases =
         mySingletInfo.nHighQualitySegmentEnd_ -
         mySingletInfo.nHighQualitySegmentStart_ + 1;


      if ( nNumberOfHighQualityBases == 1 ) 
         nNumberOfHighQualityBases = 0;

      RWCString soReadName = aSingletReads[ nSinglet ];

      fprintf( pAO, "unaligned hqs bases: %d of read %s (singlet)\n",
               nNumberOfHighQualityBases,
               soReadName.data() );
   }
}




void Contig :: printLengthsOfAlignedSegments() {

   RWTValOrderedVector<FileName> aSingletPhdFiles;
   RWTValOrderedVector<RWCString> aSingletReads;

   getAllSinglets( aSingletPhdFiles,
                   aSingletReads );

   for( int nSinglet = 0; nSinglet < aSingletReads.length(); ++nSinglet ) {
      RWCString soReadName = aSingletReads[ nSinglet ];

      fprintf( pAO, "%s 0\n",
               soReadName.data() );
   }


   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( "HSap" ) ) continue;

      if ( pLocFrag->bIsWholeReadUnaligned() ) {
         fprintf( pAO, "%s 0\n", 
                  pLocFrag->soGetName().data() );

      }
      else {
         int nConsPosLeft = pLocFrag->nGetAlignClipStart();
         int nConsPosRight = pLocFrag->nGetAlignClipEnd();

         int nUnpaddedReadLeft = 
            pLocFrag->nConsPosToUnpaddedFragPos( nConsPosLeft );
         int nUnpaddedReadRight = 
            pLocFrag->nConsPosToUnpaddedFragPos( nConsPosRight );

         int nAlignedLength = nUnpaddedReadRight - nUnpaddedReadLeft + 1;

         fprintf( pAO, "%s %d\n",
                  pLocFrag->soGetName().data(),
                  nAlignedLength );
      }
   }
}







void Contig :: compareTopAndBottomStrands( autoReport* pAutoReport ) {



   // this will change when we delete columns

   int nPaddedContigLength = nGetPaddedConsLength();




   mbtValVectorOfBool aSomeCombinedReadHasNoPad( nPaddedContigLength,
                                                 1,
                                                 "Contig::aSomeCombinedReadHasNoPad" );


   const char cSingleSignalUndetermined = 'u';
   const char cSingleSignal = 's';
   const char cMultipleSignal = 'm';


   RWTPtrOrderedVector<LocatedFragment> aListOfReadsInSingleSignalArrays( (size_t) 10 );

   typedef MBTValOrderedOffsetVector<char> typeArrayOfChar;
   RWTPtrOrderedVector<typeArrayOfChar> aListOfSingleSignalArrays( (size_t) 10 );


   int nSpecies;
   for( nSpecies = 0; nSpecies < 
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         // the fake read for the 2nd alignment
         if ( pLocFrag->soGetName().bContains( "b." ) ) {
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // there should always be a human read.  No, perhaps we are in
      // a contig that doesn't have one.  In that case, ignore this contig.
      if (! pHumanRead ) return;


      // save single signal arrays before deleting column of pads

      for( int n = 0; n <= 1; ++n ) {
         LocatedFragment* pLocFrag = NULL;
         if ( n == 0 ) 
            pLocFrag = pForwardRead;
         else
            pLocFrag = pReverseRead;
         
         if ( !pLocFrag ) continue;

         MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;

         pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray );

         aListOfReadsInSingleSignalArrays.insert( pLocFrag );

         aListOfSingleSignalArrays.insert( pSingleSignalArray );
      }
         


      for( int nConsPos = nGetStartConsensusIndex(); 
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {

         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         if ( !pHumanRead->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
         }
       

         // cases:  use just forward
         //         use just reverse
         //         use both
         //         use neither

         char cCase = ' ';

         LocatedFragment* pReadToUse = NULL;
         
         if ( pForwardRead && pForwardRead->bIsInAlignedPartOfRead( nConsPos ) 
              && 
              pReverseRead && pReverseRead->bIsInAlignedPartOfRead( nConsPos ) ) {

            if (  pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {

               // just so it isn't null:
               pReadToUse = pForwardRead;

               cCase = 'b'; // both
            }
            else {
               // will just use one read.  Which is it?
               
               if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
                    ==
                    pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
                  // I don't know what to do in this case so let's ignore
                  // such data points
                  cCase = 'n';
               }
               else if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() >
                         pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
                  pReadToUse = pForwardRead;
                  cCase = 'o';
               }
               else {
                  pReadToUse = pReverseRead;
                  cCase = 'o';
               }
            }
               
         }
         else if ( pForwardRead && pForwardRead->bIsInAlignedPartOfRead( nConsPos ) ) {
            cCase = 'o';
            pReadToUse = pForwardRead;
         }
         else if ( pReverseRead && pReverseRead->bIsInAlignedPartOfRead( nConsPos ) ) {
            cCase = 'o';
            pReadToUse = pReverseRead;
         }
         else {
            cCase = 'n';
         }

         // if both the human and primate are pads,
         // skip this base


         if ( cCase != 'n' && 
              !pReadToUse->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
            aSomeCombinedReadHasNoPad.setValue( nConsPos, true );
         }
      } //       for( int nConsPos = nGetStartConsensusIndex(); 
   } //    for( nSpecies = 0; nSpecies < 



   // remove columns in which all combined reads have a pad


   // first, do it to the single signal arrays.

   for( int nRead = 0; nRead < aListOfSingleSignalArrays.length();
        ++nRead ) {

      MBTValOrderedOffsetVector<char>* pSingleSignalArray = 
         aListOfSingleSignalArrays[ nRead ];

      for( int nConsPos = nGetEndConsensusIndex();
           nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {

         if ( !aSomeCombinedReadHasNoPad[ nConsPos ] ) {
            pSingleSignalArray->removeAt( nConsPos );
         }
      }
   } //    for( int nRead = 0; nRead < aListOfReadsInSingleSignalArrays.length();

   int nConsPos;
   for( nConsPos = nGetEndConsensusIndex();
        nConsPos >= nGetStartConsensusIndex(); --nConsPos ) {
      if ( ! aSomeCombinedReadHasNoPad[ nConsPos ] ) {
         deleteColumn( nConsPos, false );
      }
   }



   // let's save the assembly to see that everything went ok

   ConsEd::pGetAssembly()->saveToFile( "temp_no_columns.ace",
                                       false ); // bAceFormat1



   // new padded length after deleting columns
   nPaddedContigLength = nGetPaddedConsLength();




   mbtValVector<int> aMinQualityInColumn( (size_t) nPaddedContigLength,
                                          1,
                                          666,
                                          "aMinQualityInColumn" );

   const unsigned char ucPTro = 1;
   const unsigned char ucPPan = 2;
   const unsigned char ucGGor = 4;
   const unsigned char ucPPyg = 8;
   const unsigned char ucMMul = 16;

   mbtValVector<unsigned char> aSpeciesInColumn( (size_t) nPaddedContigLength,
                                                 1,
                                                         0,
                                                         "aSpeciesInColumn" );

   mbtValVectorOfBool aDiscrepantPositions( nPaddedContigLength,
                                            1,
                                            "Contig::aDiscrepantPositions" );

   mbtValVectorOfBool aNeededSpeciesAreAlignedAndAgree( nPaddedContigLength,
                                               1,
                                               "Contig::aNeededSpeciesAreAlignedAndAgree" );
   
   mbtValVectorOfBool aNeededSpeciesAreAlignedAndHQ( nPaddedContigLength,
                                                     1,
                                                     "Contig::aNeededSpeciesAreAlignedAndHQ" );

   mbtValVectorOfBool aSomeCombinedReadIsMultipleSignal( nPaddedContigLength,
                                                          1,
                                                          "Contig::aSomeCombinedReadIsMultipleSignal" );




   for( nSpecies = 0; nSpecies < 
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];

      unsigned char ucSpeciesMask;

      if ( soSpecies == "PTro" ) {
         ucSpeciesMask = ucPTro;
      }
      else if ( soSpecies == "PPan" ) {
         ucSpeciesMask = ucPPan;
      }
      else if ( soSpecies == "GGor" ) {
         ucSpeciesMask = ucGGor;
      }
      else if ( soSpecies == "PPyg" ) {
         ucSpeciesMask = ucPPyg;
      }
      else if ( soSpecies == "MMul" ) {
         ucSpeciesMask = ucMMul;
      }
      else {
         ucSpeciesMask = 0;
      }
      


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }

      // there should always be a human read.  No, perhaps we are in
      // a contig that doesn't have one.  In that case, ignore this contig.
      if (! pHumanRead ) return;

      MBTValOrderedOffsetVector<char>* pForwardReadSingleSignalArray = NULL;
      MBTValOrderedOffsetVector<char>* pReverseReadSingleSignalArray = NULL;

      if ( pForwardRead ) {
         int nIndex =
            aListOfReadsInSingleSignalArrays.indexByPointers( pForwardRead );
         assert( nIndex != RW_NPOS );
         pForwardReadSingleSignalArray = aListOfSingleSignalArrays[ nIndex ];
      }

      if ( pReverseRead ) {
         int nIndex =
            aListOfReadsInSingleSignalArrays.indexByPointers( pReverseRead );
         assert( nIndex != RW_NPOS );
         pReverseReadSingleSignalArray = aListOfSingleSignalArrays[ nIndex ];
      }
         


      int nConsPos;
      for( nConsPos = nGetStartConsensusIndex(); 
           nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {


         // the consensus and human read should be equal
         assert( pHumanRead->bIsInAlignedPartOfRead( nConsPos ) );

         // cases:  use just forward
         //         use just reverse
         //         use both
         //         use neither

         const char cOneRead = 'o';
         const char cBothReads = 'b';
         const char cNoRead = 'n';

         char cCase = ' ';

         LocatedFragment* pReadToUse = NULL;
         bool bSingleSignalOfBase = false;

         bool bForwardIsSingleSignal = false;
         if ( pForwardReadSingleSignalArray ) {
            bForwardIsSingleSignal = ( 
                   (*pForwardReadSingleSignalArray)[ nConsPos ] == 
                   cSingleSignal );
         }
         bool bReverseIsSingleSignal = false;
         if ( pReverseReadSingleSignalArray ) {
            bReverseIsSingleSignal = ( 
               (*pReverseReadSingleSignalArray)[ nConsPos ] == 
               cSingleSignal );
         }

         
         if ( pForwardRead && pForwardRead->bIsInAlignedPartOfRead( nConsPos ) 
              && 
              pReverseRead && pReverseRead->bIsInAlignedPartOfRead( nConsPos ) ) {


            if (  pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {

               bSingleSignalOfBase = bForwardIsSingleSignal ||
                  bReverseIsSingleSignal;

               if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
                    >
                    pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() )
                  pReadToUse = pForwardRead;
               else
                  pReadToUse = pReverseRead;

               cCase = cBothReads; // both
            }
            else {
               // will just use one read.  Which is it?

               if ( bForwardIsSingleSignal == bReverseIsSingleSignal ) {

                  bSingleSignalOfBase = bForwardIsSingleSignal;

                  // choose by which read is higher quality

                  if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() == pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
                     // no way to choose--don't use
                     cCase = cNoRead;
                  }
                  else if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() >
                            pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ) {
                     cCase = cOneRead;
                     pReadToUse = pForwardRead;
                  }
                  else {
                     cCase = cOneRead;
                     pReadToUse = pReverseRead;
                  }
               }
               else {
                  // complicated case in which one is single signal 
                  // and one is multiple signal

                  int nHigherQualityIsForward = 
                     (int) pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality()
                     -
                     (int) pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
                  if ( nHigherQualityIsForward >= 0 && bForwardIsSingleSignal ) {
                     cCase = cOneRead;
                     pReadToUse = pForwardRead;
                     bSingleSignalOfBase = true;
                  }
                  else if ( nHigherQualityIsForward < 0 && bReverseIsSingleSignal ) {
                     cCase = cOneRead;
                     pReadToUse = pReverseRead;
                     bSingleSignalOfBase = true;
                  }
                  else {
                     // conflict between quality and single signal
                     // also conflict between base calls.  This is the
                     // most ambiguous case.  


                     if ( abs( nHigherQualityIsForward ) <= 5 ) {

                        // qualities are close, so use whichever
                        // is single signal

                        if ( bForwardIsSingleSignal ) {
                           cCase = cOneRead;
                           pReadToUse = pForwardRead;
                           bSingleSignalOfBase = true;
                        }
                        else {
                           cCase = cOneRead;
                           pReadToUse = pReverseRead;
                           bSingleSignalOfBase = true;
                        }

                     }
                     else {
                        // qualities are not close, so use
                        // whichever is higher quality

                        fprintf( pAO, "conflict: %s %s and %s %s at conspos %d\n",
                              pForwardRead->soGetName().data(),
                              szPrintBool( bForwardIsSingleSignal ),
                              pReverseRead->soGetName().data(),
                              szPrintBool( bReverseIsSingleSignal ),
                              nUnpaddedIndex( nConsPos ) );
                              





                        bSingleSignalOfBase = false;
                        if ( nHigherQualityIsForward > 0 ) {
                           cCase = cOneRead;
                           pReadToUse = pForwardRead;
                        }
                        else {
                           cCase = cOneRead;
                           pReadToUse = pReverseRead;
                        }
                     }
                  }
               } //  if ( bForwardIsSingleSignal == bReverseIsSingleSignal )else
               






            } //  if (  pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() == pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) { else

         } //          if ( pForwardRead && pForwardRead->bIsInAlignedPartOfRead( nConsPos )               &&               pReverseRead && pReverseRead->bIsInAlignedPartOfRead( nConsPos ) ) {
         else if ( pForwardRead && pForwardRead->bIsInAlignedPartOfRead( nConsPos ) ) {
            cCase = cOneRead;
            pReadToUse = pForwardRead;
            bSingleSignalOfBase = bForwardIsSingleSignal;
         }
         else if ( pReverseRead && pReverseRead->bIsInAlignedPartOfRead( nConsPos ) ) {
            cCase = cOneRead;
            pReadToUse = pReverseRead;
            bSingleSignalOfBase = bReverseIsSingleSignal;
         }
         else {
            cCase = cNoRead;
         }

         if ( cCase != cNoRead ) {
            if ( pReadToUse->ntGetFragFromConsPos( nConsPos ).cGetBase() !=
                 pHumanRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {
               aDiscrepantPositions.setValue( nConsPos, true );
            }

            aSpeciesInColumn[ nConsPos ] |= ucSpeciesMask;

            if ( !bSingleSignalOfBase )
               aSomeCombinedReadIsMultipleSignal.setValue( nConsPos, true );

         }

         // set aMinQualityInColumn

         if ( cCase == cOneRead ) {

            int nQuality = pReadToUse->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
            aMinQualityInColumn[ nConsPos ] = 
               MIN( aMinQualityInColumn[ nConsPos ], nQuality );
         }
         else if ( cCase == cBothReads ) {
            int nQuality = 
               pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality() +
               pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

            if ( nQuality > 90 )
               nQuality = 90;

            aMinQualityInColumn[ nConsPos ] = 
               MIN( aMinQualityInColumn[ nConsPos ], nQuality );
            
         }

         if ( bDebug ) {
            cerr << soSpecies << " case: " << cCase << " species " <<
               (unsigned int) aSpeciesInColumn[ nConsPos ] << endl;
         }
            



      } //       for( int nConsPos = nGetStartConsensusIndex(); 
   } //    for( int nSpecies = 0; nSpecies < 



   const int nThresholdQuality = 20;

   int nNumberOfColumnsAboveThreshold = 0;
   int nNumberOfColumnsWithNeededSpecies = 0;
   int nNumberOfColumnsWithNeededSpeciesAndQuality = 0;
   int nNumberOfColumnsWithNeededSpeciesAndQualityAndSingleSignal = 0;

   unsigned char ucOKMask = ucMMul | ucPPyg | ucGGor;
   unsigned char ucChimpMask = ucPPan | ucPTro;

   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {


      // need to check with Phil if deletion in human is
      // of interest in making trees.

//       if ( ntGetCons( nConsPos ).bIsPad() ) {
//          continue;
//       }

      bool bColumnAboveThreshold = false;
      if ( aMinQualityInColumn[ nConsPos ] != 666 &&
           aMinQualityInColumn[ nConsPos ] >= nThresholdQuality ) {
         bColumnAboveThreshold = true;

         
         ++nNumberOfColumnsAboveThreshold;
      }
      
      bool bColumnWithNeededSpecies = false;
      if ( ( aSpeciesInColumn[ nConsPos ] & ucOKMask ) ==
           ucOKMask ) {
         // now check that one of chimps is also present

         if ( aSpeciesInColumn[ nConsPos ] & ucChimpMask ) {
            bColumnWithNeededSpecies = true;

            ++nNumberOfColumnsWithNeededSpecies;

            // if there are no discrepancies in this column, then
            // it is an acceptable aligned column:  Notice that there
            // are no quality conditions on the flanking bases.

            if ( !aDiscrepantPositions[ nConsPos ] ) {
               aNeededSpeciesAreAlignedAndAgree.setValue( nConsPos, true );
            }

         }
      }

      if ( bColumnAboveThreshold && bColumnWithNeededSpecies ) {
         ++nNumberOfColumnsWithNeededSpeciesAndQuality;



         if ( !aSomeCombinedReadIsMultipleSignal[ nConsPos ] ) {
            ++nNumberOfColumnsWithNeededSpeciesAndQualityAndSingleSignal;
         }

         aNeededSpeciesAreAlignedAndHQ.setValue( nConsPos, true );
      }


   }



   // now let's see about the quality of the alignment and see
   // how many places have a certain alignment

//    mbtValVectorOfBool aNeededSpeciesAreAlignedAndAgreeSeveralColumns( 
//           nPaddedContigLength,
//           1,
//           "Contig::NeededSpeciesAreAlignedAndAgreeSeveralColumn" );

   mbtValVectorOfBool aColumnsFlanked(
             nPaddedContigLength,    
             1,
             "Contig:aColumnsFlanked" );


   for( int nSizeOfDiscrepantRegion = 1; nSizeOfDiscrepantRegion <= 3; 
        ++nSizeOfDiscrepantRegion ) {


      // nConsPosDiscrepantRegionLeft


      for( int nConsPosDiscrepantRegionLeft = pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ + 1;
           // e.g., if consensus is from 1 to 11, then 11 - 5 = 6 which is 
           // the last position to check
           nConsPosDiscrepantRegionLeft <= ( nGetEndConsensusIndex() - pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ - nSizeOfDiscrepantRegion + 1);
           ++nConsPosDiscrepantRegionLeft ) {

         // check the left flank
         
         int nNumberOfFlankingBases = 0;
         bool bRegionOK = true;
         int nConsPos2;
         for( nConsPos2 = nConsPosDiscrepantRegionLeft - 1;
              bRegionOK && ( nConsPos2 >= nGetStartConsensusIndex() ) &&
                 ( nNumberOfFlankingBases < 
                   pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ );
              --nConsPos2 ) {

            bRegionOK = aNeededSpeciesAreAlignedAndAgree[ nConsPos2 ];
            ++nNumberOfFlankingBases;
         }

         // a little check for the left edge of the consensus
         if ( nNumberOfFlankingBases < pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ )
            bRegionOK = false;

         // check the right flank

         nNumberOfFlankingBases = 0;
         for( nConsPos2 = 
                 nConsPosDiscrepantRegionLeft + nSizeOfDiscrepantRegion;
              bRegionOK && ( nConsPos2 <= nGetEndConsensusIndex() ) &&
                 ( nNumberOfFlankingBases < 
                   pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ );
              ++nConsPos2 ) {

            bRegionOK = aNeededSpeciesAreAlignedAndAgree[ nConsPos2 ];
            ++nNumberOfFlankingBases;
         }

         // another little check for the right edge of the consensus
         if ( nNumberOfFlankingBases < pCP->nAutoReportMinNumberOfPerfectlyAlignedBasesBeforeDiscrepancy_ )
            bRegionOK = false;
         

         if ( bRegionOK ) {
            for( nConsPos2 = nConsPosDiscrepantRegionLeft;
                    nConsPos2 <= 
                    nConsPosDiscrepantRegionLeft + nSizeOfDiscrepantRegion - 1;
                 ++nConsPos2 ) {

               aColumnsFlanked.setValue( nConsPos2, true );

            }
         }

      }
   }


   // now count how many columns there are that passed the alignment check:

   int nNumberOfColumnsFlanked = 0;
   int nNumberOfColumnsFlankedAndHQ = 0;
   int nNumberOfColumnsFlankedHQSingleSignal = 0;
   int nNumberOfColumnsSingleSignalAllSpeciesHQ = 0;
   for( nConsPos = nGetStartConsensusIndex(); 
        nConsPos <= nGetEndConsensusIndex(); ++nConsPos ) {


         if ( aNeededSpeciesAreAlignedAndHQ[ nConsPos ] 
              && !aSomeCombinedReadIsMultipleSignal[ nConsPos ] ) {
            ++nNumberOfColumnsSingleSignalAllSpeciesHQ;
         }

         if ( aColumnsFlanked[ nConsPos ] ) {
            ++nNumberOfColumnsFlanked;

            if ( aNeededSpeciesAreAlignedAndHQ[ nConsPos ] ) {
               ++nNumberOfColumnsFlankedAndHQ;
            
               if ( !aSomeCombinedReadIsMultipleSignal[ nConsPos ] ) {
                  ++nNumberOfColumnsFlankedHQSingleSignal;
               }
            }
         }
   }
         
   
   fprintf( pAO, "totals: quality threshold: %d columns above quality: %d columns with species: %d columns with both: %d also single: %d flanked: %d flanked and hq: %d flanked_hq_single: %d\n",
            nThresholdQuality,
            nNumberOfColumnsAboveThreshold,
            nNumberOfColumnsWithNeededSpecies,
            nNumberOfColumnsWithNeededSpeciesAndQuality,
            nNumberOfColumnsWithNeededSpeciesAndQualityAndSingleSignal,
            nNumberOfColumnsFlanked,
            nNumberOfColumnsFlankedAndHQ,
            nNumberOfColumnsFlankedHQSingleSignal );

   assert( nNumberOfColumnsSingleSignalAllSpeciesHQ == 
           nNumberOfColumnsWithNeededSpeciesAndQualityAndSingleSignal );
         
         

   



}



void Contig :: compareTopAndBottomStrandsWithHuman( autoReport* pAutoReport ) {




   // get 2-D arrays of quality and relative area bin giving # of bases
   // 2 of these 2-D arrays--one for bases agreeing with human and 1 for
   // bases disagreeing with human

   const int nNumberOfQualityValues = 99;
   const int nNumberOfRelativeAreaBins = 13;
   const int nTopRelativeAreaBin = nNumberOfRelativeAreaBins - 1;

   typedef RWTValVector<int> arrayOfInt;


   RWTPtrVector<arrayOfInt> ppArrayOfRelativeAreasAgreeWithHuman( 
                        (size_t) ( nNumberOfQualityValues + 1 ),
                        NULL,
                        "ppArrayOfRelativeAreasAgreeWithHuman" );

   RWTPtrVector<arrayOfInt> ppArrayOfRelativeAreasDisagreeWithHuman(
                        (size_t) ( nNumberOfQualityValues + 1 ), 
                        NULL,
                        "ppArrayOfRelativeAreasDisagreeWithHuman" );

   int nQuality;
   for( nQuality = 0; nQuality <= nNumberOfQualityValues; ++nQuality ) {
      
      RWCString soNameAgree = "ppArrayOfRelativeAreasAgreeWithHuman[";
      soNameAgree += RWCString( (long) nQuality );
      soNameAgree += "]";

      ppArrayOfRelativeAreasAgreeWithHuman[ nQuality ] =
         new RWTValVector<int>(
                               (size_t) nNumberOfRelativeAreaBins,
                               0,
                               soNameAgree );

      RWCString soNameDisagree = "ppArrayOfRelativeAreasDisagreeWithHuman[";
      soNameDisagree += RWCString( (long) nQuality );
      soNameDisagree += "]";

      ppArrayOfRelativeAreasDisagreeWithHuman[ nQuality ] =
         new RWTValVector<int>(
                               (size_t) nNumberOfRelativeAreaBins,
                               0,
                               soNameDisagree );
   }


                                                                           
   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;
   //   RWCString soGoodReads = "/me2/gordon/primates/checkHumanGenome/tempreads.txt";


   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }

   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );


   for( int nSpecies = 0; nSpecies < 
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;


      bool bSpeciesHasBadRead = false;
      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains("HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( "b.r" ) ) {
            cerr << "ignoring read " << pLocFrag->soGetName() << endl;
            continue;
         }
         
         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) {
            bSpeciesHasBadRead = true;
            break;
         }

         if ( pLocFrag->soGetName().bEndsWith(".f" )) {
            pForwardRead = pLocFrag;
         }
         else if ( pLocFrag->soGetName().bEndsWith( ".r" ) ) {
            pReverseRead = pLocFrag;
         }
         else {
            assert( false );
         }
      }

      if ( bSpeciesHasBadRead ) continue;

      // can't use species if all reads aren't there
      if ( !pForwardRead || !pReverseRead || !pHumanRead ) continue;


      // now get the relative area arrays for both reads

      mbtValVector<float>* pRelativeAreaOfUncalledBasesForwardRead = NULL;
      mbtValVector<float>* pRelativeAreaOfUncalledBasesReverseRead = NULL;

      assert( pForwardRead->bGetSingleSignalRelativeAreas( pRelativeAreaOfUncalledBasesForwardRead ) );
      assert( pReverseRead->bGetSingleSignalRelativeAreas( pRelativeAreaOfUncalledBasesReverseRead ) );


      if ( pForwardRead->bIsWholeReadUnaligned() ||
           pReverseRead->bIsWholeReadUnaligned() ) 
         continue;

      int nConsPosLeft;
      int nConsPosRight;

      // just fixed
      if ( !bIntersect( pForwardRead->nGetAlignClipStart(),
                        pForwardRead->nGetAlignClipEnd(),
                        pReverseRead->nGetAlignClipStart(),
                        pReverseRead->nGetAlignClipEnd(),
                        nConsPosLeft,
                        nConsPosRight ) ) continue;


      for( int nConsPos = nConsPosLeft; nConsPos <= nConsPosRight; ++nConsPos ) {
         
         // we are only interested in cases in which the reads do not agree

         if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
              pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() )
            continue;

         // since they disagree with each other, at least 1 must disagree
         // with human.
         // since they disagree with each other, at least 1 must not be
         // a pad.

         // we are only interested in case in which higher quality read
         // has larger relative area

         int nQualityForward = pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
         int nQualityReverse = pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();


         int nOrientedUnpaddedReadPosOfForwardRead =
            pForwardRead->nOrientedUnpaddedFragPosFromConsPos( nConsPos );
         int nOrientedUnpaddedReadPosOfReverseRead =
            pReverseRead->nOrientedUnpaddedFragPosFromConsPos( nConsPos );

         float fForwardRelativeArea = 
            (*pRelativeAreaOfUncalledBasesForwardRead)[ nOrientedUnpaddedReadPosOfForwardRead ];

         float fReverseRelativeArea = 
            (*pRelativeAreaOfUncalledBasesReverseRead)[ nOrientedUnpaddedReadPosOfReverseRead ];


         if ( nQualityForward >= nQualityReverse &&
              fForwardRelativeArea < fReverseRelativeArea ) continue;

         if ( nQualityForward <= nQualityReverse &&
              fForwardRelativeArea > fReverseRelativeArea ) continue;


         // we are left with the ambiguous case where quality and
         // relative area conflict

            
         // which bin will each fall into?

         int nForwardReadRelativeAreaIndex = -1;
         if ( fForwardRelativeArea == -1.0 ) 
            nForwardReadRelativeAreaIndex = 0;
         else {
            for( int nBin = 1; nBin <= ( nTopRelativeAreaBin - 1 ); ++nBin ) {
               float fRelativeAreaBottomOfBin = ( nBin - 1 ) / 10.0;
               float fRelativeAreaTopOfBin = nBin / 10.0;

               if ( fRelativeAreaBottomOfBin <= fForwardRelativeArea &&
                    fForwardRelativeArea < fRelativeAreaTopOfBin ) {
                  nForwardReadRelativeAreaIndex = nBin;
               }
            }

            if ( nForwardReadRelativeAreaIndex == -1 )
               nForwardReadRelativeAreaIndex = nTopRelativeAreaBin;
         }


         // which bin will each fall into?

         int nReverseReadRelativeAreaIndex = -1;
         if ( fReverseRelativeArea == -1.0 ) 
            nReverseReadRelativeAreaIndex = 0;
         else {
            for( int nBin = 1; nBin <= ( nTopRelativeAreaBin - 1 ); ++nBin ) {
               float fRelativeAreaBottomOfBin = ( nBin - 1 ) / 10.0;
               float fRelativeAreaTopOfBin = nBin / 10.0;

               if ( fRelativeAreaBottomOfBin <= fReverseRelativeArea &&
                    fReverseRelativeArea < fRelativeAreaTopOfBin ) {
                  nReverseReadRelativeAreaIndex = nBin;
               }
            }

            if ( nReverseReadRelativeAreaIndex == -1 )
               nReverseReadRelativeAreaIndex = nTopRelativeAreaBin;
         }


         for( int nForwardOrReverse = 0; 
              nForwardOrReverse <= 1; ++nForwardOrReverse ) {


            int nQuality = -1;
            int nRelativeAreaIndex = -1;
            LocatedFragment* pLocFrag2 = NULL;

            if ( nForwardOrReverse == 0 ) {
               nQuality = nQualityForward;
               nRelativeAreaIndex = nForwardReadRelativeAreaIndex;
               pLocFrag2 = pForwardRead;
            }
            else {
               nQuality = nQualityReverse;
               nRelativeAreaIndex = nReverseReadRelativeAreaIndex;
               pLocFrag2 = pReverseRead;
            }


            RWTValVector<int>* pArrayOfRelativeAreas = NULL;

            if ( ntGetCons( nConsPos ).cGetBase() == 
                 pLocFrag2->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {

               // agree with human case
               pArrayOfRelativeAreas =
                  ppArrayOfRelativeAreasAgreeWithHuman[ nQuality ];
            }
            else {
               // disagree with human case

               pArrayOfRelativeAreas =
                  ppArrayOfRelativeAreasDisagreeWithHuman[ nQuality ];
            }

            ++( (*pArrayOfRelativeAreas)[ nRelativeAreaIndex ] );
         } //          for( int nForwardOrReverse = 0; 
      } //       for( int nConsPos = nConsPosLeft; nConsPos <= nConsPosRight; ++nConsPos ) {

      
      delete pRelativeAreaOfUncalledBasesForwardRead;
      delete pRelativeAreaOfUncalledBasesReverseRead;

   } //    for( nSpecies = 0; nSpecies < 

   fprintf( pAO, "Relative Areas {\n" );

   for( nQuality = 0; nQuality <= nNumberOfQualityValues; ++nQuality ) {
      fprintf( pAO, "%2d", nQuality );

      RWTValVector<int>* pArrayOfAgreeWithHumanRelativeAreas =
         ppArrayOfRelativeAreasAgreeWithHuman[ nQuality ];

      RWTValVector<int>* pArrayOfDisagreeWithHumanRelativeAreas =
         ppArrayOfRelativeAreasDisagreeWithHuman[ nQuality ];

      for( int nRelativeAreaBin = 0; nRelativeAreaBin <= nTopRelativeAreaBin;
           ++nRelativeAreaBin ) {

         fprintf( pAO, "   %d %d",
                  (*pArrayOfAgreeWithHumanRelativeAreas)[ nRelativeAreaBin ],
                  (*pArrayOfDisagreeWithHumanRelativeAreas)[ nRelativeAreaBin ] );
      }

      fprintf( pAO, "\n" );
   }

   fprintf( pAO, "} Relative Areas\n" );

} // void Contig :: compareTopAndBottomStrandsWithHuman( autoReport* pAutoReport ) {



void Contig :: compareTopAndBottomStrands2( autoReport* pAutoReport ) {





   const int nNumberOfQualityValues = 99;
   const int nNumberOfRelativeAreaBins = 13;
   const int nTopRelativeAreaBin = nNumberOfRelativeAreaBins - 1;


                                                                           
   // read all of the good reads
   RWCString soGoodReads = "/me2/gordon/primates/checkHumanGenome/goodReadsBothBatches.out";
   //   RWCString soGoodReads = "/me2/gordon/primates/checkHumanGenome/tempreads.txt";

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }

   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );


   fprintf( pAO, "Relative Areas {\n" );


   for( int nSpecies = 0; nSpecies < 
           pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {

      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies ];


      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      LocatedFragment* pHumanRead = NULL;


      bool bSpeciesHasBadRead = false;
      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

         if ( pLocFrag->soGetName().bContains("HSap" ) ) {
            pHumanRead = pLocFrag;
            continue;
         }

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( "b.r" ) ) {
            cerr << "ignoring read " << pLocFrag->soGetName() << endl;
            continue;
         }
         
         if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) {
            bSpeciesHasBadRead = true;
            break;
         }

         if ( pLocFrag->soGetName().bEndsWith(".f" )) {
            pForwardRead = pLocFrag;
         }
         else if ( pLocFrag->soGetName().bEndsWith( ".r" ) ) {
            pReverseRead = pLocFrag;
         }
         else {
            assert( false );
         }
      }

      if ( bSpeciesHasBadRead ) continue;

      // can't use species if all reads aren't there
      if ( !pForwardRead || !pReverseRead || !pHumanRead ) continue;


      // now get the relative area arrays for both reads

      mbtValVector<float>* pRelativeAreaOfUncalledBasesForwardRead = NULL;
      mbtValVector<float>* pRelativeAreaOfUncalledBasesReverseRead = NULL;

      assert( pForwardRead->bGetSingleSignalRelativeAreas( pRelativeAreaOfUncalledBasesForwardRead ) );
      assert( pReverseRead->bGetSingleSignalRelativeAreas( pRelativeAreaOfUncalledBasesReverseRead ) );


      if ( pForwardRead->bIsWholeReadUnaligned() ||
           pReverseRead->bIsWholeReadUnaligned() ) 
         continue;

      int nConsPosLeft;
      int nConsPosRight;

      if ( !bIntersect( pForwardRead->nGetAlignClipStart(),
                        pForwardRead->nGetAlignClipEnd(),
                        pReverseRead->nGetAlignClipStart(),
                        pReverseRead->nGetAlignClipEnd(),
                        nConsPosLeft,
                        nConsPosRight ) ) continue;


      for( int nConsPos = nConsPosLeft; nConsPos <= nConsPosRight; ++nConsPos ) {
         
         // we are only interested in cases in which the reads do not agree

         if ( pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
              pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() )
            continue;

         // since they disagree with each other, at least 1 must disagree
         // with human.
         // since they disagree with each other, at least 1 must not be
         // a pad.

         // we are only interested in case in which higher quality read
         // has larger relative area

         int nQualityForward = pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
         int nQualityReverse = pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality();


         int nOrientedUnpaddedReadPosOfForwardRead =
            pForwardRead->nOrientedUnpaddedFragPosFromConsPos( nConsPos );
         int nOrientedUnpaddedReadPosOfReverseRead =
            pReverseRead->nOrientedUnpaddedFragPosFromConsPos( nConsPos );

         float fForwardRelativeArea = 
            (*pRelativeAreaOfUncalledBasesForwardRead)[ nOrientedUnpaddedReadPosOfForwardRead ];

         float fReverseRelativeArea = 
            (*pRelativeAreaOfUncalledBasesReverseRead)[ nOrientedUnpaddedReadPosOfReverseRead ];


         if ( nQualityForward >= nQualityReverse &&
              fForwardRelativeArea < fReverseRelativeArea ) continue;

         if ( nQualityForward <= nQualityReverse &&
              fForwardRelativeArea > fReverseRelativeArea ) continue;


         char cHumanBase =  ntGetCons( nConsPos ).cGetBase();

         bool bForwardReadAgreesWithHuman =
            ( cHumanBase == pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase() );
         bool bReverseReadAgreesWithHuman =
            ( cHumanBase == pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase() );



         int nQualityOfHighQualityRead = -1;
         int nQualityOfLowwQualityRead = -1;
         float fRelativeAreaOfHighQualityRead = -666.0;
         float fRelativeAreaOfLowwQualityRead = -666.0;
         bool bHighQualityReadAgreesWithHuman;
         bool bLowwQualityReadAgreesWithHuman;

         if ( nQualityForward >= nQualityReverse ) {
            nQualityOfHighQualityRead = nQualityForward;
            nQualityOfLowwQualityRead = nQualityReverse;
            fRelativeAreaOfHighQualityRead = fForwardRelativeArea;
            fRelativeAreaOfLowwQualityRead = fReverseRelativeArea;
            bHighQualityReadAgreesWithHuman = bForwardReadAgreesWithHuman;
            bLowwQualityReadAgreesWithHuman = bReverseReadAgreesWithHuman;
         }
         else {
            nQualityOfHighQualityRead = nQualityReverse;
            nQualityOfLowwQualityRead = nQualityForward;
            fRelativeAreaOfHighQualityRead = fReverseRelativeArea;
            fRelativeAreaOfLowwQualityRead = fForwardRelativeArea;
            bHighQualityReadAgreesWithHuman = bReverseReadAgreesWithHuman;
            bLowwQualityReadAgreesWithHuman = bForwardReadAgreesWithHuman;
         }

         fprintf( pAO, "%d %d %.1f %.1f %d %d %d %s %s\n",
                  nQualityOfHighQualityRead,
                  nQualityOfLowwQualityRead,
                  fRelativeAreaOfHighQualityRead,
                  fRelativeAreaOfLowwQualityRead,
                  ( bHighQualityReadAgreesWithHuman ? 1 : 0 ),
                  ( bLowwQualityReadAgreesWithHuman ? 1 : 0 ),
                  nUnpaddedIndex( nConsPos ),
                  soSpecies.data(),
                  soGetName().data() );


      } //       for( int nConsPos = nConsPosLeft; nConsPos <= nConsPosRight; ++nConsPos ) {

      
      delete pRelativeAreaOfUncalledBasesForwardRead;
      delete pRelativeAreaOfUncalledBasesReverseRead;

   } //    for( nSpecies = 0; nSpecies < 


   fprintf( pAO, "} Relative Areas\n" );

} // void Contig :: compareTopAndBottomStrandsWithHuman2( autoReport* pAutoReport ) {



         
         
         



// void Contig :: singleSignalInfo( autoReport* pAutoReport ) {


//    for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
//       LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

//       if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
//          continue;
//       }

//       // fake read that has no chromat
//       if ( pLocFrag->soGetName().bEndsWith( "b.r" ) ) {
//          continue;
//       }

//       mbtValVector<bool>* paSingleSignalArray = NULL;
      
//       pLocFrag->getSingleSignalBasesTemp( paSingleSignalArray );

//       for( int n1ReadPos = paSingleSignalArray->nGetStartIndex();
//            n1ReadPos <= paSingleSignalArray->nGetEndIndex();
//            ++n1ReadPos ) {

         
         
//          if ( (*paSingleSignalArray)[ n1ReadPos ] ) {

//             // see if this base is quality 20 or better
            
//             int n1PaddedReadPos = 
//                pLocFrag->nUnorientedPaddedFromOrientedUnpadded( n1ReadPos );

//             if ( pLocFrag->ntideGet( n1PaddedReadPos ).qualGetQuality() >=
//                  20 ) {

//                // for now, also check if discrepant

//                int nConsPos = 
//                   pLocFrag->nGetConsPosFromFragIndex( n1PaddedReadPos );

//                if ( ntGetCons( nConsPos ).cGetBase() !=
//                     pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() ) {
//                   fprintf( pAO, "%s %d\n",
//                         pLocFrag->soGetName().data(),
//                         n1ReadPos );
//                }
//             }
//          }
//       }
//    }
// }



typedef RWTValVector<int> arrayOfInt;
                     
void Contig :: singleSignalInfo( autoReport* pAutoReport ) {

   const int nNumberOfQualityValues = 99;

   const int nNumberOfRelativeAreaBins = 13;
   const int nTopRelativeAreaBin = nNumberOfRelativeAreaBins - 1;

   RWTPtrVector<arrayOfInt> ppArrayOfRelativeAreasAgree( 
                (size_t) ( nNumberOfQualityValues + 1),
                NULL,
                "ppArrayOfRelativeAreasAgree" );

   RWTPtrVector<arrayOfInt> ppArrayOfRelativeAreasDisagree(
                (size_t) ( nNumberOfQualityValues + 1),
                NULL,
                "ppArrayOfRelativeAreasDisagree" );

   int nQuality;
   for( nQuality = 0; nQuality <= nNumberOfQualityValues; ++nQuality ) {

      RWCString soNameAgree = "ppArrayOfRelativeAreasAgree[";
      soNameAgree += RWCString( (long) nQuality );
      soNameAgree += "]";

      ppArrayOfRelativeAreasAgree[nQuality] =
         new RWTValVector<int>( 
                          (size_t) nNumberOfRelativeAreaBins,
                          0,
                          soNameAgree );

      RWCString soNameDisagree = "ppArrayOfRelativeAreasDisagree[";
      soNameDisagree += RWCString( (long) nQuality );
      soNameDisagree += "]";

      ppArrayOfRelativeAreasDisagree[nQuality] =
         new RWTValVector<int>( 
                          (size_t) nNumberOfRelativeAreaBins,
                          0,
                          soNameDisagree );

   }


   // read all of the good reads
   RWCString soGoodReads = "/me2/gordon/primates/checkHumanGenome/goodReadsBothBatches.out";
   //   RWCString soGoodReads = "/me2/gordon/primates/checkHumanGenome/tempreads.txt";

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }

   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );



   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
         continue;
      }

      // fake read that has no chromat
      if ( pLocFrag->soGetName().bEndsWith( "b.r" ) ) {
         continue;
      }

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;

      if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) 
         continue;

      mbtValVector<float>* pArrayOfRelativeAreaOfUncalledBases = NULL;
      assert( pLocFrag->bGetSingleSignalRelativeAreas( pArrayOfRelativeAreaOfUncalledBases ) );

      int nUnpaddedOrientedReadPosStart = 
         pLocFrag->nOrientedUnpaddedFragPosFromConsPos( 
              pLocFrag->nGetStartUnclippedConsPos() );

      int nUnpaddedOrientedReadPosEnd =
         pLocFrag->nOrientedUnpaddedFragPosFromConsPos(
              pLocFrag->nGetEndUnclippedConsPos() );

      int nIncrement = 1;
      if ( pLocFrag->bComp() )
         nIncrement = -1;


      // get ready for first += nIncrement
      int nUnpaddedOrientedReadPos = 
         nUnpaddedOrientedReadPosStart - nIncrement;

      for( int nConsPos = pLocFrag->nGetStartUnclippedConsPos();
           nConsPos <= pLocFrag->nGetEndUnclippedConsPos();
           ++nConsPos ) {

         // what is the oriented unpadded read pos?

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) 
            continue;

         nUnpaddedOrientedReadPos += nIncrement;

         int nQuality = 
            pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

         // what is the relative area at this base?

         float fRelativeArea = 
            (*pArrayOfRelativeAreaOfUncalledBases)[ nUnpaddedOrientedReadPos ];


         // convert this to an index

         int nFoundIndex = -1;
         for( int nBin = 0; nBin <= ( nTopRelativeAreaBin - 1 ); ++nBin ) {
            float fRelativeAreaBottomOfBin = ( nBin - 1 ) / 10.0;
            float fRelativeAreaTopOfBin =  nBin / 10.0;

            if ( fRelativeArea == -1 ) {
               nFoundIndex = 0;
               break;
            }
            else if ( fRelativeAreaBottomOfBin <= fRelativeArea &&
                 fRelativeArea < fRelativeAreaTopOfBin ) {



               nFoundIndex = nBin;
               break;
            }
         }

         if ( nFoundIndex == -1 ) {

            nFoundIndex = nTopRelativeAreaBin;
         }

         bool bAgree = 
            ( ntGetCons( nConsPos ).cGetBase() == pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() );

         RWTValVector<int>* pArrayOfRelativeAreas = NULL;
         if ( bAgree ) {

            // agree case
            pArrayOfRelativeAreas = 
               ppArrayOfRelativeAreasAgree[ nQuality ];
         }
         else {
            // disagree case

            pArrayOfRelativeAreas =
               ppArrayOfRelativeAreasDisagree[ nQuality ];
         }
         
         ++( (*pArrayOfRelativeAreas)[ nFoundIndex ] );

      }
      
      delete pArrayOfRelativeAreaOfUncalledBases;
   } //    for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) 

   
   fprintf( pAO, "Relative Areas {\n" );
   for( nQuality = 0; nQuality <= 99; ++nQuality ) {
      fprintf( pAO, "%2d",
               nQuality );

      RWTValVector<int>* pArrayOfAgreeRelativeAreas =
            ppArrayOfRelativeAreasAgree[ nQuality ];

      RWTValVector<int>* pArrayOfDisagreeRelativeAreas =
            ppArrayOfRelativeAreasDisagree[ nQuality ];

      for( int nRelativeAreaBin = 0; nRelativeAreaBin <= nTopRelativeAreaBin;
           ++nRelativeAreaBin ) {

         fprintf( pAO, "   %d %d",
                  (*pArrayOfAgreeRelativeAreas)[ nRelativeAreaBin ],
                  (*pArrayOfDisagreeRelativeAreas)[ nRelativeAreaBin ] );
      }

      fprintf( pAO, "\n" );
   }

   fprintf( pAO, "} Relative Areas\n" );
         
}

                     
         






                     
void Contig :: singleSignalInfo2() {


   RWTValVector<int> aArrayOfSingleSignalBasesByQuality( ucQualityMax + 1,    
                                                    0,
                                        "aArrayOfSingleSignalByQuality" );

   RWTValVector<int> aArrayOfMultipleSignalBasesByQuality( ucQualityMax + 1,
                                                      0,
                                      "aArrayOfMultipleSignalByQuality" );



   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   //   RWCString soGoodReads = "/me2/gordon/primates/checkHumanGenome/tempreads.txt";

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }

   mbtValOrderedVectorOfRWCString aGoodReads( (size_t) 1000 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      aGoodReads.insert( soLine );
   }

   fclose( pGoodReads );



   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );

      if ( pLocFrag->soGetName().bContains( "HSap" ) ) {
         continue;
      }

      // fake read that has no chromat
      if ( pLocFrag->soGetName().bEndsWith( "b.r" ) ) {
         continue;
      }

      if ( pLocFrag->bIsWholeReadUnaligned() ) continue;
      if ( pLocFrag->bIsWholeReadLowQuality() ) continue;

      if ( aGoodReads.index( pLocFrag->soGetName() ) == RW_NPOS ) 
         continue;

      MBTValOrderedOffsetVector<char>* pSingleSignalArray = NULL;
      
      assert( pLocFrag->bGetSingleSignalBasesConsensusLength( pSingleSignalArray ) );

      // only interested in high quality segment

      int nIntersectLeft;
      int nIntersectRight;
      if ( !bIntersect( pLocFrag->nGetHighQualityStart(),
                        pLocFrag->nGetHighQualityEnd(),
                        pLocFrag->nGetAlignClipStart(),
                        pLocFrag->nGetAlignClipEnd(),
                        nIntersectLeft,
                        nIntersectRight ) ) continue;

      for( int nConsPos = nIntersectLeft;
           nConsPos <= nIntersectRight;
           ++nConsPos ) {

         if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) 
            continue;

         int nQuality = 
            pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

         if ( (*pSingleSignalArray)[ nConsPos ] == cSingleSignal ) {
            ++aArrayOfSingleSignalBasesByQuality[ nQuality ];
         }
         else {
            ++aArrayOfMultipleSignalBasesByQuality[ nQuality ];
         }

      }
      
      delete pSingleSignalArray;
   } //    for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) 

   
   fprintf( pAO, "SingleSignalInfo2 {\n" );
   for( int nQuality = 0; nQuality <= 99; ++nQuality ) {
      fprintf( pAO, "%2d %d %d\n",
               nQuality,
               (aArrayOfSingleSignalBasesByQuality)[ nQuality ],
               (aArrayOfMultipleSignalBasesByQuality)[ nQuality ] );
   }

   fprintf( pAO, "} SingleSignalInfo2\n" );
         
}

                     
         




void Contig :: compareTopAndBottomStrands3( autoReport* pAutoReport ) {


   // read all of the good reads
   RWCString soGoodReads = pCP->filAutoReportGoodHitReads_;

   FILE* pGoodReads = fopen( soGoodReads.data(), "r" );
   if ( !pGoodReads ) {
      RWCString soError = "could not open ";
      soError += soGoodReads;
      THROW_ERROR( soError );
   }



   RWTPtrOrderedVector<LocatedFragment> aGoodReads( (size_t) 10 );

   while(  fgets( soLine, nMaxLineSize, pGoodReads ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      soLine.stripTrailingWhitespaceFast();

      for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );


         if ( pLocFrag->soGetName() == soLine ) {
            // the fake read for the 2nd alignment
            assert( !pLocFrag->soGetName().bContains( "b." ) );

            aGoodReads.insert( pLocFrag );
         }
      }
   }

   fclose( pGoodReads );


   int nSpecies;
   for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {
      
      RWCString soSpecies = pAutoReport->aArrayOfSpecies_[ nSpecies];

      LocatedFragment* pForwardRead = NULL;
      LocatedFragment* pReverseRead = NULL;
      
      for( int nRead = 0; nRead < aGoodReads.length(); ++nRead ) {
         LocatedFragment* pLocFrag = aGoodReads[ nRead ];

         if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue;

         if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
            pForwardRead = pLocFrag;
         }
         else {
            pReverseRead = pLocFrag;
         }
      }
      
      
      if ( !pForwardRead || !pReverseRead ) {
         continue;
      }


      // if reached here, we have both a .f and .r read for this species

      if ( pForwardRead->bIsWholeReadUnaligned() ||
           pReverseRead->bIsWholeReadUnaligned() ) 
         continue;

      int nConsPosLeft;
      int nConsPosRight;

      if ( !bIntersect( pForwardRead->nGetAlignClipStart(),
                        pForwardRead->nGetAlignClipEnd(),
                        pReverseRead->nGetAlignClipStart(),
                        pReverseRead->nGetAlignClipEnd(),
                        nConsPosLeft,
                        nConsPosRight ) ) continue;


      int nMiddleOfCommonAlignedRegion = ( nConsPosLeft + nConsPosRight ) / 2;

      // convert this to unpadded read position for each read

      int nForwardUnpadded = 
         pForwardRead->nOrientedUnpaddedFragPosFromConsPos( nMiddleOfCommonAlignedRegion );
      int nReverseUnpadded =
         pReverseRead->nOrientedUnpaddedFragPosFromConsPos( nMiddleOfCommonAlignedRegion );

      fprintf( pAO, "FORWARD_REVERSE_ALIGNMENT %s .f: %s %d .r: %s %d\n",
               soSpecies.data(),
               pForwardRead->soGetName().data(),
               nForwardUnpadded,
               pReverseRead->soGetName().data(),
               nReverseUnpadded );
   } //    for( nSpecies = 0; nSpecies < pAutoReport->aArrayOfSpecies_.length(); ++nSpecies ) {


} // void Contig :: compareTopAndBottomStrands3( autoReport* pAutoReport ) {








void Contig :: compareTopAndBottomStrands4( autoReport* pAutoReport ) {


   fprintf( pAO, "compareTopAndBottomStrands4 {\n" );

   LocatedFragment* pForwardRead = NULL;
   LocatedFragment* pReverseRead = NULL;

   for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) {
      LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead );


      if ( pLocFrag->soGetName().bEndsWith( ".f" ) ) {
         pForwardRead = pLocFrag;
      }
      else {
         pReverseRead = pLocFrag;
      }
   }

   assert( pForwardRead && pReverseRead );


   // check if the alignment matches (or is similar) to that against 
   // human

   int nConsPosForward = 
      pForwardRead->nConsPosFromOrientedUnpaddedFragPos( pCP->nAutoReportTopStrandPinnedPosition_ );

   int nConsPosReverse =
      pReverseRead->nConsPosFromOrientedUnpaddedFragPos( pCP->nAutoReportBottomStrandPinnedPosition_ );


   fprintf( pAO, "pinned positions: %d %d %d\n",
            nConsPosForward,
            nConsPosReverse,
            abs( nConsPosForward - nConsPosReverse ) );



   // save single signal arrays before deleting column of pads


   MBTValOrderedOffsetVector<char>* pSingleSignalArrayForward;
   MBTValOrderedOffsetVector<char>* pSingleSignalArrayReverse;

   pForwardRead->bGetSingleSignalBasesConsensusLength( pSingleSignalArrayForward );
   pReverseRead->bGetSingleSignalBasesConsensusLength( pSingleSignalArrayReverse );



   int nIntersectConsPosStart;
   int nIntersectConsPosEnd;

   if ( !bIntersect( pForwardRead->nGetAlignClipStart(),
                     pForwardRead->nGetAlignClipEnd(),
                     pReverseRead->nGetAlignClipStart(),
                     pReverseRead->nGetAlignClipEnd(),
                     nIntersectConsPosStart,
                     nIntersectConsPosEnd ) ) return;

   
   
   for( int nConsPos = nIntersectConsPosStart; 
        nConsPos <= nIntersectConsPosEnd; ++nConsPos ) {

      if ( (*pSingleSignalArrayForward)[ nConsPos ] == cSingleSignal &&
           (*pSingleSignalArrayReverse)[ nConsPos ] == cSingleSignal ) {
         
         fprintf( pAO, "%d %d %c %c %d %s\n",
                  pForwardRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality(),
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).qualGetQuality(),
                  pForwardRead->ntGetFragFromConsPos( nConsPos ).cGetBase(),
                  pReverseRead->ntGetFragFromConsPos( nConsPos ).cGetBase(),
                  nUnpaddedIndex( nConsPos ),
                  soGetName().data() );
      }
   }

   
   fprintf( pAO, "} compareTopAndBottomStrands4\n" );

}