/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    <stdio.h>
#include    <iostream.h>
#include    <fstream.h>
#include    <strstream.h>
#include    "rwcstring.h"
#include    "rwtvalvector.h"
#include    "rwctokenizer.h"
#include    <time.h>

// for chmod
#include    <sys/types.h>
#include    <sys/stat.h>

   
#include    "sysdepend.h"
#include    "locatedFragment.h"
#include    "contig.h"
#include    "assembly.h"
#include    "basesegment.h"
#include    "mbt_exception.h"
#include    "filename.h"
#include    "listOfFragments.h"
#include    "listOfFragmentsAndEndPositions.h"
#include    "guiapp.h"
#include    "consed.h"
#include    "deleteFileIfItExists.h"
#include    "consedParameters.h"
#include    "guiTopWindow.h"
#include    "popupErrorMessage.h"
#include    "subcloneTTemplate.h"
#include    "textbox.h"
#include    "autoFinishExpTag.h"
#include    "universalPrimerAutoFinishExpTagsSortedByTemplateName.h"
#include    <math.h>
#include    "please_wait.h"
#include    "evalExpCluster.h"
#include    "expidAndLocatedFragment.h"
#include    "singletInfo.h"
#include    "readPHDOfSinglet.h"
#include    "mbt_errors.h"
#include    "rwcregexp.h"
#include    "oligoTag.h"
#include    "fileDefines.h"
#include    <stdlib.h>
#include    "mbt_val_ord_offset_vec.h"
#include    "dErrorRateFromQuality.h"
#include    "nGetReadTypeFromReadName.h"
#include    "readFileOfPhdFiles.h"
#include    "dGetErrorsFixedAtBase.h"
#include    "listOfLibraries.h"
#include    "templatesSortedByLibrary.h"
#include    "abs.h"
#include    "guiMultiContigNavigator.h"
#include    "findRepeat.h"
#include    "lowconsqual.h"
#include    "contigEndPair.h"
#include    "templateForMinilibrary.h"
#include    "restrictionFragment.h"
#include    "popupInfoOrErrorMessageNoFormat.h"
#include    "clScaffold.h"
#include    "fwdRevPair.h"
#include    "highQualityDiscrepancyGotoList.h"
#include    "copyFiles.h"
#include    "renameFiles.h"
#include    "bIsNumeric.h"
#include    "soGetErrno.h"
#include    "bIsNumericMaybeWithWhitespace.h"
#include    "hp_exception_kludge.h"
#include    "popupErrorMessage2.h"
#include    "soAddCommas.h"
#include    "ntide.h"
#include    "printTime.h"
#include    "popupInfoMessage.h"




#define PARSE_PANIC( file, nLine, soMessage, soLine ) \
{  ostrstream ost; \
   ost << "Corrupted ace file detected by consed source file " \
     << __FILE__ << " at " << __LINE__ <<endl; \
   ost << "Error in ace file " << file << " at line " << nLine << ": " \
      << soMessage << endl << soLine << ends; \
   InputDataError ide(ost.str()); \
   throw ide; }


void Assembly :: saveAssembly( bool& bUserPushedCancel, 
                               Widget widOfCallingWindow ) {

   if ( pCP->bReadOnly_ ) {
      RWCString soErrorMessage = "Sorry, you can't save the assembly since you started consed -read_only";
      InputDataError ide( soErrorMessage );
      throw ide;
   }
   
   
   // get filename
   FileName soAssemblyDirName( soGetAceFileName().soGetDirectory());
   FileName soDirMask = soAssemblyDirName + "*.ace*";

   FileName soTryOutFileName = soGetAceFileName().filGetNextVersion();

   // bump version number until stat says file doesn't exist
   while (soTryOutFileName.bFileByThisNameExists()) {
      soTryOutFileName = soTryOutFileName.filGetNextVersion();
   }

   FileName soUserOutFileName =
     GuiApp::popupFileSelector("Save assembly to file", 
                               soDirMask,
                               soTryOutFileName);

   // if user cancelled or did not supply name, bail now
   if (soUserOutFileName.isNull()) { 
      bUserPushedCancel = true;
      return; 
   }

   bool bGoAhead = true;
   if (soUserOutFileName.bFileByThisNameExists()) {
      bGoAhead = 
        GuiApp::popupDecisionMessage("File %s exists.  Save anyway?",
                                     (const char*)soUserOutFileName);
   }
   if (! bGoAhead) {
      bUserPushedCancel = true;
      return;
   }

   PleaseWait* pPleaseWait = NULL;

   try {

      // the "if" is to fix a Linux bug:  Linux didn't like
      // fclose( NULL ) but Solaris could handle it
      if ( pfilEditHistory_ ) {
         fclose( pfilEditHistory_ );
         pfilEditHistory_ = NULL;
      }


      bool bAceFormat1 = 
         (consedParameters::pGetConsedParameters()->nWriteThisAceFormat_ == 1)?
         true : false;

      if ( widOfCallingWindow == 0 ) 
         widOfCallingWindow =  GuiApp::pGetGuiApp()->widGetTopLevel();

      pPleaseWait = new PleaseWait( widOfCallingWindow );

      // this can take a long time
      saveToFile(soUserOutFileName, bAceFormat1 );

      delete pPleaseWait;

      deleteFileIfItExists( soUserOutFileName + ".wrk" );
      // update label in gui contig win to reflect new version
      ConsEd::pGetConsEd()->setAssemblyNameOnAllContigwins( 
                                 soUserOutFileName.soGetBasename()
                                 );

      GuiApp::pGetGuiApp()->pGetGuiTopWindow()->setAssemblyName( 
                                           soUserOutFileName.soGetBasename() 
                                                                );

      clearAllChangedFlags();
      ConsEd::pGetConsEd()->disallowUndos();
      bUserPushedCancel = false;
   }
   catch (SysRequestFailed srf) {
      GuiApp::popupErrorMessage(srf.szGetDesc());
      // this is a method of keeping consed from exiting if there
      // was a problem writing the file

      if ( pPleaseWait )
         delete pPleaseWait;

      bUserPushedCancel = true;
   }

}


void Assembly :: clearAllChangedFlags() {

   setUnchanged();

   // loop through all contigs and fragments in assembly

   for ( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );

      pContig->setUnchanged();

      int nNumFrags = pContig->nGetNumberOfFragsInContig();
      for (int nFrag = 0; nFrag < nNumFrags; nFrag++) {

         // get pointer to this located frag from contig
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet(nFrag);
         pLocFrag->setUnchanged();
      }
   }
}



// returns NULL if contig is not found
Contig* Assembly :: pGetContigByName( const RWCString& soContigName ) {
   
   Contig* pContig;


   int nContig = 0;
   bool bFound = false;
   while( !bFound && 
         nContig < nNumContigs() ) {
      pContig = pGetContig( nContig );
      if (pContig->soGetName() == soContigName ) 
        bFound = true;
      else
        ++nContig;
   }

   if (!bFound) pContig = 0;

   return( pContig );
}



Contig* Assembly :: pGetContigByNameCaseInsensitive( const RWCString& soContigName ) {


   RWCString soContigLower( soContigName );
   soContigLower.toLower();
   
   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      if ( pContig->soGetName().returnToLower() == soContigLower ) {
         return pContig;
      }
   }
   
   return NULL;
}
         


void Assembly :: writeEditToEditHistoryFile( char* szFormat, ... ) {
   char     szLineToWrite[10000];

   va_list  args;
   va_start(args, szFormat);
   vsprintf( szLineToWrite, szFormat, args );
   va_end(args);


   // shouldn't ever get here, but handle it anyway
   if ( pCP->bReadOnly_ ) {
      return;
   }


   if ( pfilEditHistory_ == NULL ) {
     // this must be the first edit--open the edit history file

     FileName filEditHistory = soGetAceFileName() + ".wrk";


     if (bOpenEditHistoryForAppendFlag() ) {

       pfilEditHistory_ = fopen( (char*)filEditHistory.data(), "a" );
     }
     else {
       // open the output file
       pfilEditHistory_ = fopen( (char*)filEditHistory.data(), "w" );

     }
       


     if ( pfilEditHistory_ == NULL ) {
       // throw an exception with a detailed explanation
       ostrstream ost; 
       ost << "Unable to open file " << filEditHistory
           << " for writing" << endl << ends;
       SysRequestFailed srf(ost.str());
       // what the hell, show 'em the errno
       srf.includeErrnoDescription();
       throw srf;
     }
   }

   // now the file is open, one way or the other.  so write the edit--
   // that's what we're here for

   strcat( szLineToWrite, "\n" );


   if ( fputs( szLineToWrite, pfilEditHistory_ ) == EOF ) {
     // throw an exception with a detailed explanation
     ostrstream ost; 
     ost << "Unable to write to .wrk file" << endl << ends;
     SysRequestFailed srf(ost.str());
     // what the hell, show 'em the errno
     srf.includeErrnoDescription();
     throw srf;
   }

   if (fflush( pfilEditHistory_ ) != 0 )  {
     ostrstream ost; 
     ost << "Unable to flush .wrk file" << endl << ends;
     SysRequestFailed srf(ost.str());
     // what the hell, show 'em the errno
     srf.includeErrnoDescription();
     throw srf;
   }

}




// called repeatedly by saveToFile()
static void writeContigToAceFile(Contig& rContig, ofstream& ofsAceFile,
                          const bool bAceFormat1 ) {


   Ntide::setGetBaseUpperOrLowerLookupTableIfNecessary();

   rContig.sortReadsByPosition();


   if (bAceFormat1) {
      // write consensus sequence
      ofsAceFile << endl << "DNA " << rContig.soGetName() << endl;
   }
   else {

      // write consensus sequence
      ofsAceFile << endl << "CO " << rContig.soGetName() << " " << 
         rContig.nGetPaddedSeqLength() << " " << 
         rContig.nGetNumberOfFragsInContig() << " " <<
         rContig.baseSegArray_.nGetNumSegments() << " " <<
         (rContig.bIsComplementedFromWayPhrapCreated() ? "C" : "U" )
                 << endl;
   }

   // loop through entire sequence
   int nCharsOnLine = 0;
   int nPos;
   for ( nPos = rContig.nGetStartConsensusIndex(); 
        nPos <= rContig.nGetEndConsensusIndex(); 
        nPos++) {
      Ntide nt = rContig.ntGetCons(nPos);

      char cBase = nt.cGetBaseUpperOrLowerUnsafe();

      ofsAceFile << cBase;
      nCharsOnLine++;
      if (nCharsOnLine == nAceMaxDnaLineLen) {
         ofsAceFile << endl;
         nCharsOnLine = 0;
      }
   }
   if (nCharsOnLine) ofsAceFile << endl;
   ofsAceFile << endl;

   // write the BaseQuality lines

   if (bAceFormat1 )
      ofsAceFile << endl << "BaseQuality " << rContig.soGetName() << endl;
   else
      ofsAceFile << endl << "BQ" << endl;

   int nNumbersOnLine = 0;
   for( nPos = rContig.nGetStartConsensusIndex();
        nPos <= rContig.nGetEndConsensusIndex();
        nPos++) {

      Ntide nt = rContig.ntGetCons( nPos );
      
      // quality information is only output for non-pads
      if (!nt.bIsPad() ) {

         ofsAceFile << " " << (int) nt.qualGetQuality();
         ++nNumbersOnLine;

         static const int nAceMaxBaseQualityLineLen = 50;

         if (nNumbersOnLine >= nAceMaxBaseQualityLineLen) {
            ofsAceFile << endl;
            nNumbersOnLine = 0;
         }
      }
   }

   if (nNumbersOnLine) 
      ofsAceFile << endl;
   
   ofsAceFile << endl;


   if (bAceFormat1) 
      // the "sequence" section contains Assembled_from* and BaseSegment* lines
      ofsAceFile << "Sequence " << rContig.soGetName() << endl;

   // put out "AF" lines
   int nNumFrags = rContig.nGetNumberOfFragsInContig();
   int nFrag;
   for ( nFrag = 0; nFrag < nNumFrags; nFrag++) {

      // get pointer to this located frag from contig
      LocatedFragment* pLocFrag = rContig.pLocatedFragmentGet(nFrag);

      if (bAceFormat1) {
         // when reading the .ace file, we peeled '.comp' off of
         // the fragment name.  replace it here
         RWCString soFragName = pLocFrag->soGetFragmentName();

         if (pLocFrag->bComp()) soFragName += ".comp";

      
         ofsAceFile << "Assembled_from* " << soFragName <<
            "  " << pLocFrag->nGetAlignStart() << "  " <<
            pLocFrag->nGetAlignEnd() << endl;
      }
      else {

         ofsAceFile << "AF " << pLocFrag->soGetFragmentName();
         if (pLocFrag->bComp() )
            ofsAceFile << " C ";
         else
            ofsAceFile << " U ";

         ofsAceFile << pLocFrag->nGetAlignStart() << endl;
      }
   }

   // put out BS lines
   int nNumBaseSegs = rContig.baseSegArray_.nGetNumSegments();
   for (int nSeg = 0; nSeg < nNumBaseSegs; nSeg++) {
      BaseSegment* pBs = rContig.baseSegArray_[nSeg];
      LocatedFragment* pLf = pBs->pGetLocFrag();

      if (bAceFormat1) {
         RWCString soFragName = pLf->soGetFragmentName();
         if (pLf->bComp()) soFragName += ".comp";
         
         ofsAceFile << "Base_segment* " << pBs->nGetStartInConsensus()
            << " " << pBs->nGetEndInConsensus() << "  " << soFragName 
            << "  " << pLf->nGetFragIndexFromConsPos( pBs->nGetStartInConsensus() )
            << " "<< pLf->nGetFragIndexFromConsPos( pBs->nGetEndInConsensus() ) 
            << endl;
      }
      else {
         ofsAceFile << "BS " 
                 << pBs->nGetStartInConsensus()
                 << " " << pBs->nGetEndInConsensus() 
                 << " " << pLf->soGetFragmentName()
                 << endl;
      }
   }

   if (!bAceFormat1)
      ofsAceFile << endl;

   // only used in bAceFormat1
   RWCString soFragName;

   // put out fragment sequence and clipping lines
   for (nFrag = 0; nFrag < nNumFrags; nFrag++) {
      LocatedFragment* pLf = rContig.pLocatedFragmentGet(nFrag);

      if (bAceFormat1) {
         soFragName = pLf->soGetFragmentName();
         if (pLf->bComp()) soFragName += ".comp";

         ofsAceFile << endl << "DNA " << soFragName << endl;
      }       
      else {

         int nNumberOfReadTags = pLf->nGetNumberOfReadTagsToWriteToAceFile();

         ofsAceFile << "RD " << pLf->soGetName() << " " <<
            pLf->nGetPaddedSeqLength() << " " <<
            "0"  // number of whole read info items
                    << " " << nNumberOfReadTags << endl;
      }

      // loop through entire sequence
      int nCharsOnLine = 0;
      for (int nPos = pLf->nGetStartIndex(); // fragment, not consensus indices
           nPos <= pLf->nGetEndIndex();   // from Sequence class member funs
           nPos++) {
         Ntide nt = pLf->ntideGet(nPos); // from Sequence class

         char cBase = nt.cGetBaseUpperOrLowerUnsafe();

         ofsAceFile << cBase;

         nCharsOnLine++;
         if (nCharsOnLine == nAceMaxDnaLineLen) {
            ofsAceFile << endl;
            nCharsOnLine = 0;
         }
      }
      if (nCharsOnLine) ofsAceFile << endl;

      ofsAceFile << endl;

      // clipping line
      if (bAceFormat1) {
         ofsAceFile << "Sequence " << soFragName << endl;
         ofsAceFile << "Clipping* " << pLf->nGetStartUnclippedFragPos() <<
            ' ' << pLf->nGetEndUnclippedFragPos() << endl;
      }
      else {

         ofsAceFile << "QA ";
         if ( pLf->bIsWholeReadLowQuality() )
            ofsAceFile << "-1 -1 ";
         else {
            // check to make sure a corrupt ace file isn't being 
            // written:

            int nStartHighQuality = pLf->nGetFragIndexFromConsPos(
                          pLf->nGetStartUnclippedConsPos() 
                                             );
            int nEndHighQuality = pLf->nGetFragIndexFromConsPos(
                          pLf->nGetEndUnclippedConsPos()
                                             );

            if ( ! (
                    1 <= nStartHighQuality &&
                    nStartHighQuality <= nEndHighQuality &&
                    nEndHighQuality <= pLf->nGetEndIndex() ) ) {
               RWCString soError = "The ace file you just wrote is corrupted and will not be able to be read by Consed.  The QA (quality) line for read ";
               soError += pLf->soGetName();
               soError += " contains out-of-bounds numbers.  (";
               soError += RWCString( (long) nStartHighQuality );
               soError += ", ";
               soError += RWCString( (long) nEndHighQuality );
               soError += ", ";
               soError += RWCString( (long) pLf->nGetEndIndex() );
               soError += ").  If you can reproduce what you have done so you can get this message again, and you can supply the data set to David Gordon with instructions so that he can also reproduce it, then he can fix this bug.";
               popupErrorMessage( soError );
            }

            ofsAceFile << 
               pLf->nGetFragIndexFromConsPos(
                   pLf->nGetStartUnclippedConsPos() 
                                             )
                       << " " <<
               pLf->nGetFragIndexFromConsPos(
                   pLf->nGetEndUnclippedConsPos() 
                                              )
                       << " ";
         }

         if ( pLf->bIsWholeReadUnaligned() )
            ofsAceFile << "-1 -1 " << endl;
         else {

            // check to make sure a corrupt ace file isn't being written

            int nStartAlignedRegion = pLf->nGetFragIndexFromConsPos(
                          pLf->nGetAlignClipStart() );

            int nEndAlignedRegion = pLf->nGetFragIndexFromConsPos(
                          pLf->nGetAlignClipEnd() );

            if ( !(
                   1 <= nStartAlignedRegion &&
                   nStartAlignedRegion <= nEndAlignedRegion &&
                   nEndAlignedRegion <= pLf->nGetEndIndex() ) ) {
               
               RWCString soError = "The ace file you just wrote is corrupted and Consed will not be able to read it.  The QA (alignment) line for read ";
               soError += pLf->soGetName();
               soError += " contains out-of-bounds numbers.  ( ";
               soError += RWCString( (long) nStartAlignedRegion );
               soError += ", ";
               soError += RWCString( (long) nEndAlignedRegion );
               soError += ").  nGetEndIndex = ";
               soError += RWCString( (long) pLf->nGetEndIndex() );
               soError += " If you can reproduce what you have done so you get this message again, and you can supply the data set to David Gordon with instructions so he can also reproduce this message, then he can fix this bug.";
               popupErrorMessage( soError );
            }
            


            ofsAceFile << 
               pLf->nGetFragIndexFromConsPos(
                   pLf->nGetAlignClipStart()
                                           ) 
                    << " " <<
            pLf->nGetFragIndexFromConsPos(
                        pLf->nGetAlignClipEnd()
                                          ) 
                    << endl;
         }
      }

      // the PHD file and timestamp should have just been changed (above)
      // for all of the reads that were edited since the last save

      if (bAceFormat1) {
         ofsAceFile << "Description " << 
            "CHROMAT_FILE: " << (pLf->filGetChromatFilename() ) <<
            " PHD_FILE: " << (pLf->filGetPHDFilename() ) << 
            " TIME: " << pLf->soGetTimestamp() << endl;
      }
      else {

         RWCString soDSLine( (size_t) 1000 );
         soDSLine = "DS";

         if ( !pLf->filGetChromatFilename().bIsNull() &&
              pLf->filGetChromatFilename() != "none" ) {

            soDSLine += " CHROMAT_FILE: ";
            soDSLine += pLf->filGetChromatFilename();
         }
         



         if ( !pLf->filGetPHDFilename().bIsNull() ) {
            soDSLine += " PHD_FILE: ";
            soDSLine += pLf->filGetPHDFilename();
         }

            
         // now VERSION
         // the intent is to not write version for older files

         if ( pLf->filGetPHDFilename().bIsNull() ) {

            soDSLine += " VERSION: ";
            char szTemp[10];
            sprintf( szTemp, "%d", pLf->nVersion_ );
            soDSLine += szTemp;
         }

         // time

         soDSLine += " TIME: ";
         soDSLine += pLf->soGetTimestamp();


         // chemistry.  Historically, CHEM has not been written
         // for prim and term (Sanger) reads.  However, phrap
         // historically *does* write them.  When in doubt, don't
         // change it.


              

         if ( pLf->soChemistry_ != "prim" &&
              pLf->soChemistry_ != "term" &&
              !pLf->soChemistry_.bIsNull() ) {
            
            soDSLine += " CHEM: ";
            soDSLine += pLf->soChemistry_;
         }

         // why is DYE not here?  DG 8/15/08

         ofsAceFile << soDSLine << endl;

         pLf->writeReadTagsToAceFile( ofsAceFile );
      }
   } //   for (nFrag = 0; nFrag < nNumFrags; nFrag++) {
}


FileName Assembly ::  filSaveAssemblyToUserSpecifiedOrNextAvailableVersionOfAceFile() {

   FileName filToSave;

   if ( !pCP->filUserWantsToSaveToThisAceFile_.bIsNull() )
      filToSave = pCP->filUserWantsToSaveToThisAceFile_;
   else {
      filToSave = filGetAceFileFullPathname().filFindOneHigherThanHighestVersion3();
   }

   bool bAceFormat1 = 
      (consedParameters::pGetConsedParameters()->nWriteThisAceFormat_ == 1)?
      true : false;

   saveToFile( filToSave, bAceFormat1 );

   return filToSave;
}



void Assembly ::  saveAssemblyToUserSpecifiedOrNextAvailableVersionOfAceFile( 
const FileName& filToSave ) {

   FileName filToSave2( filToSave );

   if ( filToSave2.bIsNull() )
      filToSave2 = filGetAceFileFullPathname().filFindOneHigherThanHighestVersion3();


   bool bAceFormat1 = 
      (consedParameters::pGetConsedParameters()->nWriteThisAceFormat_ == 1)?
      true : false;

   saveToFile( filToSave2, bAceFormat1 );

}







// passed a file name, saves a new version of the 
// .ace file (but only with '*' i.e. padded lines)
// overwrites existing file, throws exception if
// it cannot open. 
void Assembly :: saveToFile(const char* szOutFilePath, const bool bAceFormat1 ) {

   

   int nContig;

   // loop through all fragments in assembly
   // look for any that have changed and write new PHD files for them
   for (nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );

      int nNumFrags = pContig->nGetNumberOfFragsInContig();
      for (int nFrag = 0; nFrag < nNumFrags; nFrag++) {

         // get pointer to this located frag from contig
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet(nFrag);
         if (pLocFrag->bChanged() ) pLocFrag->writePHDAndMore();
      }
   }


   // make sure we got a name
   assert (szOutFilePath);

   cerr << "writing " << szOutFilePath << endl;

   // open the output file
#ifdef OFSTREAM_OPEN_WITHOUT_PERMISSIONS
   ofstream ofsAceFile(szOutFilePath, ios::out );
#else
   ofstream ofsAceFile(szOutFilePath, ios::out, 0666 );
#endif

   if (! ofsAceFile) {
      // throw an exception with a detailed explanation
      ostrstream ost; 
      ost << "Unable to open file " << szOutFilePath
          << " for writing" << endl << ends;
      SysRequestFailed srf(ost.str());
      // what the hell, show 'em the errno
      srf.includeErrnoDescription();
      throw srf;
   }

   if (!bAceFormat1) {
      setNumberOfReadsInAssemblyAccurate();
      ofsAceFile << "AS " << nNumContigs() << " " << 
         nGetNumberOfReadsInAssembly() << endl;
   }

   // loop through all contigs in assembly
   for ( nContig = 0; nContig < nNumContigs(); nContig++) {
      writeContigToAceFile((*this)[nContig], ofsAceFile, bAceFormat1 );
   }

   // write all consensus tags in assembly
   for ( nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig* pContig = pGetContig( nContig );
      pContig->writeConsensusTagsToAceFile( ofsAceFile );
   }

   // write all wholeAssemblyItems


   if ( !bAceFormat1 ) {
      for( int nWA = 0; nWA < aWholeAssemblyItems_.length(); ++nWA ) {
         aWholeAssemblyItems_[nWA]->writeWholeAssemblyItemToAceFile( ofsAceFile );
      }
   }

   ofsAceFile.close();

#ifdef OFSTREAM_OPEN_WITHOUT_PERMISSIONS
   int nError = chmod( szOutFilePath, 0666 );
#endif

   // update the file name
   soFileName_ = szOutFilePath;



}


// Searches the entire assembly for a fragment of name soFragmentName
// If found, returns its LocatedFragment pointer.
// If not found, returns null.


LocatedFragment*    Assembly :: pGetLocatedFragmentByName( const RWCString& soFragmentName ) {

   if ( bReadsSortedByReadNameNeedsFixing_ )
       setUpReadsSortedByReadName();

   return( pReadsSortedByReadName_->pFindReadByName( soFragmentName ) );
}




void Assembly :: readPHDBallsAndPHDFilesAndSetQualitiesAndPeakPositions() {
   int nContig;

   int nFragsSoFar = 0;


   pCP->nPhdFilesRead_ = 0;

   // check if the ace file refers to any phd balls
   
   for( int nWA = 0; nWA < aWholeAssemblyItems_.length(); ++nWA ) {
   
      wholeAssemblyItem* pWA =  aWholeAssemblyItems_[nWA];

      if ( pWA->soType_ == "phdBall" ) {
         FileName filPhdBall = pWA->soText_;

         // I guess this is so this file isn't attempted to be
         // read twice
         if ( filPhdBall == pCP->filFileOfPhdFiles_ ) continue;

         FileName filPhdBallFullPath = pWA->soText_;
         if ( !filPhdBallFullPath.bFileByThisNameExists() ) {

            filPhdBallFullPath = pCP->filPhdBallDirectory_ + "/" +
               pWA->soText_;
         
            if ( !filPhdBallFullPath.bFileByThisNameExists() ) {
               RWCString soError = pWA->soText_ + " and " +
                  filPhdBallFullPath + " do not exist even though it is specified in a WA item in the ace file";
               THROW_ERROR( soError );
            }
         }

         readFileOfPhdFiles( filPhdBallFullPath );

      }
   }


   // now (for backwards compatibility) check for the phd.ball
   // in the current directory

   // command line overrides .consedrc
   // But if commandline does not have a -fileOfPhdFiles, then use
   // the .consedrc

   // command line -fileOfPhdFiles is put into pCP->filFileOfPhdFiles_
   //
   // pCP->filFastStartupFile_ comes from .consedrc and is phd.ball by
   // default

   if ( pCP->filFileOfPhdFiles_.isNull() ) {
      if ( pCP->bFastStartup_ ) {
         pCP->filFileOfPhdFiles_ = pCP->filFastStartupFile_;
      }
   }

   if ( !pCP->filFileOfPhdFiles_.isNull() && 
        pCP->filFileOfPhdFiles_.bFileByThisNameExists() ) {
      readFileOfPhdFiles( pCP->filFileOfPhdFiles_ );
   }




   int nNumberOfIndividualPhdFilesRead = 0;

   for (nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );

      int nNumFrags = pContig->nGetNumberOfFragsInContig();

      for (int nFrag = 0; nFrag < nNumFrags; nFrag++) {
         ++nFragsSoFar;


         // get pointer to this located frag from contig
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet(nFrag);

         try {
            if ( !pLocFrag->bAlreadyReadPhdFile_ ) {
               // if the phd file has already been read, this will not
               // read it again
               pLocFrag->readPHDFileAndSetQualitiesAndPeakPositions();

               ++(pCP->nPhdFilesRead_ );
               ++nNumberOfIndividualPhdFilesRead;
               
               if ( pCP->nPhdFilesRead_ % 50 == 0 ) {
                  int nPerCentComplete =  
                     ( pCP->nPhdFilesRead_ * 100 / nNumberOfReadsInAssembly_ )
                     / 2 + 50;
               
                  printf( "%d%% done.  %d phd files read so far...\n",
                       nPerCentComplete,
                       pCP->nPhdFilesRead_ );
               }
            }

            if ( pLocFrag->nVersion_ != 1 &&
                 !pLocFrag->bAlreadyReadPhredCallsInPhdBall_ ) {

               // this is the case in which there is a read
               // with a higher version and there is no file
               // for this read in the phd.ball (or else the user
               // isn't using the phd.ball)

               try {
                  pLocFrag->readPhredCalls( );
               }
               catch( ExceptionBase eb ) {
                  // couldn't read the phred calls.  However, *could*
                  // read the edited phd file (the highest version).
                  // So ok to show quality values in Aligned Reads Window
                  // (don't set it to all quality 0) but don't
                  // allow user to bring up traces.

                  pLocFrag->bReadOnly_ = true;
                  popupErrorMessage( eb.szGetDesc() );
                  pCP->bErrorMessageDisplayedAtStartup_ = true;
               }
            } // if ( pLocFrag->filGetPHDFilename().nGetVersion() != 1 ...
         } // try
         catch( ExceptionBase eb ) {
            // something was wrong in the phd file
            // So mark it read-only and set the qualities to 0's
            pLocFrag->setBadPHDFile();
            popupErrorMessage( eb.szGetDesc() );
            consedParameters::pGetConsedParameters()->bErrorMessageDisplayedAtStartup_ 
               = true;
         } // catch...

      } //  for (int nFrag = 0; nFrag < nNumFrags;
   } //    for (nContig = 0; 

   cerr << "Number of individual phd files read: " << 
      nNumberOfIndividualPhdFilesRead << endl;

   cerr << "Total reads in assembly: " << soAddCommas( nNumberOfReadsInAssembly_ ) << endl;


}



void Assembly :: setContigMatchTablesInAllContigs() {


   for ( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );

      pContig->setContigMatchTables();
   }

}


   


int Assembly :: nGetContigIndexByContig( Contig* pContig ) {

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      if ( pGetContig( nContig ) == pContig ) 
         return( nContig );
   }

   return( -1 );
}




bool Assembly :: bDoesTheNumberOfTemplatesMakeSense() {
   
   if ( pReadsSortedByTemplateName_ ) {
      pReadsSortedByTemplateName_->clear();
      pReadsSortedByTemplateName_->resize( nGetNumberOfReadsInAssembly() );
   }
   else {
      pReadsSortedByTemplateName_ = new readsSortedByTemplateName(
                                           nGetNumberOfReadsInAssembly() );
   }

   
   int nNumberOfReadsAdded = 0;

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); 
           ++nRead ) {
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );
         pLocFrag->setDefaultTemplateNameIfNecessary();
         if ( pLocFrag->bIsWholeCloneTemplateNotSubcloneTemplate() ) continue;
         if ( pLocFrag->bIsAFakeRead() ) continue;
         pReadsSortedByTemplateName_->append( pLocFrag );
      }
   }

   pReadsSortedByTemplateName_->resort2();


   if ( pReadsSortedByTemplateName_->length() == 0 ) {

      popupErrorMessage( "Warning:  there are no reads that are subclone reads.  All reads are either whole clone (bac, cosmid, or cDNA) or fake reads.  If this is not correct, then you probably have not correctly customized determineReadTypes.perl for your read naming convention.  See README.txt and see the documentation within determineReadTypes.perl  To help you in figuring out what is wrong, on the Main Consed Window, point to Info, hold down mouse button 1, and release on Show Info For Each Read" );

      consedParameters::pGetConsedParameters()->bErrorMessageDisplayedAtStartup_ 
         = true;
      return( false );
   }

   const int nTemplatesToReport = 3;
   int nMostReadsPerTemplate[ nTemplatesToReport ];
   RWTValVector< RWCString > aTemplatesAndReads( nTemplatesToReport );
   int n;
   for( n = 0; n < nTemplatesToReport; ++n ) {
      nMostReadsPerTemplate[ n ] = 0;
      aTemplatesAndReads[ n ] = "";
   }
   
   int nNumberOfTemplates = 0;
   int nSubscriptOfFirstReadOfTemplate = 0;
   RWCString soLastTemplate = "";
   
   for( int nRead = 0; nRead < pReadsSortedByTemplateName_->length();
        ++nRead ) {
      
      RWCString soTemplate = 
         (*pReadsSortedByTemplateName_)[ nRead ]->soGetTemplateName();

      if ( soTemplate != soLastTemplate && nRead != 0 ) {
         // check last template to see if it has more reads for it than
         // other saved templates
         
         int nNumberOfReadsInLastTemplate = 
            nRead - nSubscriptOfFirstReadOfTemplate;
   
         bool bFoundSlot = false;
         for( int nSavedTemplate = 0; nSavedTemplate < nTemplatesToReport &&
                 !bFoundSlot;
              ++nSavedTemplate ) {
            if ( nNumberOfReadsInLastTemplate > 
                 nMostReadsPerTemplate[ nSavedTemplate ] ) {
               // found the slot this one should go in.  Push the
               // others down (I wish I had "push" like perl).
   
               for( n = nTemplatesToReport - 1; n > nSavedTemplate;
                    --n ) {
                  nMostReadsPerTemplate[ n ] = nMostReadsPerTemplate[ n - 1 ];
                  aTemplatesAndReads[ n ] = aTemplatesAndReads[ n - 1 ];
               }
   
               RWCString soExplanation = soLastTemplate;
   
               // often too many reads--don't make such a large box
//                for( n = nSubscriptOfFirstReadOfTemplate; 
//                     n < nRead;
//                     ++n ) {
//                   soExplanation += " ";
//                   soExplanation += 
//                      (*pReadsSortedByTemplateName_)[ n ]->soGetName();
//                }

               aTemplatesAndReads[ nSavedTemplate ] = soExplanation;
               nMostReadsPerTemplate[ nSavedTemplate ] = 
                  nNumberOfReadsInLastTemplate;

               bFoundSlot = true;
            }
         }
      }
      
      if ( soTemplate != soLastTemplate ) {
   
         // now deal with the next template
   
         ++nNumberOfTemplates;
         soLastTemplate = soTemplate;
         nSubscriptOfFirstReadOfTemplate = nRead;
      }
   }
   
   // check last template to see if it has more reads for it than
   // other saved templates
   
   int nNumberOfReadsInLastTemplate = 
      pReadsSortedByTemplateName_->length() - 
      nSubscriptOfFirstReadOfTemplate;
   
   bool bFoundSlot = false;
   for( int nSavedTemplate = 0; nSavedTemplate < nTemplatesToReport &&
           !bFoundSlot;
        ++nSavedTemplate ) {
      if ( nNumberOfReadsInLastTemplate > 
           nMostReadsPerTemplate[ nSavedTemplate ] ) {
         // found the slot this one should go in.  Push the
         // others down (I wish I had "push" like perl).
   
         for( n = nTemplatesToReport - 1; n > nSavedTemplate;
              --n ) {
            nMostReadsPerTemplate[ n ] = nMostReadsPerTemplate[ n - 1 ];
            aTemplatesAndReads[ n ] = aTemplatesAndReads[ n - 1 ];
         }
   
         RWCString soExplanation = soLastTemplate;
   
         // often too many reads--don't make such a large box         
//          for( n = nSubscriptOfFirstReadOfTemplate; 
//               n < nRead;
//               ++n ) {
//             soExplanation += " ";
//             soExplanation += 
//                (*pReadsSortedByTemplateName_)[ n ]->soGetName();
//          }
         bFoundSlot = true;

         aTemplatesAndReads[ nSavedTemplate ] = soExplanation;
         nMostReadsPerTemplate[ nSavedTemplate ] = 
            nNumberOfReadsInLastTemplate;
   
      }
   }


   // What is going to be the rule for checking whether the number of
   // templates makes sense?  If most reads are whole clone reads (as
   // in the case of cDNA assemblies, then there won't be any subclone
   // templates at all).  So I suggest that we check how many subclone
   // reads there are (not fake).  This is given by the reads in the
   // pReadsSortedByTemplateName_ array.  Then we check how many
   // templates there are in this and see if the number of templates
   // is less than the number of reads and more than the number of
   // reads divided by 5.  In autofinish, if this is not true,
   // autofinish will terminate with an error. We will have a resource
   // that allows the person to override the error and continue.


   if ( nNumberOfTemplates > 500 &&

        ( nNumberOfTemplates == pReadsSortedByTemplateName_->length() ) ) {
      popupErrorMessage( "Warning:  there were %d reads that went into the assembly and %d reads from subclones.  However, consed believes that each of these was from a different subclone template.  Consed does not see a single forward/reverse pair or forward and walk, or redo.  If this is not correct, then you should read README.txt about determineReadTypes.perl and customize determineReadTypes.perl for your naming convention.  To help you in figuring out what is wrong, on the Main Consed Window, point to Info, hold down mouse button 1, and release on Show Info For Each Read.  If this *is* correct, then you can override this error message by setting the consed resource \nconsed.autoFinishContinueEvenThoughReadInfoDoesNotMakeSense: true\nSee README.txt for more info.",
         nGetNumberOfReadsInAssembly(),
         pReadsSortedByTemplateName_->length() );

      consedParameters::pGetConsedParameters()->bErrorMessageDisplayedAtStartup_ 
         = true;

      return( false );
   }


   if ( nNumberOfTemplates < pReadsSortedByTemplateName_->length() / 5 ) {
      popupErrorMessage( "Warning:  there were %d reads that went into the assembly and %d reads from subclones.  However, consed/autofinish thinks there were only %d subclone templates.  This is probably incorrect and will cause numerous problems:  primer picking and template picking within consed will not work and autofinish will not work correctly.  This problem is probably due to your not customizing determineReadTypes.perl correctly for your read naming convention.  See README.txt (which came with consed) and see the documentation within determineReadType.perl  For example, consed thinks that template %s has %d reads, template %s has %d reads and template %s has %d reads.  To help you in figuring out what is wrong, on the Main Consed Window, point to Info, hold down mouse button 1, and release on Show Info For Each Read",
         nGetNumberOfReadsInAssembly(),
         pReadsSortedByTemplateName_->length(),
         nNumberOfTemplates,
         (const char*) aTemplatesAndReads[ 0 ],
         nMostReadsPerTemplate[ 0 ],
         (const char*) aTemplatesAndReads[ 1 ],
         nMostReadsPerTemplate[ 1 ],
         (const char*) aTemplatesAndReads[ 2 ],
         nMostReadsPerTemplate[ 2 ] 
                         );

      consedParameters::pGetConsedParameters()->bErrorMessageDisplayedAtStartup_ 
         = true;
      return( false );
   }

   return( true );
}


         

         

static void toDoAtEndOfReadsForATemplate( 
        const int nSubscriptOfFirstReadOfLastTemplate,
        const RWCString& soLastTemplate,
        TextBox* pTextBox,
        const int nNewTemplateRead,
        readsSortedByTemplateName* pReadsSortedByTemplateName ) {

   int nNumberOfReadsOfLastTemplate = 
            nNewTemplateRead - nSubscriptOfFirstReadOfLastTemplate;

   pTextBox->appendWithArgs( "template %s with %d %s\n",
                           (const char*) soLastTemplate,
                           nNumberOfReadsOfLastTemplate,
                             ( nNumberOfReadsOfLastTemplate == 1 ? "read" :
                               "reads" )
                             );

   for( int nRead = nSubscriptOfFirstReadOfLastTemplate;
        nRead < nNewTemplateRead; ++nRead ) {

      LocatedFragment* pLocFrag = (*pReadsSortedByTemplateName)[ nRead ];
      pTextBox->appendWithArgs( "    %-20s %-8s %-8s ",
                        (const char*) pLocFrag->soGetName(),
                        (const char*) pLocFrag->soChemistry_,
                        szGetReadType2( pLocFrag->nReadType_ )  );

      if ( pLocFrag->nReadType_ == pLocFrag->nReadTypeFromPhdFile_ )
         pTextBox->append( "(from phd file)\n" );
      else
         pTextBox->appendWithArgs( "(inferred from name--no info in phd file: %d)\n",
                                   pLocFrag->nReadTypeFromPhdFile_ );

   }
}





void Assembly :: showReadInfo() {


   PleaseWait* pPleaseWait = 
      new PleaseWait( GuiApp::pGetGuiApp()->widGetTopLevel() );

   int nNumberOfReadsOfEachType[ nReadTypesMaxSubscript + 1 ];

   int nNumberOfReadsNotSetInPhdFile = 0;

   for( int n = 0; n <= nReadTypesMaxSubscript; ++n )
      nNumberOfReadsOfEachType[ n ] = 0;


   int nContig;
   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig();
           ++nRead ) {

         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );

         if ( pLocFrag->nReadTypeFromPhdFile_ == 0 )
            ++nNumberOfReadsNotSetInPhdFile;

         ++nNumberOfReadsOfEachType[ pLocFrag->nReadType_ ];
      }
   }

   processSinglets();


   int nSinglet;
   for( nSinglet = 0; 
        nSinglet < pSingletsSortedByTemplateName_->length(); 
        ++nSinglet ) {
      singletInfo* pSingletInfo = 
         (*pSingletsSortedByTemplateName_)[ nSinglet ];

      if ( pSingletInfo->nReadType_ == 0 ) {
         ++nNumberOfReadsNotSetInPhdFile;
         pSingletInfo->nReadType_ = 
            nGetReadTypeFromReadName( pSingletInfo->soReadName_ );

      }


      ++nNumberOfReadsOfEachType[ pSingletInfo->nReadType_ ];
   }


   delete pPleaseWait;

   const int nNumberOfRowsVisible = 35;
   TextBox* pTextBox = new TextBox( "Read Info",
                                    nNumberOfRowsVisible );

   pTextBox->append( "Autofinish and Consed's primer picker must know, \nfor each read, whether it is a forward universal \nprimer read, a reverse universal primer read, a pcr \nend read, or a walk.  Autofinish and Consed's \nprimer picker must also know, for each read, what \nthe read's template name is so they know when 2 \nreads come from the same subclone template or \ndifferent subclone templates.  If a read comes \nfrom the whole clone (bac, cosmid, or cDNA), it is \nimportant to know that as well.\n\nIf the information below is not correct, it is \nlikely that you have not correctly customized \ndetermineReadTypes.perl for your read naming \nconvention.  Please refer to README.txt or Help \non the Aligned Reads Window and refer to the \ndocumentation in determineReadsTypes.perl itself\n" );
   pTextBox->append( "Read Type Summary:\n\n" );

   for( int nReadType = 0; nReadType <= nReadTypesMaxSubscript; ++nReadType ) {
      
      if ( nReadType == 0 ) {
         pTextBox->appendWithArgs( "Reads whose type is not in phd file and will be \ninferred to be universal forward or universal \nreverse depending on the extension (such reads \nwill never be considered walks):  %d\n",
                                   nNumberOfReadsNotSetInPhdFile );
      }
      else {
         pTextBox->appendWithArgs( "Reads of type %s: %d\n",
                                   szGetReadType2( nReadType ),
                                   nNumberOfReadsOfEachType[ nReadType ] );
      }
   }

   
   // now figure out summary information about template names


   if ( pReadsSortedByTemplateName_ ) {
      pReadsSortedByTemplateName_->clear();
      pReadsSortedByTemplateName_->resize( nGetNumberOfReadsInAssembly() );
   }
   else {
      pReadsSortedByTemplateName_ = new readsSortedByTemplateName(
                                           nGetNumberOfReadsInAssembly() );
   }


   int nSmallFractionOfReads = nGetNumberOfReadsInAssembly() / 100;


   RWTPtrOrderedVector<LocatedFragment> aWholeCloneReads( (size_t) nSmallFractionOfReads );
   RWTPtrOrderedVector<LocatedFragment> aFakeReads( (size_t) nSmallFractionOfReads );

   int nNumberOfWholeCloneReads = 0;
   int nNumberOfFakeReads = 0;
   int nNumberOfReads = 0;
   int nNumberOfSubcloneReads = 0;

   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      
      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); 
           ++nRead ) {
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );
         ++nNumberOfReads;

         pLocFrag->setDefaultTemplateNameIfNecessary();

         if ( pLocFrag->bIsWholeCloneTemplateNotSubcloneTemplate() ) {
            ++nNumberOfWholeCloneReads;
            aWholeCloneReads.append( pLocFrag );
         }
         else if ( pLocFrag->bIsAFakeRead() ) {
            ++nNumberOfFakeReads;
            aFakeReads.append( pLocFrag );
         }
         else {
            ++nNumberOfSubcloneReads;
            pReadsSortedByTemplateName_->append( pLocFrag );
         }
      }
   }


   pReadsSortedByTemplateName_->resort2();

   
   pTextBox->append( "\n\n\nTemplate Name Summary Information:\n\n\n" );

   pTextBox->appendWithArgs( "Number of whole clone reads (from bac, cosmid, or cDNA):  %d\n",
                             nNumberOfWholeCloneReads );
   pTextBox->appendWithArgs( "Number of fake reads: %d\n",
                             nNumberOfFakeReads );
   pTextBox->appendWithArgs( "Number of reads from subclones (M13, plasmid): %d\n",
                             nNumberOfSubcloneReads );
   
   pTextBox->appendWithArgs( "Total Number of reads in assembly: %d\n\n\n",
                             nNumberOfReads );

   
   // now make histogram of the number of templates with each # of reads

   const int nNumberOfBins = 10;

   int nTemplatesWithParticularNumberOfReads[ nNumberOfBins ];

   int nBin;
   for( nBin = 0; nBin < nNumberOfBins; ++nBin )
      nTemplatesWithParticularNumberOfReads[ nBin ] = 0;

   RWCString soLastTemplate = "";
   int nSubscriptOfFirstReadOfLastTemplate = 0;

   int nRead;
   for( nRead = 0; nRead < pReadsSortedByTemplateName_->length();
        ++nRead ) {

      RWCString soTemplate = 
         (*pReadsSortedByTemplateName_)[ nRead ]->soGetTemplateName();

      if ( soTemplate != soLastTemplate && nRead != 0 ) {
         int nNumberOfReadsOfLastTemplate = 
            nRead - nSubscriptOfFirstReadOfLastTemplate;

         if ( nNumberOfReadsOfLastTemplate <= ( nNumberOfBins - 2 ) )
            ++nTemplatesWithParticularNumberOfReads[ nNumberOfReadsOfLastTemplate ];
         else   
            ++nTemplatesWithParticularNumberOfReads[ nNumberOfBins - 1 ];
      }

      // do this, even for the 1st read in the list
      if ( soTemplate != soLastTemplate ) {
         soLastTemplate = soTemplate;
         nSubscriptOfFirstReadOfLastTemplate = nRead;
      }
   }

   // do for last template group in list

   int nNumberOfReadsOfLastTemplate = 
      nRead - nSubscriptOfFirstReadOfLastTemplate;
   
   if ( nNumberOfReadsOfLastTemplate <= ( nNumberOfBins - 2 ) )
      ++nTemplatesWithParticularNumberOfReads[ nNumberOfReadsOfLastTemplate ];
   else   
      ++nTemplatesWithParticularNumberOfReads[ nNumberOfBins - 1 ];
   


   // start with 1 because there will be no templates with 0 reads
   for( nBin = 1; nBin < nNumberOfBins - 1; ++nBin ) {
      pTextBox->appendWithArgs("# of templates with %d %s: %d\n",
                               nBin,
                               ( nBin == 1 ? "read" : "reads" ),
                               nTemplatesWithParticularNumberOfReads[ nBin ] );

   }
   pTextBox->appendWithArgs("# of templates with %d or more reads: %d\n",
                            nNumberOfBins - 1,
                            nTemplatesWithParticularNumberOfReads[ nNumberOfBins - 1 ] );

   pTextBox->append( "\n\n\n\nList of Fake Reads:\n\n" );
   for( int nFakeRead = 0; nFakeRead < aFakeReads.length(); ++nFakeRead ) {
      LocatedFragment* pLocFrag = aFakeReads[ nFakeRead ];
      pTextBox->appendWithArgs( "   %s\n", 
                                (const char*) pLocFrag->soGetName() );
   }

   pTextBox->append( "\n\n\nList of Whole Clone Reads:\n\n" );
   if ( aWholeCloneReads.length() != 0 )
      pTextBox->append( "    name                 chemistry template\n" );
   for( int nWholeCloneRead = 0; nWholeCloneRead < aWholeCloneReads.length();
        ++nWholeCloneRead ) {
      LocatedFragment* pLocFrag = aWholeCloneReads[ nWholeCloneRead ];
      pTextBox->appendWithArgs( "   %20s %10s %8s\n",
                                (const char*) pLocFrag->soGetName(),
                                (const char*) pLocFrag->soChemistry_,
                                (const char*) pLocFrag->soGetTemplateName() );
   }
                     

   pTextBox->append( "\n\n\nList of Subclone Reads:\n\n" );
   pTextBox->append( "    name                 chemistry  read type\n\n" );


   soLastTemplate = "";
   nSubscriptOfFirstReadOfLastTemplate = 0;
   for( nRead = 0; nRead <  pReadsSortedByTemplateName_->length();
        ++nRead ) {
      LocatedFragment* pLocFrag = (*pReadsSortedByTemplateName_)[ nRead ];
      
      if ( pLocFrag->soGetTemplateName() != soLastTemplate && nRead != 0 ) {
         toDoAtEndOfReadsForATemplate( nSubscriptOfFirstReadOfLastTemplate,
                                       soLastTemplate,
                                       pTextBox,
                                       nRead,
                                       pReadsSortedByTemplateName_ );
      }

      if ( pLocFrag->soGetTemplateName() != soLastTemplate ) {
         soLastTemplate = pLocFrag->soGetTemplateName();
         nSubscriptOfFirstReadOfLastTemplate = nRead;
      }
   }

   toDoAtEndOfReadsForATemplate( nSubscriptOfFirstReadOfLastTemplate,
                                 soLastTemplate,
                                 pTextBox,
                                 pReadsSortedByTemplateName_->length(),
                                 pReadsSortedByTemplateName_ );



   pTextBox->append( "\n\n\nList of Reads Put (by Phrap) into Singlets File:\n\n" );
   

   for( nSinglet = 0; nSinglet < pSingletsSortedByTemplateName_->length();
        ++nSinglet ) {
      singletInfo* pSinglet = (*pSingletsSortedByTemplateName_)[ nSinglet ];

      pTextBox->appendWithArgs("%-15s %-5s %-20s %-10s %-15s\n",
                               pSinglet->soTemplate_.data(),
                               pSinglet->soTemplateType_.data(),
                               pSinglet->soReadName_.data(),
                               pSinglet->soChemistry_.data(),
                               szGetReadType2( pSinglet->nReadType_ ) );
   }



   pTextBox->makeVisible();

}




void Assembly :: showTemplateQualityInfo() {

   PleaseWait* pPleaseWait = 
      new PleaseWait( GuiApp::pGetGuiApp()->widGetTopLevel() );

   setContigTemplateArrays();

   const int nNumberOfRowsVisible = 35;
   TextBox* pTextBox = new TextBox( "Template Quality Info",
                                    nNumberOfRowsVisible );

   pTextBox->append( "\n\n" );
   pTextBox->append( "name           error rate\n" );
   pTextBox->append( "\n\n" );
                                    

   for( int nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {

      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];

      pSub->calculateErrorRateIfNecessary();

      pTextBox->appendWithArgs( "%s     %.2e\n",
                                (const char*) pSub->soTemplateName_,
                                pSub->dErrorRate_ );
   }



   pTextBox->makeVisible();

   delete pPleaseWait;
}






void Assembly :: setContigTemplateArrays() {

   if ( !pCP->bNeedToSetContigTemplateArrays_ ) return;

   pCP->bNeedToSetContigTemplateArrays_ = false;

   // first make a list of reads: Assembly :: pReadsSortedByTemplateName_
   // Then make list of templates: Assembly :: subcloneTTemplateArray_
   // Then process this and split into various Contig :: aSubcloneTemplates_
   // Some subcloneTTemplates may be duplicated so it can be put into
   // more than one Contig :: aSubcloneTemplates_ list, each with a 
   // different subcloneTTemplate :: pContig_


   if ( pCP->nWhatIsRunning_ == nGraphicalConsedIsRunning ) {
      printf( "setting contig template arrays...\n" );
      fflush( stdout );
   }

   // this is needed by subcloneTTemplate::bReadsAreConsistent() 
   // to see if read pairs spanning the gap are close enough to the
   // ends of the contigs.  This used to be elsewhere, but the
   // showTemplateInsertLocations wouldn't work if it wasn't here.
   setAllContigHighQualitySegments();


   clearAndResizeContigTemplateArrays();

   subcloneTTemplateArray_.clear();

   int nNumberOfReadsInAssemblyWithTemplates = 
      nGetNumberOfReadsInAssemblyWithTemplates();

   if ( pReadsSortedByTemplateName_ ) {
      pReadsSortedByTemplateName_->clear();
      pReadsSortedByTemplateName_->resize( nNumberOfReadsInAssemblyWithTemplates );
   }
   else {
      pReadsSortedByTemplateName_ = new readsSortedByTemplateName( 
                              nNumberOfReadsInAssemblyWithTemplates
                                                      );
   }


   for( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig* pContig = pGetContig( nContig );
      
      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); 
           ++nRead ) {
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );

         pLocFrag->setDefaultTemplateNameIfNecessary();

         if ( pLocFrag->bIsWholeCloneTemplateNotSubcloneTemplate() ) {
            continue;
         }
         if ( pLocFrag->bIsAFakeRead() ) {
            continue;
         }

         if ( pLocFrag->bIsAnUnpairedShortRead() ) {
            continue;
         }
         
         pReadsSortedByTemplateName_->append( pLocFrag );
      }
   }

   pReadsSortedByTemplateName_->resort2();




   subcloneTTemplateArray_.resize( 
                   (size_t) pReadsSortedByTemplateName_->entries() );


   // creating subcloneTTemplate objects

   RWCString soLastTemplate;
   subcloneTTemplate* pTemplate = NULL;
   bool bFirstTime = true;
   for( int nRead = 0; nRead < pReadsSortedByTemplateName_->entries();
        ++nRead ) {
      
      LocatedFragment* pLocFrag = (*pReadsSortedByTemplateName_)[ nRead ];


      // fixes a UWGC bug in which the first read had a null template
      // so it matched soLastTemplate, but pTemplate was null

      if ( pLocFrag->soGetTemplateName() != soLastTemplate || bFirstTime ) {
         bFirstTime = false;

         soLastTemplate = pLocFrag->soGetTemplateName();
         
         // new template--process it.
         // The ctor inserts this subcloneTTemplate into 
         // Assembly::subcloneTTemplateArray_

         pTemplate = new subcloneTTemplate( pLocFrag );
      }
      else {
         pTemplate->aReads_.insert( pLocFrag );
         pLocFrag->pSub_ = pTemplate;
      }
   }

   // why do the templates need to be sorted by name?
   // Because in contig5.cpp, universal reads near the end of the 
   // contig are checked for mates by finding the subcloneTTemplate
   // object by looking up via the template name.


   sortTemplatesByTemplateName();


   // this must be done before pSub->processingAfterAllReadsAdded
   // because the latter uses the library in order to check that
   // the reads from the same template are not too far apart

   assignLibrariesToSubcloneTTemplates();


   // 1st stage in processing subcloneTTemplate arrays

   int nTemplate;
   for( nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {
      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];

      pSub->processingAfterAllReadsAdded();
   }

   // calculating library information

   calculateLibraryInsertSizeFromForwardReversePairs();


   // use that library information to mark more templates as
   // bad
   for( nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {
      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];

      if ( pSub->bOKToUseThisTemplate_ )
         pSub->markTemplateBadIfInsertSeemsTooBig();
   }

   // we do this after calculating the insert size so that 
   // we can use unavailable (lost/dropped) for calculating the
   // library insert size



   setBadTemplateArray();
   setBadLibraryArray();

   flagAllSubclonesIfBadTemplateOrBadLibrary();
   

   // 2nd stage in processing subcloneTTemplate arrays.  this stage
   // puts the templates into the different contigs.  If a template
   // is in a badTemplate or badLibrary list, it is not put into
   // a contig.

   for( nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {
      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];

      pSub->processingAfterAddedAllTemplatesInThisLibrary();
   }


   sortContigTemplateArrays();



   cerr << "after sortContigTemplateArrays" << endl;


   if ( consedParameters::pGetConsedParameters()->bAutoFinishDumpTemplates_ )
      dumpTemplates();



}



void Assembly :: dumpTemplates() {
   for( int nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {

      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];
      
      pSub->dumpTemplate();
      fprintf( pFILE, "%s with reads: \n", 
               pSub->soTemplateName_.data() );
      for( int nRead = 0; nRead < pSub->aReads_.entries(); ++nRead ) {
         LocatedFragment* pLocFrag = pSub->aReads_[ nRead ];
         fprintf( pFILE, "    %s\n",
                  pLocFrag->soGetName().data() );
      }
      
      for( int nContig = 0; nContig < pSub->aContigs_.entries(); ++nContig ) {
         Contig* pContig = pSub->aContigs_[ nContig ];
         fprintf( pFILE, "in contig: %s from %d to %d ",
                  pContig->soGetName().data(),
                  pSub->aUnpaddedLeft_[ nContig ],
                  pSub->aUnpaddedRight_[ nContig ]
                  );

         if ( pSub->aFlags_[ nContig ] & LEFT_CONS_POS_EQUAL_OR_FURTHER_LEFT )
            fprintf( pFILE, "LEFT_CONS_POS_EQUAL_OR_FURTHER_LEFT " );
         if ( pSub->aFlags_[ nContig ] &  LEFT_CONS_POS_EQUAL_OR_FURTHER_RIGHT )
            fprintf( pFILE, "LEFT_CONS_POS_EQUAL_OR_FURTHER_RIGHT " );
         if ( pSub->aFlags_[ nContig ] & RIGHT_CONS_POS_EQUAL_OR_FURTHER_LEFT )
            fprintf( pFILE, "RIGHT_CONS_POS_EQUAL_OR_FURTHER_LEFT " );
         if ( pSub->aFlags_[ nContig ] & RIGHT_CONS_POS_EQUAL_OR_FURTHER_RIGHT )
            fprintf( pFILE, "RIGHT_CONS_POS_EQUAL_OR_FURTHER_RIGHT " );
         if ( pSub->aFlags_[ nContig ] & RIGHT_CONS_POS_EXACT ) 
            fprintf( pFILE, "RIGHT_CONS_POS_EXACT " );
         if ( pSub->aFlags_[ nContig ] & LEFT_CONS_POS_EXACT )
            fprintf( pFILE, "LEFT_CONS_POS_EXACT " );
         fprintf( pFILE, "\n" );
      }
   } // for( int nTemplate ...



}



static int compareTemplates( const subcloneTTemplate** ppSub1,
                             const subcloneTTemplate** ppSub2 ) {

   if ( (*ppSub1)->soTemplateName_ < (*ppSub2)->soTemplateName_ )
      return( -1 );
   else if ( (*ppSub1)->soTemplateName_ == (*ppSub2)->soTemplateName_ )
      return( 0 );
   else
      return( 1 );
}
      

void Assembly :: sortTemplatesByTemplateName() {

   void* pArray = (void*) subcloneTTemplateArray_.data();
   size_t nNumberOfTemplates = subcloneTTemplateArray_.entries();
   size_t nSizeOfAnElement = sizeof( subcloneTTemplate* );
   
   qsort( pArray, nNumberOfTemplates, nSizeOfAnElement, 
          ( (int(*) ( const void*, const void*) ) compareTemplates ) );
   

   if ( !bTemplatesAreSortedAlphabetically() ) {
      PANIC_OST( ost ) << "Assembly::sortTemplatesByTemplateName didn't work."
                       << ends;
   
      throw ProgramLogicError( ost.str() );
   }


}


   


// static int compareTemplateToTemplateName( const RWCString* pTemplateName,
//                              const subcloneTTemplate* pSub ) {


//    cerr << "comparing " << pSub->soTemplateName_ << " and " <<
//       (*pTemplateName) << endl;

//    if ( (*pTemplateName ) < pSub->soTemplateName_ )
//       return( -1 );
//    else if ( (*pTemplateName) == pSub->soTemplateName_ )
//       return( 0 );
//    else
//       return( 1 );
// }
      





subcloneTTemplate* Assembly :: pGetSubcloneTemplateByTemplateName( 
                                  const RWCString& soTemplateName ) {


   int nIndex = 
      subcloneTTemplateArray_.nFindFirstOccurrenceOfMatch( soTemplateName );

   // I used to assert if no template found.  Now I'm more forgiving.
   if ( nIndex == RW_NPOS )
      return( NULL );

   subcloneTTemplate* pSub = subcloneTTemplateArray_[ nIndex ];

   assert( pSub->soTemplateName_ == soTemplateName );

   return( pSub );
}





bool Assembly :: bTemplatesAreSortedAlphabetically() {

   bool bOK = true;

   for( int nTemplate = 1; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {

      subcloneTTemplate* pSub1 = subcloneTTemplateArray_[ nTemplate - 1 ];
      subcloneTTemplate* pSub2 = subcloneTTemplateArray_[ nTemplate ];
   
      if ( pSub1->soTemplateName_ > pSub2->soTemplateName_ ) {
         cerr << "in  Assembly::bTemplatesAreSortedAlphabetically(), templates " 
              << nTemplate - 1 << " (" << pSub1->soTemplateName_ << ") and " <<
            nTemplate << " (" << pSub2->soTemplateName_ << ") are out of order"
              << endl;
         bOK = false;
      }
   }
   return( bOK );
}
   



void Assembly :: sortContigTemplateArrays() {

   // run through all contigs, sorting arrays

   for ( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );

      pContig->aSubcloneTemplates_.resort();
   }
}
   

void Assembly :: clearAndResizeContigTemplateArrays() {
   for ( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );
      
      pContig->aSubcloneTemplates_.clearAndDestroy();

      pContig->aSubcloneTemplates_.resize( (size_t) pContig->nGetNumberOfFragsInContig() );
   }
}
      


void Assembly :: tagAceFileWithExperiments() {


   ofstream ofsAceFile( (char*) soFileName_.data(), ios::out | ios::app );

   if ( !ofsAceFile ) {
      ostrstream ost;
      ost << "Unable to open file " << soFileName_ << "for append " << endl << ends;

      SysRequestFailed srf( ost.str() );
      srf.includeErrnoDescription();
      throw srf;
   }
   

   for( int nChosenExp = 0; nChosenExp < pAllChosenExperiments_->length();
        ++nChosenExp ) {
      experiment* pExp = (*pAllChosenExperiments_)[ nChosenExp ];
      
      ofsAceFile << endl;

      pExp->writeAutoFinishExpTagToAceFile( ofsAceFile );
   }

   ofsAceFile.close();
}



void Assembly :: tagAceFileWithOligosForExperiments() {

   ofstream ofsAceFile( (char*) soFileName_.data(), ios::out | ios::app );

   if ( !ofsAceFile ) {
      ostrstream ost;
      ost << "Unable to open file " << soFileName_ << "for append " << 
         endl << ends;
      SysRequestFailed srf( ost.str() );
      srf.includeErrnoDescription();
      throw srf;
   }

   for( int nChosenExp = 0; nChosenExp < pAllChosenExperiments_->length();
        ++nChosenExp ) {
      experiment* pExp = (*pAllChosenExperiments_)[ nChosenExp ];

      if ( pExp->nReadType_ != nWalk ) continue;

      ofsAceFile << endl;

      bool bComplementedFromWayPhrapCreatedContig = !pExp->bTopStrandNotBottomStrand_;
      if ( pExp->pContig_->bComplementedFromWayPhrapCreated_ )
         bComplementedFromWayPhrapCreatedContig = 
            !bComplementedFromWayPhrapCreatedContig;

      oligoTag* pOligoTag = new oligoTag( 
                                  pExp->pCustomPrimer_,
                                  pExp->bCloneTemplateNotSubcloneTemplate_,
                                  bComplementedFromWayPhrapCreatedContig,
                                  pExp->nPrimerID_,
                                  "" ); // soComment

      pOligoTag->writeTagToAceFile( ofsAceFile );
   }

   // also append oligo tags for pcr to the ace file

   if (  pChosenPrimersForAutofinishPCR_ ) {

      for( int nOligoTag = 0; 
           nOligoTag < pChosenPrimersForAutofinishPCR_->length();
           ++nOligoTag ) {

         oligoTag* pOligoTag = (*pChosenPrimersForAutofinishPCR_)[ nOligoTag ];
         pOligoTag->writeTagToAceFile( ofsAceFile );

      }
   }

   
   ofsAceFile.close();
}



void Assembly :: saveACopyOfAceFileToBack() {

   FileName filBackupAceFile = soFileName_ + ".back";
   FileName filBackupOrigAceFile = filBackupAceFile + "_orig";

   FileName filCopyToAce;

   if ( filBackupOrigAceFile.bFileByThisNameExists() ) {
      deleteFileIfItExists( filBackupAceFile );
      renameFiles( soFileName_, filBackupAceFile );
      copyFiles( filBackupAceFile, soFileName_ );
   }
   else {
      if ( filBackupAceFile.bFileByThisNameExists() ) {
         renameFiles( filBackupAceFile, filBackupOrigAceFile );
         renameFiles( soFileName_, filBackupAceFile );
         copyFiles( filBackupAceFile, soFileName_ );
      }
      else {
         renameFiles( soFileName_, filBackupOrigAceFile );
         copyFiles( filBackupOrigAceFile, soFileName_ );
      }
   }
}



void Assembly :: showReadChemistries() {
   
   RWTValOrderedVector<RWCString> aChemistryNames;
   RWTValOrderedVector<int> aChemistryNumberOfReads;

   for( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig* pContig = pGetContig( nContig );
      
      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); 
           ++nRead ) {
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );
         RWCString soChemistry =  pLocFrag->soChemistry_;
         if ( soChemistry.isNull() )
            soChemistry = "Not in phd file";

         int nIndex = aChemistryNames.index( soChemistry );

         if ( nIndex == RW_NPOS ) {
            aChemistryNames.insert( soChemistry );
            aChemistryNumberOfReads.insert( 1 );
         }
         else {
            ++aChemistryNumberOfReads[ nIndex ];
         }
      }
   }


   assert( aChemistryNames.length() == aChemistryNumberOfReads.length() );


   int nNumberOfRowsNeeded = 6 + aChemistryNames.length();
   if ( nNumberOfRowsNeeded > 20 )
      nNumberOfRowsNeeded = 20;


   TextBox* pTB = new TextBox( "Number of Reads for Each Chemistry",
                               nNumberOfRowsNeeded );

   pTB->append( "Chemistry Name       # of reads\n\n\n" );

   for( int nChem = 0; nChem < aChemistryNames.length(); ++nChem ) {

      pTB->appendWithArgs( "%-20s %6d\n",
                           (char*) aChemistryNames[ nChem ].data(),
                           aChemistryNumberOfReads[ nChem ] );
   }

   pTB->makeVisible();

}





int Assembly :: nGetNumberOfForwardUniversalPrimerAutoFinishExpTags() {

   int nUniversalForwardTags = 0;

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      
      for( int nTag = 0; nTag < pContig->nGetNumberOfTags(); ++nTag ) {
         tag* pTag = pContig->pGetTag( nTag );
         
         if ( pTag->soType_ != "autoFinishExp" ) continue;

         autoFinishExpTag* pAutoFinishExpTag = (autoFinishExpTag*) pTag;

         if ( pAutoFinishExpTag->nReadType_ == nUniversalForward ) 
            ++nUniversalForwardTags;

      }
   }

   return( nUniversalForwardTags );
}



int Assembly :: nGetNumberOfReverseUniversalPrimerAutoFinishExpTags() {

   int nUniversalReverseTags = 0;

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      
      for( int nTag = 0; nTag < pContig->nGetNumberOfTags(); ++nTag ) {
         tag* pTag = pContig->pGetTag( nTag );

         if ( pTag->soType_ != "autoFinishExp" ) continue;

         autoFinishExpTag* pAutoFinishExpTag = (autoFinishExpTag*) pTag;

         if ( pAutoFinishExpTag->nReadType_ == nUniversalReverse )
            ++nUniversalReverseTags;
      }
   }


   return( nUniversalReverseTags );
}



bool Assembly :: bHasUniversalPrimerReadBeenTriedBeforeByAutoFinish( 
                                   subcloneTTemplate* pSub,
                                   const int nReadType ) {
   
   universalPrimerAutoFinishExpTagsSortedByTemplateName* pUniversal;

   if ( nReadType == nUniversalForward )
      pUniversal = pForwardUniversalPrimerAutoFinishExpTags_;
   else if ( nReadType == nUniversalReverse )
      pUniversal = pReverseUniversalPrimerAutoFinishExpTags_;
   else {
      cerr << "error condition in bHasUniversalPrimerReadBeenTriedBefore " <<
         pSub->soGetName() << endl;
      return( false );
   }
      
   
   int nIndex = pUniversal->nFindFirstOccurrenceOfMatch( pSub->soGetName() );

   if ( nIndex == RW_NPOS ) 
      return( false );
   else
      return( true );
}
                                                         





bool Assembly :: bIsThereAnAutoFinishExpTagForThisReverse( 
                               const RWCString& soTemplateName,
                               autoFinishExpTag*& pAutoFinishExpTag ) {

   
   
   int nIndex = 
      pReverseUniversalPrimerAutoFinishExpTags_->nFindIndexOfMatchOrSuccessor2( soTemplateName ); 

   if ( nIndex != RW_NPOS ) {
      pAutoFinishExpTag = (*pReverseUniversalPrimerAutoFinishExpTags_)[ nIndex ];
      if ( pAutoFinishExpTag->aTemplates_[0] == soTemplateName )
         return( true );
   }

   // if reached here, must not have been the same template

   return( false );
}


static int cmpChosenExperiments( experiment** ppExp1,
                                 experiment** ppExp2 ) {

   if ( (*ppExp1)->pContig_->soGetName() <
        (*ppExp2)->pContig_->soGetName() )
      return( -1 );
   else if ( 
            (*ppExp1)->pContig_->soGetName() >
            (*ppExp2)->pContig_->soGetName() )
      return( 1 );
   else {
      // same contig
      if ( (*ppExp1)->nUnpaddedLeft_ <
           (*ppExp2)->nUnpaddedLeft_ )
         return( -1 );
      else if ( (*ppExp1)->nUnpaddedLeft_ >
                (*ppExp2)->nUnpaddedLeft_ )
         return( 1 );
      else {
         // same contig and same location
         // Just to give a defined order, let's use expid:
         
         if ( (*ppExp1)->aExpID_[0] <
              (*ppExp2)->aExpID_[0] )
            return( -1 );
         else if (  (*ppExp1)->aExpID_[0] >
                    (*ppExp2)->aExpID_[0] )
            return( 1 );
         else
            return( 0 );
      }
   }
}

bool Assembly :: bAreChosenExperimentsSorted() {

   bool bOK = true;
   // the casting to int below is necesary--otherwise the - 1 will be
   // done with unsigned which will make 0 - 1 into four billion or so
   for( int nExp = 0; nExp < ( (int) (pAllChosenExperiments_->length()) - 1 );
        ++nExp ) {
      experiment* pExp1 = (*pAllChosenExperiments_)[ nExp ];
      experiment* pExp2 = (*pAllChosenExperiments_)[ nExp + 1 ];

      if ( cmpChosenExperiments( &pExp1, &pExp2 ) > 0 ) {
         cerr << "chosen experiments " << nExp << " and " << nExp + 1 <<
            " are out of order out of a total of " <<
            pAllChosenExperiments_->length() << " experiments" << endl;
         bOK = false;
      }
   }
   return( bOK );
}



void Assembly :: sortChosenExperiments() {

   void* pArray = (void*) pAllChosenExperiments_->data();
   size_t nNumberOfElementsInArray = pAllChosenExperiments_->entries();
   size_t nSizeOfAnElement = sizeof( experiment* );

   qsort( pArray, nNumberOfElementsInArray, nSizeOfAnElement,
          ( ( int(*) ( const void*, const void*) ) cmpChosenExperiments ));

   if ( ! bAreChosenExperimentsSorted() ) {
      PANIC_OST( ost ) << "sortChosenExperiments failed" << ends;
      throw ProgramLogicError( ost.str() );
   }
}





void Assembly :: createExpSummaryFiles() {

   sortChosenExperiments();

   fprintf( pAO, 
            "printing experiment summary files:\n    %s\n    %s\n    %s\n    %s\n",
           (const char*) pCP->filAutoFinishForwardsSummary_,
           (const char*) pCP->filAutoFinishReversesSummary_,
           (const char*) pCP->filAutoFinishCustomPrimersSummary_,
           (const char*) pCP->filAutoFinishSortedSummary_ );


   fprintf( pFORWARDS, "ace file: %s\nautofinish full output file: %s\n",
            (const char*) filGetAceFileFullPathname(),
            (const char*) pCP->filAutoFinishFullOutput_ );

   fprintf( pFORWARDS, ",,,(strand),(first base of read),(high quality left),(high quality right),(contig),(exp id),(template)\n" );

   fprintf( pREVERSES, "ace file: %s\nautofinish complete output file: %s\n",
            (const char*) filGetAceFileFullPathname(),
            (const char*) pCP->filAutoFinishFullOutput_ );

   fprintf( pREVERSES, ",,,(strand),(first base of read),(high quality left),(high quality right),(contig),(exp id),(template)\n" );

   fprintf( pCUSTOM_PRIMERS, "ace file: %s\nautofinish complete output file: %s\n",
            (const char*) filGetAceFileFullPathname(),
            (const char*) pCP->filAutoFinishFullOutput_ );

   fprintf( pCUSTOM_PRIMERS, "(primer bases),(melt temp),(primer id),(strand),(first base of read),(high quality left),(high quality right),(contig),(exp id),(template),(exp id),(template),...\n" );


   fprintf( pSORTED_SUMMARY, "(contig) (left) (right) (type),(strand),(first base of read),(exp id),(template)\n" );

   if ( pCP->bAutoFinishPrintCustomNavigationFileForChosenReads_ ) {
      fprintf( pCUSTOM_NAVIGATION, "TITLE: autofinish output file %s\n",
               pCP->filAutoFinishCustomNavigation_.data() );
   }


   if ( pCP->bAutoFinishPrintMinilibrariesSummaryFile_ ) {

      fprintf( pMINILIBRARIES, "ace file: %s\nautofinish complete output file: %s\n",
               (const char*) filGetAceFileFullPathname(),
               (const char*) pCP->filAutoFinishFullOutput_ );

      fprintf( pMINILIBRARIES, "(contig),(which end of that contig),(contig),(which end of that contig),(subclone1),(subclone2),...\n");

      printMinilibrariesSummaryFile();
   }


   FILE* pFile;

   for( int nExp = 0; nExp < pAllChosenExperiments_->entries(); ++nExp ) {
      experiment* pExp = (*pAllChosenExperiments_)[ nExp ];

      if ( pExp->nReadType_ == nWalk || pExp->nReadType_ == nPCREnd ) 
         pFile = pCUSTOM_PRIMERS;
      else if ( pExp->nReadType_ == nUniversalForward )
         pFile = pFORWARDS;
      else if ( pExp->nReadType_ == nUniversalReverse )
         pFile = pREVERSES;
      else
         assert( false );

      pExp->printToSummaryFile( pFile );

      pExp->printToSummaryFile2( pSORTED_SUMMARY );

      if ( pCP->bAutoFinishPrintCustomNavigationFileForChosenReads_ ) {
         pExp->printToCustomNavigationFile( pCUSTOM_NAVIGATION );
      }
   }

   fclose( pREVERSES );
   fclose( pFORWARDS );
   fclose( pCUSTOM_PRIMERS );
   fclose( pSORTED_SUMMARY );
   if ( pCP->bAutoFinishPrintCustomNavigationFileForChosenReads_ )
      fclose( pCUSTOM_NAVIGATION );
}



void Assembly :: dumpReadInfo() {

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) {
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );

         printf( "%s %s %s\n",
                 (char*) pLocFrag->soGetName().data(),
                 (char*) pLocFrag->soGetTemplateName().data(),
                 ( pLocFrag->bIsWalkingNotUniversalPrimerRead() ?
                   "walk" :
                   ( pLocFrag->bIsForwardNotReverseRead() ?
                     "univ fwd" : "univ reverse" ) ) );
      }
   }
}



void Assembly :: assignLibrariesToSubcloneTTemplates() {


   // assign subcloneTTemplate::soLibrary_

  int nTemplate;
   for( nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {
      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];
      
      pSub->transferLibraryNameFromReadsToSubcloneTTemplate();
   }



   // guard against doing this over and over
   if ( !pCP->pDefaultLibrary_ ) {

      pCP->pDefaultLibrary_ = new library( );

      pCP->pDefaultLibrary_->soName_ = "default";

      
      pCP->pDefaultLibrary_->nMeanInsertSize_ = 0;
      pCP->pDefaultLibrary_->nStandardDeviationOfInsertSize_ = 0;

      pCP->pDefaultLibrary_->nDefaultInsertSize_ =
         pCP->nAutoFinishAverageInsertSize_;

      // nAverageInsertSizeToUse_ and nALittleLessThanAverageInsertSizeToUse_
      // will be set in finalProcessingOfInsertSize

      pCP->pDefaultLibrary_->nMaxInsertSize_ =
         pCP->nPrimersMaxInsertSizeOfASubclone_;

      pCP->pDefaultLibrary_->dCostToMakeMinilibrary_ = 
         pCP->dAutoFinishCostOfMinilibrary_;

      pCP->pDefaultLibrary_->bSingleNotDoubleStranded_ =
         pCP->bPrimersAssumeTemplatesAreDoubleStrandedUnlessSpecified_;
   }

   
   // make a temporary list of subclone templates sorted by 
   // library name


   // remember to delete this off the heap when done calculating 
   // insert sizes of the different libraries

   templatesSortedByLibrary aTemplatesSortedByLibrary;
   aTemplatesSortedByLibrary.soName_ = "templatesSortedByLibrary in assembly2.cpp";

   aTemplatesSortedByLibrary.resize( subcloneTTemplateArray_.length() );

   int n;
   for( n = 0; n < subcloneTTemplateArray_.length(); ++n )
      aTemplatesSortedByLibrary.insert( subcloneTTemplateArray_[ n ] );

   
   aTemplatesSortedByLibrary.resort2();  // sorts by library name

   RWTPtrOrderedVector<library> aLibrariesMissingFromFile;

   aLibrariesMissingFromFile.soName_ = "aLibrariesMissingFromFile in assembly2.cpp";
   

   RWCString soLastLibrary;
   library* pLastLibrary = NULL;

   for( nTemplate = 0; nTemplate < aTemplatesSortedByLibrary.length(); ++nTemplate ) {
      subcloneTTemplate* pSubTT = aTemplatesSortedByLibrary[ nTemplate ];


      if ( pSubTT->soLibrary_.isNull() ) {
         pSubTT->pLibrary_ = pCP->pDefaultLibrary_;
      }
      else {
         if ( pSubTT->soLibrary_ == soLastLibrary ) {
            pSubTT->pLibrary_ = pLastLibrary;
         }
         else {
            library* pLib = 
               pCP->pListOfLibraries_->pFindLibraryByName( pSubTT->soLibrary_ );


            if ( !pLib ) {
               pLastLibrary = new library();
               pLastLibrary->soName_ = pSubTT->soLibrary_;
               soLastLibrary = pSubTT->soLibrary_;


               // add defaults
               pLastLibrary->nMaxInsertSize_ = 
                  pCP->nPrimersMaxInsertSizeOfASubclone_;
               pLastLibrary->nDefaultInsertSize_ =
                  pCP->nAutoFinishAverageInsertSize_;
               pLastLibrary->bSingleNotDoubleStranded_ =
                  !pCP->bPrimersAssumeTemplatesAreDoubleStrandedUnlessSpecified_;

               pLastLibrary->dCostToMakeMinilibrary_ = 
                  pCP->dAutoFinishCostOfMinilibrary_;
                  
               aLibrariesMissingFromFile.insert( pLastLibrary );
            }
            else {
               pLastLibrary = pLib;
               soLastLibrary = pLib->soName_;
            }

            pSubTT->pLibrary_ = pLastLibrary;
         }
      }


   } //    for( int nTemplate = 0; nTemplate < pTemplates->length(); ...


   // check to make sure don't keep adding the default library
   if ( ! pCP->pListOfLibraries_->pFindLibraryByName( "default" ) ) {
      pCP->pListOfLibraries_->aLibraries_.insert( pCP->pDefaultLibrary_ );
   }
   
   for( n = 0; n < aLibrariesMissingFromFile.length(); ++n )
      pCP->pListOfLibraries_->aLibraries_.insert( aLibrariesMissingFromFile[ n ] );
   
   // is it important to keep this sorted?  I don't think we will
   // be doing lookups any more in this list since each subcloneTTemplate
   // now has a pointer directly to the library.

   pCP->pListOfLibraries_->sortLibrariesByName();
}
                 


   
void Assembly :: calculateLibraryInsertSizeFromForwardReversePairs() {

   // make code work in case this has been executed previously,
   // such as when someone "Show Library Info" more than once
   int nLib;
   for( nLib = 0; nLib < pCP->pListOfLibraries_->aLibraries_.length();
        ++nLib ) {
      library* pLib = pCP->pListOfLibraries_->aLibraries_[ nLib ];
      pLib->dSumOfSquaresOfInsertSizes_ = 0.0;
      pLib->nNumberOfForwardReversePairsForCalculation_ = 0;
      pLib->dSumOfInsertSizes_ = 0;

   }



   int nTemplate;
   for( nTemplate = 0; nTemplate < subcloneTTemplateArray_.length(); 
        ++nTemplate ) {

      subcloneTTemplate* pSubTT = subcloneTTemplateArray_[ nTemplate ];


      if ( !pSubTT->bCalculateInsertSizeFromForwardReversePair() ) continue;

      ++(pSubTT->pLibrary_->nNumberOfForwardReversePairsForCalculation_);

      pSubTT->pLibrary_->dSumOfInsertSizes_ +=
         pSubTT->nInsertSizeFromForwardReversePair_;

      // you can't square 50kb without getting 2,500,000,000 which is
      // a negative number in some int's
      double dInsertSize = (double) pSubTT->nInsertSizeFromForwardReversePair_;

      pSubTT->pLibrary_->dSumOfSquaresOfInsertSizes_ +=
         ( dInsertSize * dInsertSize );

   } //    for( int nTemplate = 0; nTemplate < pTemplates->length(); ...

   // now can go through libraries and calculate the mean and standard
   // deviation

   for( nLib = 0; nLib < pCP->pListOfLibraries_->aLibraries_.length();
        ++nLib ) {
      library* pLib = pCP->pListOfLibraries_->aLibraries_[ nLib ];

      pLib->finalProcessingOfInsertSize();
   }


   // now repeat this process, but exclude any template that is 3 standard
   // deviations from the mean

   for( nLib = 0; nLib < pCP->pListOfLibraries_->aLibraries_.length();
        ++nLib ) {
      library* pLib = pCP->pListOfLibraries_->aLibraries_[ nLib ];
      pLib->dSumOfSquaresOfInsertSizes_ = 0.0;
      pLib->nNumberOfForwardReversePairsForCalculation_ = 0;
      pLib->dSumOfInsertSizes_ = 0;
   }


   
   for( nTemplate = 0; nTemplate < subcloneTTemplateArray_.length(); 
        ++nTemplate ) {

      subcloneTTemplate* pSubTT = subcloneTTemplateArray_[ nTemplate ];


      if ( !pSubTT->bCalculateInsertSizeFromForwardReversePair() ) continue;

      library* pLib = pSubTT->pLibrary_;

      if ( !pLib->bAverageInsertSizeFromForwardReversePairs_ ) continue;

      // check for outliers (In practice, these do not have the
      // insert size calculated--there is some problem with the 
      // calculation.  Thus don't use these for calculating the mean and
      // standard deviation.)

      if ( ABS( pSubTT->nInsertSizeFromForwardReversePair_ - 
                pLib->nMeanInsertSize_ ) >
           3 * pLib->nStandardDeviationOfInsertSize_ ) continue; 

      ++(pSubTT->pLibrary_->nNumberOfForwardReversePairsForCalculation_);


      pSubTT->pLibrary_->dSumOfInsertSizes_ +=
         pSubTT->nInsertSizeFromForwardReversePair_;
      
      double dInsertSize = (double) pSubTT->nInsertSizeFromForwardReversePair_;

      pSubTT->pLibrary_->dSumOfSquaresOfInsertSizes_ +=
         ( dInsertSize * dInsertSize );

   } //    for( int nTemplate = 0; nTemplate < pTemplates->length(); ...
   

   // now can go through libraries and calculate the mean and standard
   // deviation

   for( nLib = 0; nLib < pCP->pListOfLibraries_->aLibraries_.length();
        ++nLib ) {
      library* pLib = pCP->pListOfLibraries_->aLibraries_[ nLib ];

      pLib->finalProcessingOfInsertSize();
      pLib->increasePrimersMaxInsertSizeOfASubcloneIfNecessary();
   }



}





void Assembly :: checkAutoFinishReads() {


   fprintf( pAO, "\n\n\nEVALUATING AUTOFINISH READS\n\n\n" );


   processSinglets();

   // for each autoFinishExp tag, see if we can find a read that has the 
   // corresponding expID

   bool bAutoFinishTagsFound = false;

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nTag = 0; nTag < pContig->nGetNumberOfTags(); ++nTag ) {
         tag* pTag = pContig->pGetTag( nTag );
   
         if ( pTag->soType_ != "autoFinishExp" ) continue;

         bAutoFinishTagsFound = true;

         autoFinishExpTag* pAutoFinishExpTag = (autoFinishExpTag*) pTag;

         evalExpCluster* pEval = new evalExpCluster( pAutoFinishExpTag );

         for( int nExp = 0; nExp < pAutoFinishExpTag->aExpID_.entries(); 
              ++nExp ) {
            int nExpID = pAutoFinishExpTag->aExpID_[ nExp ];
            RWCString soTemplate = pAutoFinishExpTag->aTemplates_[ nExp ];
         
            LocatedFragment* pLocFrag;
            if ( bFoundExpInAssembly( nExpID, pLocFrag ) ) {
               pEval->addFoundReadInAssembly( nExpID, pLocFrag );
            }
            else {
               // case in which there is no corresponding read for
               // this experiment

               singletInfo* pSingletFromAutoFinish;
               if ( bFoundExpInSinglets( nExpID, pSingletFromAutoFinish ) ) {
                  pEval->addFoundReadInSinglets( nExpID, pSingletFromAutoFinish );
               }
               else {
                  pEval->addNotFoundRead( nExpID );
               }
            }
         } //  for( int nExp = 0; nExp < pAutoF...

         // this will print out lots about this experiment cluster
         pEval->evaluate();
         
      } // for( int nTag = 0 ...
   } //    for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {


   if ( !bAutoFinishTagsFound ) {
      fprintf( pAO, "No autofinish tags found in ace file.  Did you forget \nto put run autofinish with -doExperiments?  This \noption is necessary to allow autofinish to evaluate \nhow well it did with the previous round and also to \nprevent it from picking a failed experiment again.\n" );
   }
}


bool Assembly :: bFoundExpInAssembly( const int nExpID, LocatedFragment*& pLocFrag ) {
   
   for( int n = 0; n < aExpidAndLocatedFragment_.entries(); ++n ) {
      if ( aExpidAndLocatedFragment_[ n ]->nExpID_ == nExpID ) {
         pLocFrag = aExpidAndLocatedFragment_[ n ]->pLocFrag_;
         return( true );
      }
   }

   return( false );
}


bool Assembly :: bFoundExpInSinglets( const int nExpID, 
                                      singletInfo*& pSingletFromAutoFinish ) {

   for( int n = 0; n < aSingletsFromAutoFinish_.entries(); ++n ) {
      if ( aSingletsFromAutoFinish_[ n ]->nExpID_ == nExpID ) {
         pSingletFromAutoFinish = aSingletsFromAutoFinish_[ n ];
         return( true );
      }
   }

   return( false );
}


void Assembly :: processSinglets() {


   pSingletsSortedByTemplateName_ = 
      new singletsSortedByTemplateName( aSingletPHDFilenames_.entries() );


   for( int nSinglet = 0; nSinglet < aSingletPHDFilenames_.entries(); 
        ++nSinglet ) {

      singletInfo* pSingletInfo = 
         new singletInfo();

      pSingletInfo->filPHD_ = (FileName) aSingletPHDFilenames_[ nSinglet ];
      pSingletInfo->soReadName_ = aSingletReadNames_[ nSinglet ];

      if ( pSingletInfo->filPHD_.index( ".phd." ) == RW_NPOS ) {
         fprintf( pAO, "warning--singlet %s has no phd file--autofinish can't check its suggestions\n",
                 (char*) pSingletInfo->filPHD_.data() );
         continue;

      }

      bool bFoundExpID;

      readPHDOfSinglet( bFoundExpID, pSingletInfo );

      pSingletsSortedByTemplateName_->insert( pSingletInfo );

      if ( bFoundExpID ) {
         aSingletsFromAutoFinish_.insert( pSingletInfo );
      }
   }

   pSingletsSortedByTemplateName_->resort2();

}


void Assembly :: setAllUnpaddedErrorProbabilitiesAndMore() {
   
   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      
      pContig->setUnpaddedErrorProbabilitiesAndMore();
   }
}


void Assembly :: setAllFindSingleSubcloneBases() {

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      
      pContig->autoFinishSetSingleSubcloneArrays( true ); // bObserveDoNotFinishTags
   }
}


void Assembly :: setAllContigHighQualitySegments() {
   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      
      pContig->setHighQualitySegment();
   }
}

   
void Assembly :: writeConsensusOfAllContigsToFastaFile() {

   
   FileName soAssemblyDirName( soGetAceFileName().soGetDirectory() );

   FileName soTryOutFileName = soAssemblyDirName + "allContigs.fasta";

   FileName soDirMask = soAssemblyDirName + "*.fasta";
   FileName soUserOutFileName = GuiApp::popupFileSelector( "Save all contigs to file",
                                                           soDirMask,
                                                           soTryOutFileName );

   // if user cancelled or do not supply name, cancel
   if ( soUserOutFileName.isNull() ) return;

   bool bGoAhead = true;
   if ( soUserOutFileName.bFileByThisNameExists() ) {
      bGoAhead =
         GuiApp::popupDecisionMessage( "File %s exists.  Save anyway?",
                                       (const char*) soUserOutFileName );
   }
   
   if ( bGoAhead ) {

      FILE* pfilBases = fopen( (const char*) soUserOutFileName, "w" );

      if ( pfilBases == NULL ) {
         ostrstream ost;
         ost << "Unable to open file " << soUserOutFileName << 
            " for writing bases " << endl << ends;

         SysRequestFailed srf( ost.str() );
         
         srf.includeErrnoDescription();
         throw srf;
      }


      for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
         Contig* pContig = pGetContig( nContig );

         
         pContig->writeConsensusToFastaFile3( pfilBases, 
                                              NULL, // pfilQualities
                                              false, // bWriteBothBasesAndQualFiles
                                              soUserOutFileName, // soBasesFileName
                                              "", // soQualFileName
                                              true, // bWriteWholeContig
                                              0,    // nStartUnpaddedConsPos
                                              0 ); // nEndUnpaddedConsPos
      }
      
      fclose( pfilBases );
   }
}




void Assembly :: createModelRead() {

   pModelReadNumberOfReadsAtEachLocation_ = new MBTValOrderedOffsetVector<int>( 
                                                       (size_t) 1500 );

   pModelReadNumberOfAlignedReadsAtEachLocation_ = new MBTValOrderedOffsetVector<int>(
										      (size_t) 1500 );



   pModelReadDistributions_ = new MBTValOrderedOffsetVector<float*>(
                                                       (size_t) 1500 );
   
   // some reads are complemented and some are not.  So go through the
   // list for top strand, and then go through the list for bottom strand 
   // reads

   // Only the aligned portion of reads should be used, because the
   // unaligned portion of reads should be considered completely
   // wrong--error probability of 1 (or should it be error probability  
   // of 3/4?)

   int nTotalReadsUsed = 0;


   for ( int nContig = 0; nContig < nNumContigs(); nContig++) {
      Contig*   pContig = pGetContig( nContig );

      int nNumFrags = pContig->nGetNumberOfFragsInContig();
      for (int nFrag = 0; nFrag < nNumFrags; nFrag++) {

         // get pointer to this located frag from contig
         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet(nFrag);


         if ( pLocFrag->bIsAFakeRead() ) continue;
         if ( pLocFrag->bIsWholeReadUnaligned()  ) continue;

         // any other reads that should be eliminated?

         if ( pLocFrag->length() > 
              pModelReadDistributions_->length() ) {


            int nNumberOfElementsToAdd = pLocFrag->length() - 
               pModelReadDistributions_->length();

            for( int n = 0; n < nNumberOfElementsToAdd; ++n ) {
               
               float* pQualityArray = (float*) malloc( 
                    ( nMAX_QUALITY_LEVEL2 + 1) * sizeof( float ) );

               if ( !pQualityArray ) {
                  PANIC_OST( ost ) << 
                     "could not malloc memory for quality array in Assembly :: createModelRead()" << ends;
                  throw SysRequestFailed( ost.str() );
               }

               for( int nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; 
                    ++nQuality ) 
                  pQualityArray[ nQuality ] = 0.0;

               pModelReadDistributions_->append( pQualityArray );
               pModelReadNumberOfReadsAtEachLocation_->append( 0 );
               pModelReadNumberOfAlignedReadsAtEachLocation_->append( 0 );
            }
         }

         ++nTotalReadsUsed;


         // first evaluate the beginning of the read (in the direction
         // of sequencing) to include the quality of any x's
         // Stop when get to the aligned part of the read.


         int nConsPosStart;
         int nConsPosEnd;
         int nIncrement;
         
         if ( pLocFrag->bComp() ) {
            nIncrement = -1;
            nConsPosStart = pLocFrag->nGetAlignEnd();
            nConsPosEnd = pLocFrag->nGetAlignClipEnd() + 1;
         }
         else {
            nIncrement = 1;
            nConsPosStart = pLocFrag->nGetAlignStart();
            nConsPosEnd = pLocFrag->nGetAlignClipStart() - 1;
         }

         int nUnpaddedOrientedReadPos = 0; // will ++ below to 1 on 
         // first base

	 int nConsPos;
         for( nConsPos = nConsPosStart;
              ( pLocFrag->bComp() ? ( nConsPos >= nConsPosEnd ) :
                                    ( nConsPos <= nConsPosEnd ) );
              nConsPos += nIncrement ) {


            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() )
                  continue;

            ++nUnpaddedOrientedReadPos;

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) {
               // in this case, add the quality of the base
               int nQuality = 
                  pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
               
               // Phil said (2/2000) to not count such bases
               if ( nQuality == 98 || nQuality == 99 ) 
                  continue;

               if ( nQuality > nMAX_QUALITY_LEVEL2 ) 
                  nQuality = nMAX_QUALITY_LEVEL2;
                
               float* pArrayOfQualityValues = 
                  (*pModelReadDistributions_)[ nUnpaddedOrientedReadPos ];
               
               ++ ( pArrayOfQualityValues[ nQuality ] );

               ++ (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];
	       ++ (*pModelReadNumberOfAlignedReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];
            } // if ( pLocFrag->ntGetFragFromConsPos( ... == 'x' ) ...
            else {
               float* pArrayOfQualityValues = 
                  (*pModelReadDistributions_)[ nUnpaddedOrientedReadPos ];
               // assume quality 0 if unaligned
               ++ ( pArrayOfQualityValues[ 0 ] );
               ++ (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];
            }
         } // for( int nConsPos ...
         
         // now look through aligned portion of the read

         if ( pLocFrag->bComp() ) {
            nConsPosStart = pLocFrag->nGetAlignClipEnd();
            nConsPosEnd = pLocFrag->nGetAlignClipStart();
         }
         else {
            nConsPosStart = pLocFrag->nGetAlignClipStart();
            nConsPosEnd = pLocFrag->nGetAlignClipEnd();
         }

         for( nConsPos = nConsPosStart;
              ( pLocFrag->bComp() ? ( nConsPos >= nConsPosEnd ) :
                ( nConsPos <= nConsPosEnd ) );
              nConsPos += nIncrement ) {

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) 
                  continue;
            ++nUnpaddedOrientedReadPos;

            int nQuality =
                  pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();

            // Phil said (2/2000) to not count such bases
            if ( nQuality == 98 || nQuality == 99 ) continue;

            // fix for Touchman 5/2006
            if ( nQuality > nMAX_QUALITY_LEVEL2 ) 
               nQuality = nMAX_QUALITY_LEVEL2;


            float* pArrayOfQualityValues = 
               (*pModelReadDistributions_)[ nUnpaddedOrientedReadPos ];
               
            ++ ( pArrayOfQualityValues[ nQuality ] );

            ++ (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];
	    ++ (*pModelReadNumberOfAlignedReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];

         }

         // now look for any vector bases in the remaining portion of the read

         if ( pLocFrag->bComp() ) {
            nConsPosStart = pLocFrag->nGetAlignClipStart() - 1;
            nConsPosEnd = pLocFrag->nGetAlignStart();
         }
         else {
            nConsPosStart = pLocFrag->nGetAlignClipEnd() + 1;
            nConsPosEnd = pLocFrag->nGetAlignEnd();
         }

         
         for( nConsPos = nConsPosStart;
              ( pLocFrag->bComp() ? ( nConsPos >= nConsPosEnd ) :
                ( nConsPos <= nConsPosEnd ) );
              nConsPos += nIncrement ) {

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() )
                 continue;
              
            ++nUnpaddedOrientedReadPos;

            if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
                   'x' ) {
               int nQuality =
                    pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality();
               
               if ( nQuality == 98 || nQuality == 99 ) 
                  continue;

               if ( nQuality > nMAX_QUALITY_LEVEL2 )
                  nQuality = nMAX_QUALITY_LEVEL2;

               float* pArrayOfQualityValues = 
                  (*pModelReadDistributions_)[ nUnpaddedOrientedReadPos ];
               
               ++ ( pArrayOfQualityValues[ nQuality ] );
               ++ (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];
	       ++ (*pModelReadNumberOfAlignedReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];
            } // if ( pLocFrag->ntGetFragFromConsPos ... == 'x' )...
            else {
               // this is an unaligned base that is not x, so give it
               // a quality of 0
               
               float* pArrayOfQualityValues =
                  (*pModelReadDistributions_)[ nUnpaddedOrientedReadPos ];
               
               ++( pArrayOfQualityValues[0] );
               ++ (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedOrientedReadPos ];

            }
         }


         assert( pLocFrag->nGetUnpaddedSeqLength() == nUnpaddedOrientedReadPos );

      } // for( int nFrag = ...
   }


   nModelReadTotalReads_ = nTotalReadsUsed;
   
   // To avoid the model read being influenced by the few reads that 
   // are very long, cut off the model read when less than 10% of 
   // reads are that long.

   int nMinNumberOfReads = nModelReadTotalReads_ / 10;
   // bug fix in which cDNA sequencing would not clip model read even
   // when it had only 0 reads
   if ( nMinNumberOfReads < 1 )
      nMinNumberOfReads = 1;


   int nUnpaddedSeq;
   for( nUnpaddedSeq = pModelReadNumberOfReadsAtEachLocation_->nGetEndIndex(); 
        nUnpaddedSeq >= pModelReadNumberOfReadsAtEachLocation_->nGetStartIndex();
        --nUnpaddedSeq ) {
      
      if ( (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedSeq ] < 
           nMinNumberOfReads ) {

         float* pArrayOfQualityValues = 
            (*pModelReadDistributions_)[ nUnpaddedSeq ];
         free( pArrayOfQualityValues);

         pModelReadDistributions_->removeLast();
         pModelReadNumberOfReadsAtEachLocation_->removeLast();
         pModelReadNumberOfAlignedReadsAtEachLocation_->removeLast();
      }
      else
         break;
   }


   // If this lab has finishing reads that are shorter than the 
   // shotgun reads, trim the model read back to this point.

   if ( pModelReadNumberOfReadsAtEachLocation_->length() >
        pCP->nAutoFinishMaximumFinishingReadLength_ ) {

      for( nUnpaddedSeq = pModelReadNumberOfReadsAtEachLocation_->nGetEndIndex();
           nUnpaddedSeq > pCP->nAutoFinishMaximumFinishingReadLength_;
           --nUnpaddedSeq ) {
         
         float* pArrayOfQualityValues =
            (*pModelReadDistributions_)[ nUnpaddedSeq ];
         free( pArrayOfQualityValues );

         pModelReadDistributions_->removeLast();
         pModelReadNumberOfReadsAtEachLocation_->removeLast();
         pModelReadNumberOfAlignedReadsAtEachLocation_->removeLast();
      }
   }



   // convert pModelReadsDistributions_ from arrays with the absolute
   // number of reads at each quality 
   // to arrays with the cumulative fractional number (per cent) of 
   // reads at each quality 

   for( nUnpaddedSeq = pModelReadNumberOfReadsAtEachLocation_->nGetStartIndex();
        nUnpaddedSeq <= pModelReadNumberOfReadsAtEachLocation_->nGetEndIndex(); 
        ++nUnpaddedSeq ) {

      float* pArrayOfQualityValues = 
         (*pModelReadDistributions_)[ nUnpaddedSeq ];

      int nNumberOfReadsAtThisPosition = 
         (*pModelReadNumberOfReadsAtEachLocation_)[ nUnpaddedSeq ];

      // in this case, all the numbers of reads at each quality level
      // will also be zero, so no need to divide
      if ( nNumberOfReadsAtThisPosition == 0 ) {
         pArrayOfQualityValues[0] = 1.0;
         continue;
      }

      // if we want to consider shorter reads as having quality 0 at this
      // read position, add some quality zero counts

      if ( !pCP->bAutoFinishUseLongModelReadRatherThanShort_ ) {
         pArrayOfQualityValues[0] += 
            ( nModelReadTotalReads_ - nNumberOfReadsAtThisPosition );

      }




      // convert to cumulative distribution
      int nQuality;


      for( nQuality = 1; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) 
         pArrayOfQualityValues[ nQuality ] +=
            pArrayOfQualityValues[ nQuality - 1 ];

      // convert to fractional distribution

      float fDenominator = pCP->bAutoFinishUseLongModelReadRatherThanShort_ ?
         (float) nNumberOfReadsAtThisPosition :
         (float) nModelReadTotalReads_;


      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality )
         pArrayOfQualityValues[ nQuality ] /= 
            fDenominator;

      assert( pArrayOfQualityValues[ nMAX_QUALITY_LEVEL2 ] == 1.0 );
   }

   if ( pCP->bAutoFinishEmulate9_66Behavior_ )
      addRedundancyToModelReadFor9_66();
   
   // now calculate the model read


   makeModelRead( pModelRead_,
                  pArrayOfModelReadsWithSeveralTemplates_,
                  nModelReadHighQualitySegmentStart_,
                  pModelReadDistributions_ );

}



void Assembly :: makeModelRead( MBTValOrderedOffsetVector<unsigned char>*&
                                pModelRead,
                                mbtPtrVector<aModelReadWithSeveralTemplatesType>*& pArrayOfModelReadsWithSeveralTemplates,
                                int& nModelReadHighQualitySegmentStart,
                                MBTValOrderedOffsetVector<float*>* pModelReadDistributions ) {
                                
   // have an offset of 1 so that it is indexed by the number of templates,
   // starting at 1

   pArrayOfModelReadsWithSeveralTemplates = 
      new mbtPtrVector<aModelReadWithSeveralTemplatesType>( 
              (size_t) pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_,
              1 );  // starts with index 1

   int nTemplates;
   for( nTemplates = 1; 
        nTemplates <= pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_;
        ++nTemplates ) {

      MBTValOrderedOffsetVector<unsigned char>* pModelReadWithSeveralTemplates =
         new MBTValOrderedOffsetVector<unsigned char>( (size_t) pModelReadDistributions->length() );


      (*pArrayOfModelReadsWithSeveralTemplates)[ nTemplates ] = 
         pModelReadWithSeveralTemplates;

   }

   pModelRead = (*pArrayOfModelReadsWithSeveralTemplates)[ 1 ];



   // temporary array of storing the array, the square of the array,
   // and the cube of the array where the array is the cumulative
   // error probability distribution

   float* pArrayOfQualityValues = (float*) malloc( 
                    ( nMAX_QUALITY_LEVEL2 + 1) * sizeof( float ) );

   int nUnpaddedSeq;
   for( nUnpaddedSeq = pModelReadDistributions->nGetStartIndex();
        nUnpaddedSeq <= pModelReadDistributions->nGetEndIndex();
        ++nUnpaddedSeq ) {


      float* pArrayOfQualityValuesOriginal = 
         (*pModelReadDistributions)[ nUnpaddedSeq ];

      int nQuality;
      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality )
         pArrayOfQualityValues[ nQuality ] =  1.0;

      for( nTemplates = 1; 
           nTemplates <= pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_;
           ++nTemplates ) {

         for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality )
            pArrayOfQualityValues[ nQuality ] *= 
               pArrayOfQualityValuesOriginal[ nQuality ];

         for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {
            if ( pArrayOfQualityValues[ nQuality ] >= 0.5 )
               break;
         }

         aModelReadWithSeveralTemplatesType* pModelReadWithSeveralTemplates =
            (*pArrayOfModelReadsWithSeveralTemplates)[ nTemplates ];

         pModelReadWithSeveralTemplates->append( (unsigned char) nQuality );
      }
   } // nUnpaddedSeq = pModelReadDistributions_->nGetStartIndex() ...


   // calculate nModelReadHighQualitySegmentStart_

   nModelReadHighQualitySegmentStart = 1; // default
   for( nUnpaddedSeq = pModelRead_->nGetStartIndex();
        nUnpaddedSeq <= pModelRead_->nGetEndIndex();
        ++nUnpaddedSeq ) {
      if ( (*pModelRead_)[ nUnpaddedSeq ] != 0 ) {
         nModelReadHighQualitySegmentStart = nUnpaddedSeq;
         break;
      }
   }

   // calculate nModelReadAlignedSegmentEnd_
   // which is used for fixing single subclone regions

   int nNumberOfAlignedReadsRequired = nModelReadTotalReads_ 
      * pCP->nAutoFinishConfidenceThatReadWillCoverSingleSubcloneRegion_
      / 100;

   nModelReadAlignedSegmentEnd_ = -1;
   for( nUnpaddedSeq = pModelReadNumberOfAlignedReadsAtEachLocation_->nGetEndIndex();
        nUnpaddedSeq >= pModelReadNumberOfAlignedReadsAtEachLocation_->nGetStartIndex();
        --nUnpaddedSeq ) {
      if ( (*pModelReadNumberOfAlignedReadsAtEachLocation_)[ nUnpaddedSeq ] >= nNumberOfAlignedReadsRequired ) {
         nModelReadAlignedSegmentEnd_ = nUnpaddedSeq;
         break;
      }
   }


   nModelReadAlignedSegmentStart_ = -1;
   for( nUnpaddedSeq = pModelReadNumberOfAlignedReadsAtEachLocation_->nGetStartIndex();
        nUnpaddedSeq <= pModelReadNumberOfAlignedReadsAtEachLocation_->nGetEndIndex();
        ++nUnpaddedSeq ) {

      if ( (*pModelReadNumberOfAlignedReadsAtEachLocation_)[ nUnpaddedSeq ] >=
           nNumberOfAlignedReadsRequired ) {
         nModelReadAlignedSegmentStart_ = nUnpaddedSeq;
         break;
      }
   }

   if ( nModelReadAlignedSegmentStart_ == -1 ||
        nModelReadAlignedSegmentEnd_ == -1 ) {
      nModelReadAlignedSegmentStart_ = pCP->nAutoFinishPotentialHighQualityPartOfReadStart_;
      nModelReadAlignedSegmentEnd_ = pCP->nAutoFinishPotentialHighQualityPartOfReadEnd_;
   }


   free( pArrayOfQualityValues );
}

   

   
   
void Assembly :: showAutofinishGoodModelRead() {
   
   PleaseWait* pPleaseWait = 
      new PleaseWait( GuiApp::pGetGuiApp()->widGetTopLevel() );

   createModelRead();   

   delete pPleaseWait;

   TextBox* pTB = new TextBox( "Autofinish Model Read",
                               20 ); // number of rows

   
   pTB->append( "\n\n" );
   pTB->appendWithArgs( "total reads used: %d\n",
                        nModelReadTotalReads_ );

   pTB->appendWithArgs( "high quality segment start = %d\n\n",
                        nModelReadHighQualitySegmentStart_ );

   pTB->appendWithArgs( "nModelReadAlignedSegmentStart_ = %d\n",
                        nModelReadAlignedSegmentStart_ );
   pTB->appendWithArgs( "nModelReadAlignedSegmentEnd_ = %d\n\n",
                        nModelReadAlignedSegmentEnd_ );
   

   pTB->append( "columns:\n" );
   pTB->append( "read position, unpadded, in direction of sequencing\n" );
   pTB->append( "median quality at that read position\n" );
   pTB->append( "number of reads used at that read position to determine quality\n" );
   pTB->append( "median maximum quality of 2 independent reads at that position\n" );
   pTB->append( "median maximum quality of 3 independent reads at that position\n" );
   pTB->append( "median maximum quality of 4 independent reads at that position\n" );
   
   pTB->append( "quality value and % of reads that have that quality value or less at that sequence position\n" );
   pTB->append( "\n" );

   pTB->append( "read quality error for errors\n" );
   pTB->append( "pos          prob  fixed\n\n\n" );
      

   double dSumErrorProbabilities = 0;
   
   int nSeqPos;
   for( nSeqPos = pModelRead_->nGetStartIndex();
        nSeqPos <= pModelRead_->nGetEndIndex();
        ++nSeqPos ) {

      int nQuality = (*pModelRead_)[ nSeqPos ];

      float* pDistribution =  (*pModelReadDistributions_)[ nSeqPos ];

      pTB->appendWithArgs( "%3d %2d %5d ", 
                           nSeqPos, 
                           nQuality,
                           (*pModelReadNumberOfReadsAtEachLocation_)[ nSeqPos ] );

      float dis2Reads[nMAX_QUALITY_LEVEL2 + 1];
      float dis3Reads[nMAX_QUALITY_LEVEL2 + 1];
      float dis4Reads[nMAX_QUALITY_LEVEL2 + 1];
      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {

         dis2Reads[ nQuality ] = 
            pDistribution[ nQuality ] * pDistribution[ nQuality ];
         dis3Reads[ nQuality ] = 
            dis2Reads[ nQuality ] * pDistribution[ nQuality ];
         dis4Reads[ nQuality ] = 
            dis3Reads[ nQuality ] * pDistribution[ nQuality ];
      }

      // find median of each distribution

      int nMedianQuality2Reads = -1;
      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {
         if ( dis2Reads[ nQuality ] >= .5 ) {
            nMedianQuality2Reads = nQuality;
            break;
         }
      }

      int nMedianQuality3Reads = -1;
      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {
         if ( dis3Reads[ nQuality ] >= .5 ) {
            nMedianQuality3Reads = nQuality;
            break;
         }
      }

      int nMedianQuality4Reads = -1;
      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {
         if ( dis4Reads[ nQuality ] >= .5 ) {
            nMedianQuality4Reads = nQuality;
            break;
         }
      }

      pTB->appendWithArgs("%2d %2d %2d ", 
                          nMedianQuality2Reads,
                          nMedianQuality3Reads,
                          nMedianQuality4Reads );


      for( nQuality = 0; nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {
         float fPercent = pDistribution[ nQuality ];
         pTB->appendWithArgs( "%d: %.3f ", nQuality, fPercent );
      }

      pTB->append( "\n" );
   }

   pTB->append("\n\n\n");
   pTB->append( "Aligned reads at each position:\n" );
   pTB->append( "\n" );
   pTB->append( "read pos  # of aligned reads   fraction of total reads\n" );
   for( nSeqPos = pModelReadNumberOfAlignedReadsAtEachLocation_->nGetStartIndex();
        nSeqPos <= pModelReadNumberOfAlignedReadsAtEachLocation_->nGetEndIndex();
        ++nSeqPos ) {
     pTB->appendWithArgs("%2d %5d %.5f\n",
			 nSeqPos,
			 (*pModelReadNumberOfAlignedReadsAtEachLocation_)[nSeqPos ],
			 (float) (*pModelReadNumberOfAlignedReadsAtEachLocation_)[nSeqPos ]/ 
			 (float) nModelReadTotalReads_ );

   }


   pTB->makeVisible();
}







static int compareTemplatesBySize( 
                             const subcloneTTemplate** ppSub1,
                             const subcloneTTemplate** ppSub2 ) {


   if ( (*ppSub1)->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ &&
        !(*ppSub2)->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ )
      return( -1 );
   else if ( !(*ppSub1)->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ &&
             (*ppSub2)->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ )
      return( 1 );
   else if ( !(*ppSub1)->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ &&
             !(*ppSub2)->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ ) {
      // just to have a defined order, sort bad templates alphabetically
      return( compareTemplates( ppSub1, ppSub2 ) );
   }
   else {
      // my favorite case--when there are 2 templates that both have
      // forward/reverse pairs

      if ( (*ppSub1)->nInsertSizeFromForwardReversePair_ <
           (*ppSub2)->nInsertSizeFromForwardReversePair_ )
         return( -1 );
      else if (  (*ppSub1)->nInsertSizeFromForwardReversePair_ ==
                 (*ppSub2)->nInsertSizeFromForwardReversePair_ )
         return( 0 );
      else
         return( 1 );
   }
}
      

   





void Assembly :: showTemplateInsertLocations() {

   PleaseWait* pPleaseWait = 
      new PleaseWait( GuiApp::pGetGuiApp()->widGetTopLevel() );

   setContigTemplateArrays();


   RWTPtrOrderedVector<subcloneTTemplate> aTemplatesSortedBySize;
   aTemplatesSortedBySize.resize( subcloneTTemplateArray_.length() );

   int nSub;
   for( nSub = 0; nSub < subcloneTTemplateArray_.length(); ++nSub ) {
      aTemplatesSortedBySize.insert( subcloneTTemplateArray_[ nSub ] );
   }

   
   void* pArray = (void*) aTemplatesSortedBySize.data();
   size_t nNumberOfTemplates = aTemplatesSortedBySize.entries();
   size_t nSizeOfAnElement = sizeof( subcloneTTemplate* );
   
   qsort( pArray, nNumberOfTemplates, nSizeOfAnElement, 
          ( (int(*) ( const void*, const void*) ) compareTemplatesBySize ) );
   

   // check that the array is now sorted by template size

   
   for( nSub = 0; nSub < aTemplatesSortedBySize.entries() - 1; ++nSub ) {
      subcloneTTemplate* pSub1 = aTemplatesSortedBySize[ nSub ];
      subcloneTTemplate* pSub2 = aTemplatesSortedBySize[ nSub + 1 ];
      if ( pSub1->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ &&
           pSub2->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ ) {
         if ( ! ( pSub1->nInsertSizeFromForwardReversePair_ <=
                  pSub2->nInsertSizeFromForwardReversePair_ ) ) {
            cerr << "Elements " << nSub << " and " << nSub + 1 << 
               " out of order with insert sizes " << 
               pSub1->nInsertSizeFromForwardReversePair_ << 
               " and " << pSub2->nInsertSizeFromForwardReversePair_ << 
               endl;
         }
      }
   }



   TextBox* pTB = new TextBox( "Template Insert Locations",
                               20 ); // number of rows



   pTB->append( "Example:\n" );
   pTB->append( 
"0719                            762 ok\n" );
   pTB->append(
"   HAHCA1Q0719.scf      univ primer 38 \n" );
   pTB->append( 
"   HAHCA1T0719.scf      univ primer 33 \n" );
   pTB->append( "\n\nIn the above example, \n");
   pTB->append( "   0719 is the template name, \n" );
   pTB->append( "   762 is the insert size, \n" );
   pTB->append( "   ok indicates that this template is ok to be used by autofinish\n" );
   pTB->append( "   38 is the sequencing position of the start of the insert in universal primer read HAHCA1Q0719.scf, \n" );
   pTB->append( "   33 is the sequencing position of the start of the insert in universal primer read HAHCA1T0719.scf\n\n\n" );
   
   
   for( nSub = 0; nSub < aTemplatesSortedBySize.length(); ++nSub ) {
      
      subcloneTTemplate* pSub = aTemplatesSortedBySize[ nSub ];

      RWCString soProblems;

      if ( pSub->bOKToUseThisTemplate_ )
         soProblems += "ok ";

      if ( pSub->bBadTemplateFile_ )
         soProblems += "in bad template file ";

      if ( pSub->bBadLibraryFile_ )
         soProblems += "in bad library file ";
      
      if ( pSub->bShortInsert_ )
         soProblems += "short insert ";
      
      if ( pSub->bReadsAreInconsistent_ )
         soProblems += "reads inconsistent ";

      if ( pSub->bReadPairTooFarApart_ )
         soProblems += "forward/reverse pair too far apart ";

      if ( pSub->bUnalignedHighQualityRegionTooLong_ )
         soProblems += "unaligned high quality region too long ";

      if ( pSub->bHasSeriousHighQualityDiscrepancies_ )
         soProblems += "has serious high quality discrepancies ";
      
      if ( pSub->bIsChimeric_ )
         soProblems += "is chimeric ";

      if ( pSub->bInconsistentGapSpanning_ )
         soProblems += "incorrectly indicates the way that 2 contigs are oriented";

      
      int nInsertSize = -1;
      if ( pSub->bOKToUseThisTemplateForCalculatingInsertSizeFromForwardReversePairs_ )
         nInsertSize = pSub->nInsertSizeFromForwardReversePair_;
      else if ( pSub->bOKToUseThisTemplate_ ) {
         // this essentially just picks the "a little less than average" 
         // size for the library
         nInsertSize = pSub->nUnpaddedEnd_ -
            pSub->nUnpaddedStart_ + 1;
      }


      pTB->appendWithArgs( "%-30s  %d %s", 
                           (char*) pSub->soTemplateName_,   
                           nInsertSize,
                           (char*) soProblems );
      if ( pSub->soLibrary_.isNull() )
         pTB->append( "\n" );
      else
         pTB->appendWithArgs( " library: %s\n",
                              pSub->soLibrary_.data() );

      for( int nRead = 0; nRead < pSub->aReads_.length(); ++nRead ) {
         LocatedFragment* pLocFrag = pSub->aReads_[ nRead ];

         if ( pLocFrag->nReadType_ == nWalk ) {

            pTB->appendWithArgs( "   %-20s walk ",
                                 (char*) pLocFrag->soGetName() );

            if ( pLocFrag->bIsWholeReadUnaligned() )
               pTB->append( "totally unaligned " );
            else {
               int nLastBaseBeforeVector;
               bool bFoundXsAfterAligned;

               pLocFrag->findVectorInsertJunctionInWalkingRead( 
                                bFoundXsAfterAligned,
                                nLastBaseBeforeVector );
               

               if ( bFoundXsAfterAligned )
                  pTB->appendWithArgs( " found vector after last aligned base %d\n",
                                       nLastBaseBeforeVector );
               else
                  pTB->appendWithArgs(" did not find vector after last aligned base %d\n",
                              nLastBaseBeforeVector );
            }
         }
         else {
            int nFirstVectorInsertJunction;
            int nSecondVectorInsertJunction;
            bool bFoundFirstVectorInsertJunction;
            bool bFoundSecondVectorInsertJunction;

            pLocFrag->findVectorInsertJunctionsInUniversalPrimerRead(
                          nFirstVectorInsertJunction,
                          nSecondVectorInsertJunction,
                          bFoundSecondVectorInsertJunction );

            int nSeqPos = pLocFrag->nOrientedUnpaddedFragPosFromConsPos( 
                          nFirstVectorInsertJunction );

            
            pTB->appendWithArgs( "   %-20s univ primer %d ",
                                 (char*) pLocFrag->soGetName(),
                                 nSeqPos
                                 );

            if ( pLocFrag->bIsWholeReadUnaligned() )
               pTB->append( "totally unaligned " );
                                 
                                 
            if ( bFoundSecondVectorInsertJunction ) {
               int nSeqPos2 = pLocFrag->nOrientedUnpaddedFragPosFromConsPos(
                                   nSecondVectorInsertJunction );

               pTB->appendWithArgs( "  found second v/i junction at %d\n",
                                    nSeqPos2 );
            }
            else 
               pTB->append( "\n" );
         }
      } // for( int nRead ...
   } // for( int nSub ...

   delete pPleaseWait;

   pTB->makeVisible();
}



void Assembly :: showAutoFinishErrorsFixedFormula() {

   PleaseWait* pPleaseWait = 
      new PleaseWait( GuiApp::pGetGuiApp()->widGetTopLevel() );

   createModelRead();   

   TextBox* pTB = new TextBox( "Autofinish Errors Fixed Formula" );
   
   pTB->append( "\n\n" );
   pTB->append( "quality  new cons qual ----errors fixed----      ---error probability\n" );
   pTB->append( "con read uni red       unique   redundant        consensus   read    \n" );
   pTB->append( "\n" );

   for( int nConsensusQuality = 0; nConsensusQuality <= 90;
        nConsensusQuality += 5 ) {

      for( int nReadQuality = 0; nReadQuality <= 50; nReadQuality += 10 ) {
         
         double dConsensusErrorProbability = dErrorRateFromQuality[ nConsensusQuality ];
         double dReadErrorProbability = dErrorRateFromQuality[ nReadQuality ];

         double dErrorsFixedRedundantReadType = 
            dGetErrorsFixedAtBase( dConsensusErrorProbability,
                                   dReadErrorProbability,
                                   TOP_STRAND_TERMINATOR_READS,
                                   TOP_STRAND_TERMINATOR_READS );

         double dErrorsFixedUniqueReadType = 
            dGetErrorsFixedAtBase( dConsensusErrorProbability,
                                   dReadErrorProbability,
                                   TOP_STRAND_TERMINATOR_READS,
                                   BOTTOM_STRAND_TERMINATOR_READS );


         double dNewConsensusErrorProbabilityRedundant =
            dConsensusErrorProbability - dErrorsFixedRedundantReadType;
         
         double dNewConsensusErrorProbabilityUnique =
            dConsensusErrorProbability - dErrorsFixedUniqueReadType;

         double dQualityNewConsensusRedundant =
            -10.0 * log10( dNewConsensusErrorProbabilityRedundant );

         double dQualityNewConsensusUnique =
            -10.0 * log10( dNewConsensusErrorProbabilityUnique );

         pTB->appendWithArgs( "%2d  %2d  %4.1f %4.1f ",
                              nConsensusQuality,
                              nReadQuality,
                              dQualityNewConsensusUnique,
                              dQualityNewConsensusRedundant );

         // now for the fun--add another read
         // first, find the location in the model read where the
         // desired read quality value is attained as a median:
         
         bool bFoundSeqPosition = false;
         int nSeqPos;
         for( nSeqPos = pModelRead_->nGetStartIndex();
              nSeqPos <= pModelRead_->nGetEndIndex();
              ++nSeqPos ) {
            if ( (*pModelRead_)[ nSeqPos ] >= nReadQuality ) {
               bFoundSeqPosition = true;
               break;
            }
         }

         // there might not be any such point, though

         if ( bFoundSeqPosition ) { 

            float* pDistribution = (*pModelReadDistributions_)[ nSeqPos ];
   
            int nMedianQualityOf2Reads;
            for( nMedianQualityOf2Reads = 0; 
                 nMedianQualityOf2Reads <= nMAX_QUALITY_LEVEL2; 
                 ++nMedianQualityOf2Reads ) {
   
               if ( pDistribution[ nMedianQualityOf2Reads ] *
                    pDistribution[ nMedianQualityOf2Reads ] > 0.5 )
                  break;
            }
   
            dErrorsFixedRedundantReadType = 
               dGetErrorsFixedAtBase( dConsensusErrorProbability,
                                      dErrorRateFromQuality[ nMedianQualityOf2Reads ],
                                      TOP_STRAND_TERMINATOR_READS,
                                      TOP_STRAND_TERMINATOR_READS );
            
   
            dErrorsFixedUniqueReadType = 
               dGetErrorsFixedAtBase( dConsensusErrorProbability,
                                      dErrorRateFromQuality[ nMedianQualityOf2Reads ],
                                      TOP_STRAND_TERMINATOR_READS,
                                      BOTTOM_STRAND_TERMINATOR_READS );
            
               
            dNewConsensusErrorProbabilityRedundant =
               dConsensusErrorProbability - dErrorsFixedRedundantReadType;
            
            dNewConsensusErrorProbabilityUnique =
               dConsensusErrorProbability - dErrorsFixedUniqueReadType;
   
            dQualityNewConsensusRedundant =
               -10.0 * log10( dNewConsensusErrorProbabilityRedundant );
   
            dQualityNewConsensusUnique =
               -10.0 * log10( dNewConsensusErrorProbabilityUnique );
   
   
            pTB->appendWithArgs( "%4d 2 reads: %2d %4.1f %4.1f ",
                                 nSeqPos,
                                 nMedianQualityOf2Reads,
                                 dQualityNewConsensusUnique,
                                 dQualityNewConsensusRedundant );
   
            int nMedianQualityOf3Reads;
            for( nMedianQualityOf3Reads = 0;
                 nMedianQualityOf3Reads <= nMAX_QUALITY_LEVEL2;
                 ++nMedianQualityOf3Reads ) {
   
               if ( pDistribution[ nMedianQualityOf3Reads ] *
                    pDistribution[ nMedianQualityOf3Reads ] *
                    pDistribution[ nMedianQualityOf3Reads ]  > 0.5 )
                  break;
            }
   
            dErrorsFixedRedundantReadType =
               dGetErrorsFixedAtBase( dConsensusErrorProbability,
                                      dErrorRateFromQuality[ nMedianQualityOf3Reads ],
                                      TOP_STRAND_TERMINATOR_READS,
                                      TOP_STRAND_TERMINATOR_READS );
   
            dErrorsFixedUniqueReadType =
               dGetErrorsFixedAtBase( dConsensusErrorProbability,
                                      dErrorRateFromQuality[ nMedianQualityOf3Reads ],
                                      TOP_STRAND_TERMINATOR_READS,
                                      BOTTOM_STRAND_TERMINATOR_READS );
   
   
            dNewConsensusErrorProbabilityRedundant =
               dConsensusErrorProbability - dErrorsFixedRedundantReadType;
            
            dNewConsensusErrorProbabilityUnique =
               dConsensusErrorProbability - dErrorsFixedUniqueReadType;
   
            dQualityNewConsensusRedundant =
               -10.0 * log10( dNewConsensusErrorProbabilityRedundant );
   
            dQualityNewConsensusUnique =
               -10.0 * log10( dNewConsensusErrorProbabilityUnique );
   
   
            pTB->appendWithArgs( "3 reads: %2d %4.1f %4.1f\n",
                                 nMedianQualityOf3Reads,
                                 dQualityNewConsensusUnique,
                                 dQualityNewConsensusRedundant );
         } // if ( bFoundSeqPosition )
         else
            pTB->append(" no model read position this good\n" );

      }
   }

   delete pPleaseWait;

   pTB->makeVisible();
}



                               
void Assembly :: showLibraryInfo() {


   PleaseWait* pPleaseWait = 
      new PleaseWait( GuiApp::pGetGuiApp()->widGetTopLevel() );

   setContigTemplateArrays();

   countReadsInEachLibrary();

   const int nNumberOfRowsVisible = 35;
   TextBox* pTextBox = new TextBox( "Library Info",
                                    nNumberOfRowsVisible );

   pTextBox->append( "columns:\n" );
   pTextBox->append( "# : # of fwd/rev pairs used in calculation\n" );
   pTextBox->append( "#2 : # of reads in library\n" );
   pTextBox->append( "   (note: whole clone reads, singlets, and fake reads\n    are not included in this number)\n" );
   pTextBox->append( "mean : calculated mean insert size of library\n" );
   pTextBox->append( "std : standard deviation of insert size of library\n" );
   pTextBox->append( "def : default insert size--only used if not enough fwd/rev pairs\n" );
   pTextBox->append( "avg : insert size that will be used\n" );
   pTextBox->append( "less : a little less than average insert size to be used\n" );
   pTextBox->append( "max : maximum insert size to use (usually from statistical analysis of distribution of fwd/rev pairs\n" );
   pTextBox->append( "maxf : maximum insert size from librariesInfo.txt\n" );
   pTextBox->append( "st : user-specified single-stranded or double-stranded\n" );
   
   pTextBox->append( "f/r : using forward/reverse pairs (not defaults)\n" );
   pTextBox->append( "ok : not in bad libraries file\n" );
   
   pTextBox->append( "\n\n" );

   pTextBox->append( "name                 #   #2    mean   std  def   avg less   max maxf st f/r ok?\n" );

   
   for( int nLib = 0; nLib < pCP->pListOfLibraries_->aLibraries_.length();
        ++nLib ) {
      library* pLib = pCP->pListOfLibraries_->aLibraries_[ nLib ];
      
      pTextBox->appendWithArgs( 
            "%-15s %6d %6d %5d %5d %4d %5d %4d %5d %5d %-2s %-3s %-3s\n",
            pLib->soName_.data(),
            pLib->nNumberOfForwardReversePairsForCalculation_,
            pLib->nNumberOfReadsFromThisLibrary_,
            pLib->nMeanInsertSize_,
            pLib->nStandardDeviationOfInsertSize_,
            pLib->nDefaultInsertSize_,
            pLib->nAverageInsertSizeToUse_,
            pLib->nALittleLessThanAverageInsertSizeToUse_,
            pLib->nMaxInsertSize_,
            pLib->nMaxInsertSizeFromFile_,
            ( pLib->bSingleNotDoubleStranded_ ? "SS" : "DS" ),
            ( pLib->bAverageInsertSizeFromForwardReversePairs_ ? "Yes" : "No" ),
            ( bIsThisLibraryOKNotBad( pLib->soName_ ) ? "ok" : "bad" )
 );
   }

   pTextBox->makeVisible();

   delete pPleaseWait;

}
   


void Assembly :: countReadsInEachLibrary() {

   for( int nLib = 0; nLib < pCP->pListOfLibraries_->aLibraries_.length();
        ++nLib ) {
      library* pLib = pCP->pListOfLibraries_->aLibraries_[ nLib ];
      
      pLib->nNumberOfReadsFromThisLibrary_ = 0;
   }


   for( int nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries();
        ++nTemplate ) {
      subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ];

      pSub->pLibrary_->nNumberOfReadsFromThisLibrary_ 
         += pSub->aReads_.length();
   }
         
}



bool Assembly :: bIsThisUniversalPrimerReadInSinglets( 
                             const int nReadType,
                             const RWCString& soTemplateName ) {

   int nIndex = 
      pSingletsSortedByTemplateName_->nFindFirstOccurrenceOfMatch( soTemplateName );

   if ( nIndex == RW_NPOS ) 
      return( false );

   bool bLookAgain = true;
   for( ; nIndex < pSingletsSortedByTemplateName_->entries() && bLookAgain; 
        ++nIndex ) {
      singletInfo* pSinglet = (*pSingletsSortedByTemplateName_)[ nIndex ];
      if ( pSinglet->soTemplate_ != soTemplateName )
         bLookAgain = false;
      else {
         if ( pSinglet->nReadType_ == nReadType )
            return( true );
      }
   }

   return( false );
}
   
   


void Assembly :: addRedundancyToModelReadFor9_66() {


   int nAmountToShiftQualityDistribution =
      (int) pCP->dAutoFinishRedundancy_ - 1;

   for( int nUnpaddedSeq = pModelReadNumberOfReadsAtEachLocation_->nGetStartIndex();
        nUnpaddedSeq <= pModelReadNumberOfReadsAtEachLocation_->nGetEndIndex();
        ++nUnpaddedSeq ) {
      
      float* pArrayOfQualityValues =
         (*pModelReadDistributions_)[ nUnpaddedSeq ];

      // shift left

      int nQuality;
      for( nQuality = 0; 
           nQuality <= nMAX_QUALITY_LEVEL2 - nAmountToShiftQualityDistribution;
           ++nQuality ) {

         pArrayOfQualityValues[ nQuality ] =
            pArrayOfQualityValues[ nQuality + nAmountToShiftQualityDistribution ];

      }

      for( nQuality = nMAX_QUALITY_LEVEL2 - nAmountToShiftQualityDistribution + 1;
           nQuality <= nMAX_QUALITY_LEVEL2; ++nQuality ) {
          pArrayOfQualityValues[ nQuality ] = 1.0;
      }
   }
}




void Assembly :: getReadLengthStatistics( double& dMean,
                                          double& dStandardDeviation ) {

   if ( !bAlreadyCalculatedReadLengthStatistics_ )
      findReadLengthStatistics();

   dMean = dMeanReadLength_;
   dStandardDeviation = dStandardDeviationOfReadLength_;
}
   
      


void Assembly :: findReadLengthStatistics() {

   // calculate mean and standard deviation of all reads that 
   // have a high quality segment.  Then throw out the outliers and do
   // it again.

   double dTotalEndOfHighQualityRegions = 0.0;
   double dTotalSquaredEndOfHighQualityRegions = 0.0;
   int nTotalReads = 0;

   int nContig;
   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig();
           ++nRead ) {

         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );

         if ( pLocFrag->bWholeReadIsLowQuality_ ) continue;

         int nEndOfHighQualityReadPos;
         if ( pLocFrag->bComp() ) {
            
            //  <-----------------------------
            // lllllHHHHHHHHHHHHHHHHHHlllllllll
            //      ^
            //      nGetHighQualityStart()
            //                                ^
            //                           nGetAlignEnd()

            nEndOfHighQualityReadPos = 
               pContig->nUnpaddedIndex( pLocFrag->nGetAlignEnd() ) -
               pContig->nUnpaddedIndex( pLocFrag->nGetHighQualityStart() ) +
               1;

         }
         else {
            // -------------------------------->
            // llllllHHHHHHHHHHHHHHHllllllllllll
            //                     ^
            //                  nGetHighQualityEnd()
            // ^
            // nGetAlignStart()


            nEndOfHighQualityReadPos =
               pContig->nUnpaddedIndex( pLocFrag->nGetHighQualityEnd() )
               -
               pContig->nUnpaddedIndex( pLocFrag->nGetAlignStart() )
               + 1;
         }

         dTotalEndOfHighQualityRegions += nEndOfHighQualityReadPos;
         dTotalSquaredEndOfHighQualityRegions +=
            ( nEndOfHighQualityReadPos * nEndOfHighQualityReadPos );
         ++nTotalReads;
      }
   }


   if ( nTotalReads == 0 ) {
      // this caused floating point exception if there were 0 
      // reads with high quality segment.  This info is not used
      // in such a case (finding stops) addStopsCausingLCQs so 
      // it really doesn't matter what the value is.
      // (Helped Kerrie Bubb, 1/14/03)

      dMeanReadLength_ = 0.0;
      dStandardDeviationOfReadLength_ = 0.0;
      bAlreadyCalculatedReadLengthStatistics_ = true;
      return;
   }

   
   dMeanReadLength_ = dTotalEndOfHighQualityRegions / (double) nTotalReads;
   dStandardDeviationOfReadLength_ = 
      sqrt( dTotalSquaredEndOfHighQualityRegions / (double) nTotalReads
            - dMeanReadLength_*dMeanReadLength_ );


   // now do this same process again, throwing out outliers

   int nTooBig = dMeanReadLength_ + 3.0*dStandardDeviationOfReadLength_;
   
   dTotalEndOfHighQualityRegions = 0.0;
   dTotalSquaredEndOfHighQualityRegions = 0.0;
   nTotalReads = 0;

   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig();
           ++nRead ) {

         LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead );

         if ( pLocFrag->bWholeReadIsLowQuality_ ) continue;

         int nEndOfHighQualityReadPos;
         if ( pLocFrag->bComp() ) {
            
            //  <-----------------------------
            // lllllHHHHHHHHHHHHHHHHHHlllllllll
            //      ^
            //      nGetHighQualityStart()
            //                                ^
            //                           nGetAlignEnd()

            nEndOfHighQualityReadPos = 
               pContig->nUnpaddedIndex( pLocFrag->nGetAlignEnd() ) -
               pContig->nUnpaddedIndex( pLocFrag->nGetHighQualityStart() ) +
               1;

         }
         else {
            // -------------------------------->
            // llllllHHHHHHHHHHHHHHHllllllllllll
            //                     ^
            //                  nGetHighQualityEnd()
            // ^
            // nGetAlignStart()


            nEndOfHighQualityReadPos =
               pContig->nUnpaddedIndex( pLocFrag->nGetHighQualityEnd() )
               -
               pContig->nUnpaddedIndex( pLocFrag->nGetAlignStart() )
               + 1;
         }

         if ( nEndOfHighQualityReadPos >= nTooBig ) continue;

         dTotalEndOfHighQualityRegions += nEndOfHighQualityReadPos;
         dTotalSquaredEndOfHighQualityRegions +=
            ( nEndOfHighQualityReadPos * nEndOfHighQualityReadPos );
         ++nTotalReads;
      }
   }


   if ( nTotalReads == 0 ) {
      // this caused floating point exception if there were 0 
      // reads with high quality segment.  This info is not used
      // in such a case (finding stops) addStopsCausingLCQs so 
      // it really doesn't matter what the value is.
      // (Helped Kerrie Bubb, 1/14/03)

      dMeanReadLength_ = 0.0;
      dStandardDeviationOfReadLength_ = 0.0;
      bAlreadyCalculatedReadLengthStatistics_ = true;
      return;
   }

   
   dMeanReadLength_ = dTotalEndOfHighQualityRegions / (double) nTotalReads;
   dStandardDeviationOfReadLength_ = 
      sqrt( dTotalSquaredEndOfHighQualityRegions / (double) nTotalReads
            - dMeanReadLength_*dMeanReadLength_ );

   bAlreadyCalculatedReadLengthStatistics_ = true;
}




void Assembly :: showMicrosatellites() {

   setAllPaddedPositionsArrays();


   gotoList* pGotoList = new gotoList();
   
   // first look through the entire assembly for runs

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
   
      const int nExtraXs = 4;
      int nConsensusLength;
      char* szUnpaddedConsensus = 
         pContig->szGetUnpaddedConsensus( nConsensusLength, nExtraXs );

      // pad on the right with more x's

      strcat( szUnpaddedConsensus + nConsensusLength, "xxxx" );

      int nStringLength = nConsensusLength + nExtraXs;
      

      int nBestScore;
      do {
         int nEndingRowOfBestScoredAlignment;
         int nEndingColOfBestScoredAlignment;

         findRepeat( szUnpaddedConsensus, nStringLength, &nBestScore,  
                     &nEndingRowOfBestScoredAlignment,
                     &nEndingColOfBestScoredAlignment );

         if ( nBestScore >= pCP->nAutoFinishMinSmithWatermanScoreOfARun_ ) {
            // report this run
            

            RWCString soAlignment1;
            RWCString soAlignment2;
            RWCString soAlignment3;
            int nBeginningRowOfBestScoredAlignment;
            int nBeginningColOfBestScoredAlignment;


            findAlignment( szUnpaddedConsensus, 
                           nStringLength,
                           nBestScore,
                           nEndingRowOfBestScoredAlignment,
                           nEndingColOfBestScoredAlignment,
                           &nBeginningRowOfBestScoredAlignment,
                           &nBeginningColOfBestScoredAlignment,
                           soAlignment1,
                           soAlignment2,
                           soAlignment3 );


            // the unpadded position is 1-based while the
            // location is zero-based
            int nUnpaddedLeft = nBeginningRowOfBestScoredAlignment + 1;
            int nUnpaddedRight = nEndingRowOfBestScoredAlignment + 1;


            int nUnpaddedLeft2 = nBeginningColOfBestScoredAlignment + 1;
            int nUnpaddedRight2 = nEndingColOfBestScoredAlignment + 1;

            // print the alignment to the xterm


            printf( "\n\n%s  score = %d\n",
                    pContig->soGetName().data(),
                    nBestScore ); 

            int nUnpaddedTop = nUnpaddedLeft;
            int nUnpaddedBottom = nUnpaddedLeft2;
            for( int nPadded = 0; nPadded <= soAlignment1.length();
                 nPadded += 50 ) {
               
               int nMaxPadded = MIN( soAlignment1.length() - 1,
                                     nPadded + 49 );

               printf( "%-9d   ", nUnpaddedTop );
	       int nPadded2;
               for( nPadded2 = nPadded; nPadded2 <= nMaxPadded; ++nPadded2 ) {
                  printf( "%c", soAlignment1[ nPadded2 ] );
                  if ( soAlignment1[ nPadded2 ] != '*' ) ++nUnpaddedTop;
               }
               printf( "   %-9d\n", nUnpaddedTop - 1 );

               printf( "            " );
               for( nPadded2 = nPadded; nPadded2 <= nMaxPadded; ++nPadded2 )
                  printf( "%c", soAlignment2[ nPadded2 ] );
               printf( "\n" );

               printf( "%-9d   ", nUnpaddedBottom );
               for( nPadded2 = nPadded; nPadded2 <= nMaxPadded; ++nPadded2 ) {
                  printf( "%c", soAlignment3[ nPadded2 ] );
                  if ( soAlignment3[ nPadded2 ] != '*' ) ++nUnpaddedBottom;
               }
               printf( "   %-9d\n", nUnpaddedBottom - 1 );
               printf( "\n\n" );
            }

            int nPaddedLeft = pContig->nPaddedIndexFast( nUnpaddedLeft );
            int nPaddedRight = pContig->nPaddedIndexFast( nUnpaddedRight );

            RWCString soNote( (long) nBestScore );

            gotoItem* pGotoItem = new gotoItem( pContig,
                                                NULL,
                                                nPaddedLeft,
                                                nPaddedRight,
                                                nUnpaddedLeft,
                                                nUnpaddedRight,
                                                soNote );
          
            pGotoList->addToList( pGotoItem );

            // set up for trying to find another run.  
            
            // this masks out most of the alignment so that, hopefully,
            // the next one will be found somewhere else

            for( int nZeroBased = nUnpaddedLeft - 1;
                 nZeroBased <= ( nUnpaddedRight - 1 ); ++nZeroBased ) {
               szUnpaddedConsensus[ nZeroBased ] = 'x';
            }

         }

      } while( nBestScore >= pCP->nAutoFinishMinSmithWatermanScoreOfARun_ );

      delete szUnpaddedConsensus;
   } // for( int nContig ...


   pGotoList->sortByPosition();

   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
      new guiMultiContigNavigator( "Microsatellites",
                                   "",
                                   "",
                                   75, // nWidthInChars
                                   "",
                                   NULL,
                                   NULL,
                                   NULL,
                                   pGotoList,
                                   NULL )
      );

}



void Assembly :: showMononucleotideRichRegions() {


   gotoList* pGotoList = new gotoList();

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      bool bFoundRegion;
      pContig->addMononucleotideRichRegionsToList(
                        pGotoList,
                        pContig->nGetUnpaddedStartIndex(),
                        pContig->nGetUnpaddedEndIndex(),
                        true, // bNeedToSetPaddedPositionsArrays
                        bFoundRegion );

   }

   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
       new guiMultiContigNavigator( "Mononucleotide-Rich Regions",
                                "",
                                "",
                                75, // nWidthInChars
                                "",
                                NULL,
                                NULL,
                                NULL,
                                pGotoList,
                                NULL ) 
       );
}
      




void Assembly :: showDinucleotideRichRegions() {

   gotoList* pGotoList = new gotoList();

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      bool bFoundRegion;
      pContig->addDinucleotideRichRegionsToList(
                       pGotoList,
                       pContig->nGetUnpaddedStartIndex(),
                       pContig->nGetUnpaddedEndIndex(),
                       true, // bNeedToSetPaddedPositionsArrays
                       bFoundRegion );
   }

   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
      new guiMultiContigNavigator( "Dinucleotide-Rich Regions",
                                   "",
                                   "",
                                   75, // nWidthInChars
                                   "",
                                   NULL,
                                   NULL,
                                   NULL,
                                   pGotoList,
                                   NULL ) 
      );
}


      







void Assembly :: showRunsCausingLCQs() {

   gotoList* pRunsGotoList = new gotoList();

   for( int nContig = 0;  nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      pContig->addRunsAndMicrosatellitesCausingLCQs( pRunsGotoList,
                                                     false );
   }


   pRunsGotoList->sortByPosition();


   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
       new guiMultiContigNavigator( 
                                "Runs and Microsatellites Causing Low Consensus Quality Regions",
                                "",
                                "",
                                75, // nWidthInChars
                                "",
                                NULL,
                                NULL,
                                NULL,
                                pRunsGotoList,
                                NULL )
       );

}
   




void Assembly :: showRunsAndStopsCausingLCQs() {

   gotoList* pRunsAndStopsGotoList = new gotoList();

   for( int nContig = 0;  nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      pContig->addStopsCausingLCQs( pRunsAndStopsGotoList, true );

      pContig->addRunsAndMicrosatellitesCausingLCQs( pRunsAndStopsGotoList,
                                                     false );
   }


   pRunsAndStopsGotoList->sortByPosition();


   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
       new guiMultiContigNavigator( 
                                "Runs and Stops Causing Low Consensus Quality Regions",
                                "",
                                "",
                                75, // nWidthInChars
                                "",
                                NULL,
                                NULL,
                                NULL,
                                pRunsAndStopsGotoList,
                                NULL )
       );

}


void Assembly :: showStopsCausingLCQs() {

   gotoList* pStopsGotoList = new gotoList();

   for( int nContig = 0;  nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      pContig->addStopsCausingLCQs( pStopsGotoList, true );
   }

   pStopsGotoList->sortByPosition();

   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
       new guiMultiContigNavigator( 
                                "Stops Causing Low Consensus Quality Regions",
                                "",
                                "",
                                75, // nWidthInChars
                                "",
                                NULL,
                                NULL,
                                NULL,
                                pStopsGotoList,
                                NULL )
       );

}


void Assembly :: showEditableLowConsensusQualityRegions() {
   cerr << "in showEditableLowConsensusQualityRegions" << endl;

   gotoList* pEditableSitesList = new gotoList();
   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );
      pContig->addEditableSites( pEditableSitesList );
   }

   ConsEd::pGetConsEd()->addGuiMultiContigNavigator( 
       new guiMultiContigNavigator( 
                                "Editable Low Consensus Quality Regions",
                                "",
                                "",
                                75, // nWidthInChars
                                "",
                                NULL,
                                NULL,
                                NULL,
                                pEditableSitesList,
                                NULL )
       );

}
   
   


void Assembly :: printMinilibrariesSummaryFile() {

   // if there is only one contig
   if ( !pContigEndPairArray_ ) return;


   for( int nPair = 0; nPair < pContigEndPairArray_->length(); ++nPair ) {
      contigEndPair* pPair = (*pContigEndPairArray_)[ nPair ];

      // there might not be any minilibrary for this contigEndPair
      if ( !pPair->pChoiceOfTemplatesForMinilibraries_ ) continue;

      fprintf( pMINILIBRARIES, "%s,%s,%s,%s",
              pPair->pContig_[0]->soGetName().data(),
              ( pPair->nWhichEnd_[0] == nLeftGap ? "left" : "right" ),
              pPair->pContig_[1]->soGetName().data(),
              ( pPair->nWhichEnd_[1] == nLeftGap ? "left" : "right" )
              );

      for( int nMini = 0; 
           ( nMini < pCP->nAutoFinishSuggestThisManyMinilibrariesPerGap_ ) &&
           ( nMini < pPair->pChoiceOfTemplatesForMinilibraries_->length() );
           ++nMini ) {
         
         templateForMinilibrary* pMini = 
            (*(pPair->pChoiceOfTemplatesForMinilibraries_))[ nMini ];
         
         fprintf( pMINILIBRARIES, ",%s",
                  pMini->pSub_->soTemplateName_.data() );
      } //   for( int nMini = 0; ...


      fprintf( pMINILIBRARIES, "\n" );

   } //  for( int nPair = 0; ...
}



void Assembly :: figureOutRealContigs() { 

   if ( aRealContigs_.length() == 0 )
      aRealContigs_.resize( (size_t) ( nNumContigs()/ 10 ) );

   aRealContigs_.clear();

   for( int nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );


      if ( pContig->bIAmARealContig() ) {
         aRealContigs_.insert( pContig );
      }
   }
}

static int nCompareContigEndPairs( const contigEndPair** ppCEP1,
                              const contigEndPair** ppCEP2 ) {

   if ( !(*ppCEP1)->bUserDefined_ && !(*ppCEP2)->bUserDefined_ ) {
      // most common case--both contigEndPair's are based on forward
      // reverse pairs
      if ( (*ppCEP1)->nNumberOfForwardReversePairs_ <
           (*ppCEP2)->nNumberOfForwardReversePairs_ )
         return( 1 );
      else if (  
               (*ppCEP1)->nNumberOfForwardReversePairs_ >
               (*ppCEP2)->nNumberOfForwardReversePairs_ )
         return( -1 );
      else 
         return( 0 );
   }
   else if ( (*ppCEP1)->bUserDefined_ && !(*ppCEP2)->bUserDefined_ )
      return( -1 );
   else if ( !(*ppCEP1)->bUserDefined_ && (*ppCEP2)->bUserDefined_ )
      return( 1 );
   else {
      // both are user defined.  In this case, I really don't care,
      // but let's sort by contig name to define an order.

      if ( (*ppCEP1)->pContig_[0]->soGetName() <
           (*ppCEP2)->pContig_[0]->soGetName() )
         return( -1 );
      else if ( (*ppCEP1)->pContig_[0]->soGetName() ==
                (*ppCEP2)->pContig_[0]->soGetName() )
         return( 0 );
      else
         return( 1 );
   }
}





// currently (July 2002) I see only 3 uses for this:
// assemblyView
// restriction digest
// showing the scaffold
// I do not see this used by autoFinish.
void Assembly :: figureOutContigOrderAndOrientation() {

   FILE* pSaveAO = pAO;
   if ( pAO == stderr && 
        !pCP->filDumpContigOrderAndOrientationInfoToThisFile_.isNull() ) {
      pAO = fopen( pCP->filDumpContigOrderAndOrientationInfoToThisFile_.data(),
                   "w" );

      if ( !pAO ) {
         fprintf( stderr, "failed to open %s: %s\n",
                  pCP->filDumpContigOrderAndOrientationInfoToThisFile_.data(),
                  soGetErrno().data() );

      }
   }

   setContigTemplateArrays();


   figureOutRealContigs();

   
   if ( pContigEndPairArray_ ) {
      pContigEndPairArray_->clearAndDestroy();
   }
   else {
      pContigEndPairArray_ = new RWTPtrOrderedVector<contigEndPair>(
          (size_t) ( aRealContigs_.length() * 2 ),
          "Assembly::figureOutContigOrderAndOrientation ) pContigEndPairArray_" );
   }


   // if we are clearAndDestroy'ing pContigEndPairArray_, we darn
   // well better not continue to point to those contigEndPair's.
   // (added Jan, 2002 to make assemblyView able to come up more
   // than once)
   int nContig;
   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      pContig->pContig_[ nLeftGap ] = NULL;
      pContig->pContig_[ nRightGap ] = NULL;

      pContig->nWhichEnd_[ nLeftGap ] = 0;
      pContig->nWhichEnd_[ nRightGap ] = 0;

      pContig->pCEP_[ nLeftGap ] = NULL;
      pContig->pCEP_[ nRightGap ] = NULL;
   }




   getUserDefinedContigEndPairs( *pContigEndPairArray_ );


   RWTPtrSortedVector<Contig> aContigsConsideredSoFar( 
                                       (size_t) nNumContigs() );


   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      for( int nGap = 0; nGap <= 1; ++nGap ) {
         int nWhichGap = ( nGap == 0 ) ? nLeftGap : nRightGap;

         pContig->findNeighborForOneGap2( 
                                 nWhichGap,
                                 aContigsConsideredSoFar,
                                 pContigEndPairArray_ );

      } //    for( int nGap = 0; 

      aContigsConsideredSoFar.insert( pContig );

   } //  for( nContig = 0; nContig


   // sort the pairs based on the number of forward/reverse pairs
   // supporting the ordering


   void* pArray = (void*) pContigEndPairArray_->data();
   size_t nNumberOfElementsInArray = pContigEndPairArray_->length();
   size_t nSizeOfAnElement = sizeof( contigEndPair* );

   qsort( pArray, nNumberOfElementsInArray, nSizeOfAnElement,
       ( ( int(*) ( const void*, const void*) ) nCompareContigEndPairs )  );


   for( int nContigEndPair = 0; nContigEndPair < 
           (int) pContigEndPairArray_->length() - 1;
        ++nContigEndPair ) {

      contigEndPair* pPair1 = (*pContigEndPairArray_)[ nContigEndPair ];
      contigEndPair* pPair2 = (*pContigEndPairArray_)[ nContigEndPair + 1 ];

      if ( !pPair1->bUserDefined_ &&
           !pPair2->bUserDefined_ &&
           pPair1->nNumberOfForwardReversePairs_ <
           pPair2->nNumberOfForwardReversePairs_ ) {

         RWCString soMessage = 
            RWCString( "qsort of pContigEndPairArray_ in assembly2.cpp ")  +
            RWCString( (long) __LINE__ ) + " did not work with positions " +
            RWCString( (long) nContigEndPair ) + 
            " and " + 
            RWCString( (long) ( nContigEndPair + 1 ) ) + 
            " out of a total of " + 
            RWCString( (long) pContigEndPairArray_->length() )  + " pairs";
         throw ProgramLogicError( soMessage );
      }
   }


   // work through the contigEndPairs from front to back, constructing 
   // chains by linking them together

   RWTPtrOrderedVector<contigEndPair> aContigEndPairsCouldNotBePlacedInFirstPass( 
           (size_t) pContigEndPairArray_->length() );

   int nTryToPlaceThisPair;
   for( nTryToPlaceThisPair = 0; 
        nTryToPlaceThisPair < pContigEndPairArray_->length(); 
        ++nTryToPlaceThisPair ) {

      contigEndPair* pTryToPlacePair = 
         (*pContigEndPairArray_)[ nTryToPlaceThisPair ];

      // see if this pair can be joined with any of the chains
      // already constructed
      
      Contig* pContig0 = pTryToPlacePair->pContig_[0];
      Contig* pContig1 = pTryToPlacePair->pContig_[1];

      int nWhichEnd0 = pTryToPlacePair->nWhichEnd_[0];
      int nWhichEnd1 = pTryToPlacePair->nWhichEnd_[1];
         

      //      int nIndexInContig0 = ( nWhichEnd0 == nLeftGap ) ? 0 : 1;
      //      int nIndexInContig1 = ( nWhichEnd1 == nLeftGap ) ? 0 : 1;

      // check for consistency with the contig end pairs already
      // recorded.  

      if ( pContig0->pContig_[ nWhichEnd0 ] &&
           pContig1->pContig_[ nWhichEnd1 ] ) {
         if ( ( pContig0->pContig_[ nWhichEnd0 ] == pContig1 ) &&
              ( pContig1->pContig_[ nWhichEnd1 ] == pContig0 ) &&
              ( pContig0->nWhichEnd_[ nWhichEnd0 ] == nWhichEnd1 ) &&
              ( pContig1->nWhichEnd_[ nWhichEnd1 ] == nWhichEnd0 ) ) {
            // we've already placed this pair, so ignore it.

            // This could happen if
            // user defined contig end pairs could have 2 copies
            // of the same pair...DG March 2004)
            continue;
         }
      }


      if ( pContig0->pContig_[ nWhichEnd0 ] ||
           pContig1->pContig_[ nWhichEnd1 ] ) {

         // inconsistency, maybe.  One (or both) of these ends 
         // is already joined to some other contig end.  But perhaps
         // the two scaffolds can be merged by interlacing them.

         aContigEndPairsCouldNotBePlacedInFirstPass.insert( pTryToPlacePair );
      }
      else {
         // neither end has yet been placed

         pContig0->pContig_[ nWhichEnd0 ] =
            pContig1;
         pContig0->nWhichEnd_[ nWhichEnd0 ] =
            nWhichEnd1;
         pContig0->pCEP_[ nWhichEnd0 ] = pTryToPlacePair;
         


         pContig1->pContig_[ nWhichEnd1 ] =
            pContig0;
         pContig1->nWhichEnd_[ nWhichEnd1 ] =
            nWhichEnd0;
         pContig1->pCEP_[ nWhichEnd1 ] = pTryToPlacePair;
      }
   } //    for( int nTryToPlaceThisPair = 0; 


   // Now see if some of the contigEndPairs that couldn't be placed
   // in the first pass are 
   // actually consistent--they just have to squeeze between
   // the consistent (and bigger) contigs.  Or perhaps they are consistent 
   // in this manner:
   // ____ ____ (existing linked contigs )
   // A    B
   // ____       ______  ("inconsistent linked contigs)
   // A             C
   // In such a case, I need to add C.
   

   for( nTryToPlaceThisPair = 0;
        nTryToPlaceThisPair < 
           aContigEndPairsCouldNotBePlacedInFirstPass.length();
        ++nTryToPlaceThisPair ) {

      contigEndPair* pInconsistentContigEndPair =
         aContigEndPairsCouldNotBePlacedInFirstPass[ nTryToPlaceThisPair ];

      if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {
         fprintf( pAO, "trying to use inconsistent link %s\n",
              pInconsistentContigEndPair->soGetDescription().data() );
      }


      tryToMergeContigEndPairWithExistingScaffold( pInconsistentContigEndPair );
   }

   // at this point, how can we find the chains?  No, better to just
   // look through the *contigs*, not the contig-end-pairs, and find
   // chains that way.  Any contig that has one or the other link
   // missing is the end of a chain.  So then follow the links to get
   // all contigs in the chain.

   // One other issue: shall we invert the chains?  We can deal with
   // that when we get to the ends.  We probably should keep a list of
   // the chains.  I think we are not inverting the scaffolds. (June, 2001)

   aHeadsOfScaffoldsOfContigs_.clear();
   aScaffoldIsCircularNotLinear_.clear();

   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      pGetContig( nContig )->bPlacedInAScaffold_ = false;
   }


   for( nContig = 0; nContig < nNumContigs(); ++nContig ) {
      Contig* pContig = pGetContig( nContig );

      // ignore little contigs that are not connected to anything

      if ( !pContig->bIAmARealContig() &&
	  !pContig->pContig_[ nLeftGap ] &&
	  !pContig->pContig_[ nRightGap ] )
         continue;


      // we are only interested in heads of scaffolds.  Heads will not
      // have pointers to previous and next contigs.
      if ( !pContig->bPlacedInAScaffold_ && ! (pContig->pContig_[nLeftGap] &&
                                            pContig->pContig_[nRightGap] ) ) {

         pContig->bPlacedInAScaffold_ = true;
         aHeadsOfScaffoldsOfContigs_.insert( pContig );
         aScaffoldIsCircularNotLinear_.insert( false );

         // we've found a contig that is in a scaffold, but
         // we haven't yet put it in the list of scaffolds
         
         pContig->pPreviousContig_ = NULL;
         // we are looking for the end that is null
         int nPreviousContigWasConnectedAtWhichEndOfThisContig =
            ( pContig->pContig_[nLeftGap] ? nRightGap : nLeftGap );


         pContig->pNextContig_ = 
            pContig->pGetNextContigInScaffold( 
                        nPreviousContigWasConnectedAtWhichEndOfThisContig,
                        pContig->bThisContigIsComplementedInTheScaffold_,
                        nPreviousContigWasConnectedAtWhichEndOfThisContig );
                               
         // this contig might be in a chain all by itself.  If not,
         // mark all other contigs in the chain.


         while( pContig->pNextContig_ ) {
            Contig* pPreviousContig = pContig;
            pContig = pContig->pNextContig_;
            pContig->pPreviousContig_ = pPreviousContig;
            pContig->bPlacedInAScaffold_ = true;
         
            pContig->pNextContig_ = 
               pContig->pGetNextContigInScaffold( 
                    nPreviousContigWasConnectedAtWhichEndOfThisContig,
                    pContig->bThisContigIsComplementedInTheScaffold_,
                    nPreviousContigWasConnectedAtWhichEndOfThisContig );
         
            
         }
         
         // when reached here, we have found the end of the chain.

      } //       if ( !pContig->bPlacedInAScaffold_ ) {
   } //  nContig = 0; nContig < aRealContigs_.length(); ++nContig ) {


   // there may be some circular scaffolds.  The contigs in such scaffolds
   // will not yet be put into any scaffolds.

   for( nContig = 0; nContig < aRealContigs_.length(); ++nContig ) {

      Contig* pContig = aRealContigs_[ nContig ];


      if ( !pContig->bPlacedInAScaffold_ ) {
         // this must be in a circular scaffold
         assert( pContig->pContig_[ nLeftGap ] && 
                 pContig->pContig_[ nRightGap ] );
         
         // start a scaffold with this contig

         pContig->bPlacedInAScaffold_ = true;
         aHeadsOfScaffoldsOfContigs_.insert( pContig );
         aScaffoldIsCircularNotLinear_.insert( true );

         // follow the way around this scaffold

         Contig* pOriginalContig = pContig;
         pContig->pPreviousContig_ = NULL;
         pContig->pNextContig_ = pContig->pContig_[ nRightGap ];
         pContig->bThisContigIsComplementedInTheScaffold_ = false;
         
         int nPreviousContigWasConnectedAtWhichEndOfThisContig = 
            pContig->nWhichEnd_[ nRightGap ];

         while( pContig->pNextContig_ != pOriginalContig ) {
            Contig* pPreviousContig = pContig;
            pContig = pContig->pNextContig_;

            pContig->pPreviousContig_ = pPreviousContig;
            pContig->bPlacedInAScaffold_ = true;
            
            pContig->pNextContig_ = 
               pContig->pGetNextContigInScaffold( 
                    nPreviousContigWasConnectedAtWhichEndOfThisContig,
                    pContig->bThisContigIsComplementedInTheScaffold_,
                    nPreviousContigWasConnectedAtWhichEndOfThisContig );


         }

         // pContig->pNextContig_ is pointing to the head of the scaffold.
         // Although this is correct, it will cause problems with the digest
         // code which wants a null at the end to indicate the end.

         pContig->pNextContig_ = NULL;
      } //  if ( !pContig->bPlacedInAScaffold_ ) {
   } //  for( nContig = 0; nContig < aRealContigs_.length(); ++nContig ) {


   // at this point, all real contigs should be placed in scaffolds


   
   RWCString soErrorMessage;
   for( nContig = 0; nContig < aRealContigs_.length(); ++nContig ) {

      Contig* pContig = aRealContigs_[ nContig ];

      if ( !pContig->bPlacedInAScaffold_ ) {
         soErrorMessage += pContig->soGetName();
         soErrorMessage += " is not in a scaffold ";
      }
   }

   if ( ! soErrorMessage.isNull() ) {
      THROW_ERROR2( soErrorMessage );
   }


   // Some of the chains will not contain any "real" contigs (contigs
   // that meet all the criteria to be shown).  I wish I could have
   // eliminated those chains earlier, but that is difficult because
   // any little contig should be included if it has fwd/rev pair data
   // connecting it to a real contig, so it can only be eliminated
   // after creating the chain.  The heads of chains may be little
   // contigs, so I can't run the filter on the heads of contigs.
   // Thus chains will be eliminated at this stage.

   // Similarly, some scaffolds will not include any user-requested contigs.
   // Eliminate such scaffolds (except for the case in which the user is
   // not requesting any contigs--then this filter should not apply).


   // reverse any chains that must be because most bases are in complemented
   // contigs

   RWTPtrOrderedVector<Contig> aUserDesiredContigs;
   if ( !pCP->soAssemblyViewContigsToShow_.isNull() ) {
      RWCTokenizer tokContigs( pCP->soAssemblyViewContigsToShow_ );
      
      RWCString soOneContig;
      while( !( soOneContig = tokContigs(',')).isNull() ) {
         Contig* pContig =
            ConsEd::pGetAssembly()->pGetContigByVariousNames( soOneContig );
         if ( pContig )
            aUserDesiredContigs.insert( pContig );
      }
   }




   int nChain;
   for( nChain = aHeadsOfScaffoldsOfContigs_.length() - 1; nChain >= 0;
        --nChain ) {

      Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nChain ];

      int nComplementedBases = 0;
      int nUncomplementedBases = 0;
      bool bFoundARealContig = false;
      bool bFoundAUserRequestedContigInThisScaffold = false;
      while( pContig ) {

         if ( pContig->bIAmARealContig() )
            bFoundARealContig = true;

         if ( aUserDesiredContigs.index( pContig ) != RW_NPOS ) {
            bFoundAUserRequestedContigInThisScaffold = true;
         }

         if ( pContig->bThisContigIsComplementedInTheScaffold_ ) 
            nComplementedBases += pContig->nGetUnpaddedSeqLength();
         else
            nUncomplementedBases += pContig->nGetUnpaddedSeqLength();
         
         pContig = pContig->pNextContig_;
      }

      if ( !bFoundARealContig || (aUserDesiredContigs.length() != 0 &&
                     !bFoundAUserRequestedContigInThisScaffold ) ) {
         // case in which the chain only has little contigs
         // So delete the chain.

         aHeadsOfScaffoldsOfContigs_.removeAt( nChain );
         continue; // next scaffold
      }


      if ( nComplementedBases > nUncomplementedBases ) {
         // reverse the entire chain

         Contig* pLastContig = NULL;

         pContig = aHeadsOfScaffoldsOfContigs_[ nChain ];
         while( 1 ) {
            Contig* pOldNextContig = pContig->pNextContig_;
            pContig->pNextContig_ = pContig->pPreviousContig_;
            pContig->pPreviousContig_ = pOldNextContig;
            pContig->bThisContigIsComplementedInTheScaffold_ = 
               !pContig->bThisContigIsComplementedInTheScaffold_;

            if ( !pOldNextContig ) {
               pLastContig = pContig;
               break;
            }

            pContig = pOldNextContig;
         }

         aHeadsOfScaffoldsOfContigs_[ nChain ] = pLastContig;
      }
   } // for( nChain = 0; ...

   

   // print out the chains

   for( nChain = 0; nChain < aHeadsOfScaffoldsOfContigs_.length(); 
        ++nChain ) {
      Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nChain ];
      
      // first contig in chain:

      if ( pContig->bThisContigIsComplementedInTheScaffold_ ) {
         if ( pContig->bIsThisEndTheEndOfTheClone( nRightGap ) ) {

            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  "right end of %s is the end of the clone\n",
                    pContig->soGetName().data() );
            }
         }
         else {
            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  "unknown contig on right of %s\n",
                    pContig->soGetName().data() );
            }
         }
      }
      else {
         if ( pContig->bIsThisEndTheEndOfTheClone( nLeftGap ) ) {

            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  "left end of %s is the end of the clone\n", 
                    pContig->soGetName().data() );
            }
         }
         else {

            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  "unknown contig on left of %s\n",
                    pContig->soGetName().data() );
            }
         }
      }


      // set up for beginning of connection to next contig, if there is one
      if ( pContig->pNextContig_ ) {
         if ( pContig->bThisContigIsComplementedInTheScaffold_ ) {
            
            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {
               fprintf( pAO,  "left end of %s ",
                 pContig->soGetName().data() );
            }
         }
         else {

            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  "right end of %s ",
                        pContig->soGetName().data() );
            }
         }
      }


      while( pContig->pNextContig_ ) {
         pContig = pContig->pNextContig_;

         if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

            fprintf( pAO,  "connected to %s end of %s\n", 
                 ( pContig->bThisContigIsComplementedInTheScaffold_ ?
                   "right" : "left" ),
                    pContig->soGetName().data() );
         }

      }

      // reached end of the chain

      if ( pContig->bThisContigIsComplementedInTheScaffold_ ) {
         if ( pContig->bIsThisEndTheEndOfTheClone( nLeftGap ) ) {


            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {
               fprintf( pAO,  " and left end of %s is the clone end\n",
                    pContig->soGetName().data() );
            }
         }
         else {

            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  " and left end of %s is connected to unknown contig\n",
                    pContig->soGetName().data() );
            }
         }
      }
      else {
         if ( pContig->bIsThisEndTheEndOfTheClone( nRightGap ) ) {

            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  " and right end of %s is the clone end\n",
                    pContig->soGetName().data() );
            }
         }
         else {
            if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) {

               fprintf( pAO,  " and right end of %s is connected to unknown contig\n",
                    pContig->soGetName().data() );
            }
         }
      }         


   } //    for( int nChain = 0; nChain < aChainsOfContigs.length(); ...


   if ( pAO != pSaveAO ) {
      fclose( pAO );
      pAO = pSaveAO;
   }



} // void Assembly :: figureOutContigOrderAndOrientation() {





void Assembly :: insertContigEndPairIfItIsNotAlreadyThere( 
                                          contigEndPair*& pPairToInsert ) {

   for( int nContigEndPair = 0; nContigEndPair < pContigEndPairArray_->length();
        ++nContigEndPair ) {

      contigEndPair* pPair = (*pContigEndPairArray_)[ nContigEndPair ];
      if ( ( pPair->pContig_[nLeftGap] == pPairToInsert->pContig_[nLeftGap] ) &&
           ( pPair->pContig_[nRightGap] == pPairToInsert->pContig_[nRightGap] ) &&
           ( pPair->nWhichEnd_[nLeftGap] == pPairToInsert->nWhichEnd_[nLeftGap] ) &&
           ( pPair->nWhichEnd_[nRightGap] == pPairToInsert->nWhichEnd_[nRightGap] ) ) {
         // found the same pair of contig ends.

         pPairToInsert = pPair;
         return;
      }
   }

   
   // if reached here, did not find the same contig/end pair

   pContigEndPairArray_->insert( pPairToInsert );
}