/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "addAlignedSequence.h"
#include    "soGetErrno.h"
#include    "assert.h"
#include    "soLine.h"
#include    <cstdio>
using namespace std;
#include    "rwctokenizer.h"
#include    "alignmentPiece.h"
#include    "bIsNumericMaybeWithWhitespace.h"
#include    "consed.h"
#include    "sequence.h"
#include    "szGetTime.h"
#include    "soGetDateTime.h"
#include    "consedParameters.h"
#include    "terminateIfNoPhdDir.h"
#include    "maybeTerminateIfAnotherReadWriteConsed.h"
#include    "listOfLibraries.h"
#include    "fileDefines.h"
#include    "automatedConsedInit.h"


void addAlignedSequence :: doIt() {


   automatedConsedInit( filAceFileToOpen_ );

   readAlignedSequenceFastaFile();

   saveAssembly();

   cerr << "Wrote new ace file: " << filNewAceFile_ << endl;

}


void addAlignedSequence :: readAlignedSequenceFastaFile() {


   FILE* pAlignmentFastaFile = fopen( filAlignmentFastaFile_.data(), "r" );
   if ( !pAlignmentFastaFile ) {
      RWCString soErrorMessage = "couldn't open ";
      soErrorMessage += filAlignmentFastaFile_;
      soErrorMessage += " error: ";
      soErrorMessage += soGetErrno();
      THROW_ERROR( soErrorMessage );
   }
   

   bool bFirstTime = true;
   while( 1 ) {

      bool bEndOfFile;

      alignmentPiece* pAlignmentPiece;

      getChimpHeaderLine( pAlignmentFastaFile,
                          pAlignmentPiece,
                          bEndOfFile
                          );

      if ( bEndOfFile ) {
         if ( bFirstTime ) {
            RWCString soErrorMessage = "premature end of ";
            soErrorMessage += filAlignmentFastaFile_;
            soErrorMessage += " while looking for 1st line";
            THROW_ERROR( soErrorMessage );
         }
         else {
            break;
         }
      }

      bFirstTime = false;


      getPaddedChimpSequence( pAlignmentFastaFile,
                              pAlignmentPiece );

      getHumanHeaderLine( pAlignmentFastaFile,
                          pAlignmentPiece );

      getPaddedHumanSequence( pAlignmentFastaFile,
                              pAlignmentPiece );

      findExistingReadWithWRItem();

      // process this alignment piece

      makeReadOutOfThisAlignmentPiece( pAlignmentPiece );

      delete pAlignmentPiece;
   }

   fclose( pAlignmentFastaFile );

   ConsEd::pGetAssembly()->setChanged();

}


void addAlignedSequence :: getChimpHeaderLine( 
      FILE* pAlignmentFastaFile,
      alignmentPiece*& pAlignmentPiece,
      bool& bEndOfFile ) {

   bEndOfFile = false;

   if ( fgets( soLine.data(), nMaxLineSize, pAlignmentFastaFile ) 
     == NULL ) {
      bEndOfFile = true;
      return;
   }
   
   soLine.nCurrentLength_ = strlen( soLine.data() );
   
   pAlignmentPiece = new alignmentPiece();
   pAlignmentPiece->soOrganismA_ = "chimp";
   pAlignmentPiece->soOrganismB_ = "human";

   // header line is for chimp and looks like this:
   // >chr13 chimp_unpadded_chr_positions: start=23459 end=24573 strand=+ padded_seq_length=459
   
   RWCTokenizer tok( soLine );
   RWCString soChimpChromosome = tok();
   RWCString soKeyword = tok();
   RWCString soStart = tok();
   RWCString soEnd = tok();
   RWCString soChimpStrand = tok();
   RWCString soPaddedSeqLength = tok();
   
#define CHIMP_HEADER_LINE filAlignmentFastaFile_ + " chimp header line should have been of the form: >chr13 chimp_unpadded_chr_positions: start=23459 end=24573 strand=+ padded_seq_length=459\n"
   
   
   if ( soPaddedSeqLength.isNull() ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += "but did not have enough tokens:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   // check each parameter
   
   if ( !soChimpChromosome.bStartsWith( ">chr" ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += "but didn't start with >chr Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   // get rid of leading ">"
   pAlignmentPiece->soChromosomeA_ = soChimpChromosome.soGetRestOfString( 1 );
   
   if ( soKeyword != "chimp_unpadded_chr_positions:" ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but \"chimp_unpadded_positions:\" was missing.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   if ( !soStart.bStartsWithAndRemove( "start=" ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but \"start=\" was missing.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   if ( !soEnd.bStartsWithAndRemove( "end=" ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but \"end=\" was missing.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   if ( !soChimpStrand.bStartsWithAndRemove( "strand=" ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but \"strand=\" was missing.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }

   if ( soChimpStrand == "+" )
      pAlignmentPiece->bAIsTopNotBottomStrand_ = true;
   else if ( soChimpStrand == "-" ) 
      pAlignmentPiece->bAIsTopNotBottomStrand_ = false;
   else {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but \"strand=\" was not followed by + or -.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   if ( !soPaddedSeqLength.bStartsWithAndRemove( "padded_seq_length=" ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but \"padded_seq_length=\" was missing.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   
   if ( !bIsNumericMaybeWithWhitespace( soStart, 
                                        pAlignmentPiece->n1UnpaddedStartA_ ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but start=(#) was not numeric.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   if ( !bIsNumericMaybeWithWhitespace( soEnd, 
                                        pAlignmentPiece->n1UnpaddedEndA_ ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but end=(#) was not numeric.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }
   
   if ( !bIsNumericMaybeWithWhitespace( soPaddedSeqLength, 
                                        pAlignmentPiece->nPaddedAlignmentLength_ ) ) {
      RWCString soErrorMessage = CHIMP_HEADER_LINE;
      soErrorMessage += " but padded_seq_length=(#) was not numeric.  Line:\n";
      soErrorMessage += soLine;
      THROW_ERROR( soErrorMessage );
   }

}
      
      
void addAlignedSequence :: getPaddedChimpSequence( 
        FILE* pAlignmentFastaFile,
        alignmentPiece* pAlignmentPiece ) {


   // this line/sequence could be very, very long.  Thus better
   // not use fgets

   pAlignmentPiece->soPaddedBasesA_.increaseMaxLength( 
      pAlignmentPiece->nPaddedAlignmentLength_ + 1 );


   pAlignmentPiece->soPaddedBasesA_.nCurrentLength_ = 0;

   // bypassing repeated RWCString checks for efficiency

   char c;
   while( ( c = fgetc( pAlignmentFastaFile ) ) != '\n' ) {

      if ( c == EOF ) {
         // bad news
         RWCString soErrorMessage = "premature end of file ";
         soErrorMessage += filAlignmentFastaFile_;
         soErrorMessage += " while reading chimp sequence";
         THROW_ERROR( soErrorMessage );
      }


      pAlignmentPiece->soPaddedBasesA_.sz_[ 
         pAlignmentPiece->soPaddedBasesA_.nCurrentLength_ 
                                        ] = c;

      ++pAlignmentPiece->soPaddedBasesA_.nCurrentLength_;
   }

   // null terminate
   pAlignmentPiece->soPaddedBasesA_.sz_[ 
         pAlignmentPiece->soPaddedBasesA_.nCurrentLength_ 
                                        ] = 0;

   assert( pAlignmentPiece->soPaddedBasesA_.nCurrentLength_ == 
           pAlignmentPiece->nPaddedAlignmentLength_ );

}


void addAlignedSequence :: getHumanHeaderLine( 
        FILE* pAlignmentFastaFile,
        alignmentPiece* pAlignmentPiece ) {

   
   if ( fgets( soLine.data(), nMaxLineSize, pAlignmentFastaFile ) == NULL ) {
      RWCString soErrorMessage = filAlignmentFastaFile_;
      soErrorMessage += " ended prematurely while looking for human header line";
      THROW_ERROR( soErrorMessage );
   }

   soLine.nCurrentLength_ = strlen( soLine.data() );

   // line is like:
   // >chr7 human_unpadded_chr_positions: start=29475465 end=29475500 desired_human_unpadded_chr_positions: start=29475465 end=29475500 seq_length=36
   // aaaaaaaaattagccgggcatggtggcaggcgcctg
   
   RWCTokenizer tok( soLine );
   RWCString soHumanChromosome = tok();
   RWCString soKeyword1 = tok();
   RWCString soStart = tok();
   RWCString soEnd = tok();
   RWCString soKeyword2 = tok();
   RWCString soDesiredStart = tok();
   RWCString soDesiredEnd = tok();
   RWCString soPaddedSeqLength = tok();


#define THROW_HUMAN_HEADER_ERROR( soVariableErrorMessage ) \
   RWCString soErrorMessage = "in file "; \
   soErrorMessage += filAlignmentFastaFile_; \
   soErrorMessage += " header header line should have been of the form: >chr7 human_unpadded_chr_positions: start=29475465 end=29475500 desired_human_unpadded_chr_positions: start=29475465 end=29475500 seq_length=36 but "; \
   soErrorMessage += soVariableErrorMessage; \
   soErrorMessage += " line:\n"; \
   soErrorMessage += soLine; \
   THROW_ERROR( soErrorMessage );


#define HUMAN_HEADER_LINE filAlignmentFastaFile_ + " header header line should have been of the form: >chr7 human_unpadded_chr_positions: start=29475465 end=29475500 desired_human_unpadded_chr_positions: start=29475465 end=29475500 seq_length=36"

   if ( soPaddedSeqLength.isNull() ) {
      THROW_HUMAN_HEADER_ERROR( "did not have enough tokens." );
   }

   if ( !soHumanChromosome.bStartsWith( ">chr" ) ) {
      THROW_HUMAN_HEADER_ERROR( "1st token didn't start with \">chr\"" );
   }

   // get rid of leading ">"
   pAlignmentPiece->soChromosomeB_ = soHumanChromosome.soGetRestOfString( 1 );

   if ( soKeyword1 != "human_unpadded_chr_positions:" ) {
      THROW_HUMAN_HEADER_ERROR( "2nd token wasn't human_unpadded_chr_positions:" );
   }

   if ( !soStart.bStartsWithAndRemove( "start=" ) ) {
      THROW_HUMAN_HEADER_ERROR( "couldn't find start= where it should be" );
   }

   if ( !soEnd.bStartsWithAndRemove( "end=" ) ) {
      THROW_HUMAN_HEADER_ERROR( "couldn't find end= where it should be" );
   }

   if ( soKeyword2 != "desired_human_unpadded_chr_positions:" ) {
      THROW_HUMAN_HEADER_ERROR( "couldn't find desired_human_unpadded_chr_positions: where it should be" );
   }

   if ( !soDesiredStart.bStartsWithAndRemove( "start=" ) ) {
      THROW_HUMAN_HEADER_ERROR( "couldn't find start= on desired human unpadded chr positions" );
   }

   if ( !soDesiredEnd.bStartsWithAndRemove( "end=" ) ) {
      THROW_HUMAN_HEADER_ERROR( "couldn't find end= on desired human unpadded chr positions" );
   }

   if ( !soPaddedSeqLength.bStartsWithAndRemove( "seq_length=" ) ) {
      THROW_HUMAN_HEADER_ERROR( "couldn't find seq_length= where it should be" );
   }

   if ( !bIsNumericMaybeWithWhitespace( soStart,
                                        pAlignmentPiece->n1UnpaddedStartB_ ) ) {
      THROW_HUMAN_HEADER_ERROR( "human start wasn't numeric" );
   }

   if ( !bIsNumericMaybeWithWhitespace( soEnd,
                                        pAlignmentPiece->n1UnpaddedEndB_ ) ) {
      THROW_HUMAN_HEADER_ERROR( "human end wasn't numeric" );
   }

   
   if ( !bIsNumericMaybeWithWhitespace( 
              soDesiredStart,
              pAlignmentPiece->n1DesiredUnpaddedStartB_ ) ) {
      THROW_HUMAN_HEADER_ERROR( "desired human start wasn't numeric" );
   }

   if ( !bIsNumericMaybeWithWhitespace(
              soDesiredEnd,
              pAlignmentPiece->n1DesiredUnpaddedEndB_ ) ) {
      THROW_HUMAN_HEADER_ERROR( "desired human end wasn't numeric" );
   }


   int nPaddedSeqLength;
   if ( !bIsNumericMaybeWithWhitespace( soPaddedSeqLength, nPaddedSeqLength ) ) {
      THROW_HUMAN_HEADER_ERROR( "padded seq length wasn't numeric" );
   }

   if ( nPaddedSeqLength != pAlignmentPiece->nPaddedAlignmentLength_ ) {
      RWCString soErrorMessage = "chimp says padded length is ";
      soErrorMessage += RWCString( (long) pAlignmentPiece->nPaddedAlignmentLength_ );
      soErrorMessage += " and human says ";
      soErrorMessage += RWCString( (long) nPaddedSeqLength );
      soErrorMessage += " but they should be equal";
      THROW_ERROR( soErrorMessage );
   }
}

         
void addAlignedSequence :: getPaddedHumanSequence(
        FILE* pAlignmentFastaFile,
        alignmentPiece* pAlignmentPiece ) {

   pAlignmentPiece->soPaddedBasesB_.increaseMaxLength(
       pAlignmentPiece->nPaddedAlignmentLength_ + 1 );

   pAlignmentPiece->soPaddedBasesB_.nCurrentLength_ = 0;

   // bypassing repeated RWCString checks for efficiency

   // note that the file might end without a terminating newline
   char c;
   while( ( c = fgetc( pAlignmentFastaFile ) ) != '\n' &&
          c != EOF ) {
      
      pAlignmentPiece->soPaddedBasesB_.sz_[
          pAlignmentPiece->soPaddedBasesB_.nCurrentLength_
                                       ] = c;

      ++pAlignmentPiece->soPaddedBasesB_.nCurrentLength_;
   }

   // null terminate

   pAlignmentPiece->soPaddedBasesB_.sz_[
       pAlignmentPiece->soPaddedBasesB_.nCurrentLength_
                                    ] = 0;

   assert( pAlignmentPiece->soPaddedBasesB_.nCurrentLength_ ==
           pAlignmentPiece->nPaddedAlignmentLength_ );

}


void addAlignedSequence :: findExistingReadWithWRItem() {

   RWCString soReadName = filPhdFileWithWRItem_;
   assert( soReadName.bContainsAndRemoveToEnd( ".phd." ) );

   pExistingReadWithWRItem_ = 
      ConsEd::pGetAssembly()->pGetLocatedFragmentByName( soReadName );
}
           

void addAlignedSequence :: makeReadOutOfThisAlignmentPiece( 
        alignmentPiece* pAlignmentPiece ) {

   // this code is a mix of compareContigs :: createFakeRead() and
   // the addNewReads functions


   // at what point should the insertColOfPads be done?  I think it
   // must be before this read is added to the contig and its
   // alignstart position set.

   insertColumnsOfPadsInAssembly( pAlignmentPiece );


   RWCString soChimpReadName = "chimp_";
   soChimpReadName += pAlignmentPiece->soChromosomeA_;
   soChimpReadName += "_";
   soChimpReadName += RWCString( (long) pAlignmentPiece->n1UnpaddedStartA_ );

   pFakeChimpRead_ = 
      new LocatedFragment( 
                          soChimpReadName,
                          1, // temporary value for alignment position
                          false, // current read is complemented
                          pExistingReadWithWRItem_->pGetContig() );


   pExistingReadWithWRItem_->pGetContig()->addLocatedFragment( 
                                              pFakeChimpRead_ );
   
   // not necessary since Contig::addLocatedFragment does this
   //   pExistingReadWithWRItem_->pGetContig()->bReadListsNeedFixing_ = true;

   pFakeChimpRead_->createPointPosArray2( 
            Sequence::cLittle, 
          // no need for big numbers since we will just put zeros in here
            pAlignmentPiece->soPaddedBasesB_.length() * 1.1,
            false ); // bFillUpArray

   // add bases and alignment information

   addBasesAndPadsToChimpSequence( pAlignmentPiece );
   

   pFakeChimpRead_->setPHDFilename( soChimpReadName + ".phd.1" );
   pFakeChimpRead_->setChromatFilename( "no_chromat" ); // see compareContigs2.cpp

   char* szTimestamp = szGetTime();
   pFakeChimpRead_->setPHDTimestamp( szTimestamp );

   wholeReadItem* pWR = new wholeReadItem( pFakeChimpRead_,
                                           "fake", // type
                                           "consed", // source
                                           soGetDateTime( nNoSlashes ),
                                           "" );


   pFakeChimpRead_->aWholeReadItems_.append( pWR );


   // should add another WR item indicating where this read came from

   RWCString soData = "chimp ";
   soData += pAlignmentPiece->soChromosomeA_;
   soData += " 1-based-chromosome-positions: start=";
   soData += RWCString( (long) pAlignmentPiece->n1UnpaddedStartA_ );
   soData += " end=";
   soData += RWCString( (long) pAlignmentPiece->n1UnpaddedEndA_ );
   soData += " strand=";
   soData += (pAlignmentPiece->bAIsTopNotBottomStrand_ ? "+" : "-" );

   wholeReadItem* pWR2 = new wholeReadItem( pFakeChimpRead_,
                                           "part-of-chromosome", // type
                                           "consed", // source
                                           soGetDateTime( nNoSlashes ),
                                           soData );

   pFakeChimpRead_->aWholeReadItems_.append( pWR2 );

   pFakeChimpRead_->writePHD( pFakeChimpRead_->filGetPHDFilename() );
}


void addAlignedSequence :: addBasesAndPadsToChimpSequence( 
             alignmentPiece* pAlignmentPiece ) {


   // must initialize nConsPos and nConsPosOfLastAddedBase

   // e.g., if both are position 1000 within chromosome, 
   // then starting position within read is 1
   int n1UnpaddedStartOfAlignmentWithinExistingRead =
      pAlignmentPiece->n1UnpaddedStartB_ - 
      pAlignmentPiece->n1DesiredUnpaddedStartB_ + 1;

   Contig* pContig = pExistingReadWithWRItem_->pGetContig();

   pContig->setPaddedPositionsArray();
   pContig->setUnpaddedPositionsArray();

   int nConsPos = pExistingReadWithWRItem_->nGetConsPosFromUnpaddedFragPos( 
                     n1UnpaddedStartOfAlignmentWithinExistingRead );

   int nUnpaddedConsPos = pContig->nUnpaddedIndex( nConsPos );


   // -----------------------------------------------------
   // IMPORTANT:
   // the alignment starts here
   // -----------------------------------------------------

   pFakeChimpRead_->nAlignStartPos_ = nConsPos;

   // set these up so first base will show no pads need be added
   int nInterveningChimpBasesAdded = 0;
   int nConsPosOfLastReferencePosition = nConsPos; 

   --nUnpaddedConsPos; // set up for first ++ 


   assert( pAlignmentPiece->soPaddedBasesA_.length() ==
           pAlignmentPiece->soPaddedBasesB_.length() );

   for( int nzAlignmentPos = 0; 
        nzAlignmentPos < pAlignmentPiece->soPaddedBasesA_.length();
        ++nzAlignmentPos ) {

      char cHumanBaseInAlignment = 
         pAlignmentPiece->soPaddedBasesB_[ nzAlignmentPos ];

      char cChimpBaseInAlignment = 
         pAlignmentPiece->soPaddedBasesA_[ nzAlignmentPos ];

      if ( cHumanBaseInAlignment != '-' ) {
         // since the base is not a pad, we have the 
         // chance to find the consensus position of the
         // corresponding base in the assembly.  find the
         // nConsPos of the corresponding base in the assembly.
         // The only reason to do this is to decide if we should
         // add pads to the chimp read (below).

         // Note:  this depends on the consensus and the human read
         // to have pads in the same places

         ++nUnpaddedConsPos;


         nConsPos = pContig->nPaddedIndexFast( nUnpaddedConsPos );

         assert( 
             pExistingReadWithWRItem_->ntGetFragFromConsPos( nConsPos ).cGetBase() ==
             tolower( cHumanBaseInAlignment ) );

         // this brings nConsPos to the corresponding base in the 
         // assembled human read

      }
            
      if ( cChimpBaseInAlignment != '-' &&
           cHumanBaseInAlignment != '-' ) {

         // this marks a reference position in which we know
         // what nConsPos the chimp base should be at.  Thus
         // we can tell if there were any additional pads
         // in the assembly than in the alignment and, if so,
         // we should add additional pads to the chimp sequence

         int nChimpPadsToAdd = nConsPos -
            nConsPosOfLastReferencePosition - 1 - nInterveningChimpBasesAdded;

         if ( nChimpPadsToAdd > 0 ) {
            Quality ucForPads = 0;
            for( int nPad = 0; nPad < nChimpPadsToAdd; ++nPad ) {
               pFakeChimpRead_->appendNtide( 
                    Ntide( '*', 
                           pCP->nAddAlignedSequenceQualityOfBases_ ) );

               pFakeChimpRead_->appendPointPos( 0 );
            }
         }
         
         // set up for next time
         nInterveningChimpBasesAdded = 0;
         nConsPosOfLastReferencePosition = nConsPos;
      }
      else {
         ++nInterveningChimpBasesAdded;
      }

      if ( cChimpBaseInAlignment == '-' )
         cChimpBaseInAlignment = '*';


      // now can add the chimp base

      pFakeChimpRead_->appendNtide( 
                Ntide( cChimpBaseInAlignment, 
                       pCP->nAddAlignedSequenceQualityOfBases_ ) 
                                   );

      pFakeChimpRead_->appendPointPos( 0 );

   } //   for( int nzAlignmentPos = 0; 


   // quality and alignment clipping

   pFakeChimpRead_->setQualAndAlignClipping( pFakeChimpRead_->nGetAlignStart(),
                                             pFakeChimpRead_->nGetAlignEnd(),
                                             pFakeChimpRead_->nGetAlignStart(),
                                             pFakeChimpRead_->nGetAlignEnd() );


}


void addAlignedSequence :: insertColumnsOfPadsInAssembly( 
             alignmentPiece* pAlignmentPiece ) {

   
   // must initialize nConsPos and nConsPosOfLastAddedBase

   // e.g., if both are position 1000 within chromosome, 
   // then starting position within read is 1

   // the start of the read is at n1DesiredUnpaddedStartB_
   // (even though the start of the alignment is n1UnpaddedStartB_ )
   // so the 1-based end of the read is:
   int n1UnpaddedEndOfAlignmentWithinExistingRead =
      pAlignmentPiece->n1UnpaddedEndB_ - 
      pAlignmentPiece->n1DesiredUnpaddedStartB_ + 1;

   int nConsPos = 
      pExistingReadWithWRItem_->nGetConsPosFromUnpaddedFragPos( 
               n1UnpaddedEndOfAlignmentWithinExistingRead );

   // This starts where there really wasn't a base added.  
   // I could instead have had a flag saying "no base added yet" to
   // signal that it shouldn't add any pads.  But then there would
   // be the problem of pads at the beginning of the alignment
   // (I can't be sure this never happens with the Santa Cruz alignments.)
   // In such a case, this works.  For example:

   // bbbb  (chimp)
   // b*** (human)
   //     ^nConsPosOfLastAddedBase
   // so this would add 3 pads before adding the b, just as we would want.

   int nConsPosOfLastNonPadBase = nConsPos + 1;
   int nPadsInAlignedHumanSeq = 0;
      
   // if we go backwards, there will be no problems will the
   // consensus positions being screwed up


   for( int nzAlignmentPos = pAlignmentPiece->soPaddedBasesB_.length() - 1;
        nzAlignmentPos >= 0;
        --nzAlignmentPos ) {

      char cHumanBaseInAlignment = 
         pAlignmentPiece->soPaddedBasesB_[ nzAlignmentPos ];

      if ( cHumanBaseInAlignment == '-' ) {
         ++nPadsInAlignedHumanSeq;
         continue;
      }

      // if reached here, the human alignment base is not a pad.  See if
      // any pads should be added to the assembly.

      // move to left in the human read over any pads

      nConsPos = nConsPosOfLastNonPadBase - 1;
      while( pExistingReadWithWRItem_->ntGetFragFromConsPos( nConsPos ).bIsPad() ) {
         --nConsPos;
      }


      assert( pExistingReadWithWRItem_->ntGetFragFromConsPos( nConsPos ).cGetBase()
                     ==
               tolower( cHumanBaseInAlignment ) );
      

      // e.g., if bb
      //           ^ nConsPosOfLastNonPadBase
      //          ^ nConsPos
      // then no pads in alignment

      int nPadsInReadInAssembly = nConsPosOfLastNonPadBase - nConsPos - 1;

      int nPadsToAdd = nPadsInAlignedHumanSeq - nPadsInReadInAssembly;

      // this number could be negative, which is dealt with above when 
      // we added pads to the chimp sequence

      if ( nPadsToAdd > 0 ) {
         for( int nPad = 0; nPad < nPadsToAdd; ++nPad ) {
            // insert just before nConsPos+1
            // since nConsPos is a non-pad we have just backed up to.
            pExistingReadWithWRItem_->pContig_->insertColOfPads( nConsPos + 1 );
         }
      }
      
      // set up for next non-pad
      nPadsInAlignedHumanSeq = 0;
      nConsPosOfLastNonPadBase = nConsPos;
   } //  for( int nzAlignmentPos = ...
}


void addAlignedSequence :: saveAssembly() {

   if ( filNewAceFile_.isNull() ) {

      filNewAceFile_ = filAceFileToOpen_.filGetNextVersion();

      // find next version that doesn't already exist

      while( filNewAceFile_.bFileByThisNameExists() ) {
         filNewAceFile_ = filNewAceFile_.filGetNextVersion();
      }
   }


   ConsEd::pGetAssembly()->saveToFile( filNewAceFile_,
                                       false ); // ace file format 2
}