/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "paddedPieceOfRead.h"
#include    "bIsAPad.h"
#include    "contig.h"
#include    "assert.h"


// if the base pointed to by nZeroBasedPaddedPos is a pad, the
// unpadded position returned is that from the previous base

int paddedPieceOfRead :: nUnpaddedContigPosFromZeroBasedPaddedPos( 
                                     const int nZeroBasedPaddedPos ) 
{
  assert( 0 <= nZeroBasedPaddedPos );
  assert( nZeroBasedPaddedPos < nGetPaddedLength() );
  
  int nUnpadded = nUnpaddedOffset_;

  // count pads up to nZeroBasedPaddedPos


  int nPads = 0;
  for( int nZeroBased2 = 0; nZeroBased2 <= nZeroBasedPaddedPos; ++nZeroBased2 ) {
     if ( bIsAPad( soBasesAndPads_[ nZeroBased2 ] ) )
        ++nPads;
  }

  //       pad      pad
  //        v        v
  // bbbbbbbPbbbbbbbbPbbbbbbbbbbbbPbbbbbbbb
  // ^                     ^
  // nUnpaddedOffset_      nZeroBasedPaddedPos
  // If the sequence is complemented, the unpadded pos counts down 
  // to the right.

  if ( bComplemented_ ) {
     return( nUnpadded = nUnpaddedOffset_ - nZeroBasedPaddedPos + nPads );
  }
  else {
     return( nUnpaddedOffset_ + nZeroBasedPaddedPos - nPads );
  }
}


int paddedPieceOfRead :: nZeroBasedAlignPosFromUnpadded( 
                                    const int nSourceUnpadded ) {

   int nNumberOfNonPads; // the number of nonpads we must
                         // pass to get to the base at nSourceUnpadded
   if ( bComplemented_ )
      nNumberOfNonPads = nUnpaddedOffset_ - nSourceUnpadded;
   else
      nNumberOfNonPads = nSourceUnpadded - nUnpaddedOffset_;

   assert( 0 <= nNumberOfNonPads );
   // this is checked below, but checked better since 
   // it is possible for the assertion to be true, but
   // nZeroBasedUnpadded to still be wrong.
   //      assert( nNumberOfNonPads <= soBasesAndPads_.length() );

   // checks:  1) not off end 2) if # of non-pads now equals nSourceUnpadded
   // Perform these checks before incrementing

   int nNonPads = 0;
   for( int nZeroBasedAlignPos = 0; 
        nZeroBasedAlignPos < soBasesAndPads_.length();
        ++nZeroBasedAlignPos ) {

      if ( !bIsAPad( soBasesAndPads_[ nZeroBasedAlignPos ] ) ) {
         ++nNonPads;
         if ( nNonPads == nNumberOfNonPads )
            return( nZeroBasedAlignPos );
      }
   }

   // should never reach here--reaching here means that this routine
   // is being passed an unpadded position that isn't in the alignment

   assert( false );
}


paddedPieceOfRead :: paddedPieceOfRead( const RWCString& soName,
                                        const RWCString& soBasesAndPads,
                                        const int nZeroBasedPaddedPos,
                                        const int nPaddedConsPos,
                                        Contig* pContig,
                                        const int nUnpaddedAlignmentStart,
                                        const int nUnpaddedAlignmentEnd,
                                        const int nPaddedAlignmentStart,
                                        const int nPaddedAlignmentEnd,
                                        const bool bComplemented ) :

  soName_( soName ),
  soBasesAndPads_( soBasesAndPads ),
  pContig_( pContig ),
  nUnpaddedAlignmentStart_( nUnpaddedAlignmentStart ),
  nUnpaddedAlignmentEnd_( nUnpaddedAlignmentEnd ),
  nPaddedAlignmentStart_( nPaddedAlignmentStart ),
  nPaddedAlignmentEnd_( nPaddedAlignmentEnd ),
  aBaseAndPadQualities_( (size_t) soBasesAndPads.length() ),
  bComplemented_( bComplemented )
{

  assert( !bIsAPad( soBasesAndPads_[ nZeroBasedPaddedPos ] ) );

  // if we go back to the Contig object, we should find the same
  // base as the one in the center of this alignment


  if ( bComplemented ) {
     assert( toupper( soBasesAndPads_[ nZeroBasedPaddedPos ] ) ==
             toupper( cComplementBase( pContig_->ntideGet( nPaddedConsPos ).cGetBase() ) ) );
  }
  else {
     assert( toupper( soBasesAndPads_[ nZeroBasedPaddedPos ] ) ==
          toupper( pContig_->ntideGet( nPaddedConsPos ).cGetBase()) );
  }

  // we need to calculate nUnpaddedOffset_
  // do this by counting the pads up to and including the
  // base at nZeroBasedPaddedPos

  // the < nZeroBasedPaddedPos instead of <= is because I know
  // that the way this is used, this base isn't a pad

  int nTotalPads = 0;
  for( int nZeroBased = 0; nZeroBased < nZeroBasedPaddedPos; ++nZeroBased ) {
     if ( bIsAPad( soBasesAndPads[ nZeroBased ] ) )
        ++nTotalPads;
  }

  int nUnpaddedOfKnownBase = pContig_->nUnpaddedIndex( nPaddedConsPos );

  if ( bComplemented_ ) {
     nUnpaddedOffset_ = 
        nUnpaddedOfKnownBase + nZeroBasedPaddedPos - nTotalPads;
  }
  else {
     nUnpaddedOffset_ = 
        nUnpaddedOfKnownBase - nZeroBasedPaddedPos + nTotalPads;
  }
     

  calculateQualities();
}


void paddedPieceOfRead :: calculateQualities() {


   int nUnpadded = -666;
   bool bFirstBase = true;

   bool bProblems = false;

   for( int nZeroBasedAlignPos = 0; 
        nZeroBasedAlignPos < soBasesAndPads_.length(); ++nZeroBasedAlignPos ) {


      char cBaseOrPad = soBasesAndPads_[ nZeroBasedAlignPos ];

      if ( bFirstBase ) {
         // this routine is very expensive, so only do it for the 
         // first base in the loop
         nUnpadded = nUnpaddedContigPosFromZeroBasedPaddedPos( 
                                                  nZeroBasedAlignPos );
         bFirstBase = false;
      }
      else {
         if ( !bIsAPad( cBaseOrPad ) ) {
            if ( bComplemented_ )
               --nUnpadded;
            else
               ++nUnpadded;
         }
      }

      // what shall we use for the quality of the initial or terminal
      // blanks, the ones that are used context bases when we have
      // reached the end of the contig?  Doesn't matter since it won't
      // be used when displaying the base.

      Quality qual = 0; 
      if ( cBaseOrPad != ' ' ) {

         // convert unpadded to padded

         int nConsPos = pContig_->nPaddedIndexFast( nUnpadded );
         
         // find the quality shade of the base in the consensus
         
         qual = pContig_->ntGetCons( nConsPos ).qualGetQualityWithout98or99();
         
         if ( !bIsAPad( cBaseOrPad ) ) {
            if ( bComplemented_ ) {

               if ( tolower( cComplementBase( cBaseOrPad ) ) !=
                    pContig_->ntGetCons( nConsPos ).cGetBase() ) {

                  bool bMatch = ( tolower( cComplementBase( cBaseOrPad ) ) ==
                               pContig_->ntGetCons( nConsPos ).cGetBase() );

                  cerr << ( bMatch ? "   " : "mis" ) << " " <<
                     (char) tolower( cComplementBase( cBaseOrPad ) ) << " " <<
                     pContig_->ntGetCons( nConsPos ).cGetBase() << " " <<
                     pContig_->nUnpaddedIndex( nConsPos ) << endl;


                  bProblems = true;
               }
               
            }
            else {

               if ( tolower( cBaseOrPad ) !=
                    pContig_->ntGetCons( nConsPos ).cGetBase() ) {


                  bool bMatch =  ( tolower( cBaseOrPad ) ==
                      pContig_->ntGetCons( nConsPos ).cGetBase()  ? 
                                   true : false );

                  cerr << ( bMatch ? "   " : "mis" ) << " " <<
                     (char) tolower( cBaseOrPad ) << " " <<
                     pContig_->ntGetCons( nConsPos ).cGetBase() << " " <<
                     pContig_->nUnpaddedIndex( nConsPos ) << endl;


                  bProblems = true;
               }
            }
         } //  if ( !bIsAPad( cBaseOrPad ) ) {
         
         // note that we are inserting qualities that come from only
         // the unpadded consensus bases.  So even if we have this situation:
         // bbbbbb*bbbbbbbb consensus
         // bbbbbb*bbbbbbbb this read
         //       ^
         //    the quality of this base will be that of the unpadded 'b'
         // just before it--not the quality of the pad in the consensus
         // (which, of course, is the average of the qualities of the two
         // unpadded bases on either side of it.
      }

      aBaseAndPadQualities_.insert( qual );
   } //    for( int nZeroBasedAlignPos = 0; 


   assert( aBaseAndPadQualities_.length() == soBasesAndPads_.length() );

   if (bProblems ) {
      cerr << soName_ << endl;
      THROW_ERROR("program bug--see xterm for details" );
   }
}


int paddedPieceOfRead :: nGetLeftMostUnpaddedConsPosOfAlignment() {
   int nUnpadded = nUnpaddedContigPosFromZeroBasedPaddedPos( 0 );
   return( nUnpadded );
}


int paddedPieceOfRead :: nGetRightMostUnpaddedConsPosOfAlignment() {
   
   // zero-based position of padded alignment (not related
   // to the contig consensus position--it is a funciton of the
   // the alignment )

   int nZeroBasedOfLastBase = soBasesAndPads_.length() - 1;

   int nUnpadded = 
      nUnpaddedContigPosFromZeroBasedPaddedPos( nZeroBasedOfLastBase );

   return( nUnpadded );
}