/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "findQueryWithinSubject.h"
#include    "getAlignment.h"
#include    <stdlib.h>
#include    <stdio.h>

#ifndef WALRUS_BUILD
#include    <malloc.h>
#endif

#include    "numutil.h"
#include    <iostream.h>
#include    "assert.h"
#include    <string.h>
#include    "consedParameters.h"


// will use the entire query, but not entire subject


static void findQueryWithinSubjectDownRight( 
    const RWCString& soQueryAcross,
    const RWCString& soSubjectDown, 
    const int nGap,
    const int nMatch,
    const int nMismatch,
    int& nSubjectEnd,
    int& nBestScore ) {

  int nBestDown;
  int nScoreDiagonallyAbove;
  int nToBeScoreDiagonallyAbove;
  int nScoreByDiagonal;
  int nAcross;
  int nDown;
  int* nScore;


  char* pQueryAcross = soQueryAcross.data();
  char* pSubjectDown = soSubjectDown.data();

  int nSeqAcrossLength = soQueryAcross.length();
  int nSeqDownLength = soSubjectDown.length();

  // the +1 is because the size of the NW matrix is 1 greater
  // than that of the sequence (in either dimension)
  nScore = (int*) calloc( nSeqAcrossLength + 1, sizeof(int) );

  // scores for for nDown = 0.


  for ( nAcross = 0; nAcross <= nSeqAcrossLength; ++nAcross ) {
     nScore[ nAcross ] = -nGap * nAcross;
  }

  nBestScore = nScore[ nSeqAcrossLength ];
  nBestDown = 0;


  // the above corresponds to a path like this:

  // >>>>>>>>>>>
  // |         |
  // |         |
  // |         |
  // |         |
  // |         |
  // |         |
  // |         |
  // -----------

  // That is, just across the top of the matrix.
  

  for( nDown = 1; nDown <= nSeqDownLength; ++nDown ) {
    nToBeScoreDiagonallyAbove = nScore[0];


    /* case of first column--zero since alignment can start 
     at any row of the first column */
    nScore[0] = 0;


    /* now handle case of other columns */
    for( nAcross = 1; nAcross <= nSeqAcrossLength; ++nAcross ) {
      nScoreDiagonallyAbove = nToBeScoreDiagonallyAbove;
      nToBeScoreDiagonallyAbove = nScore[nAcross];
      
         // the -1 in each case is because the NW matrix
         // has its 0,0 before the start of the sequences
         // When moving from 0,0 to 1,1 you pass the
         // 0th char of each sequence
      
      if ( pSubjectDown[nDown - 1] == pQueryAcross[nAcross - 1] ) 
         nScoreByDiagonal = nScoreDiagonallyAbove + nMatch;
      else
         nScoreByDiagonal = nScoreDiagonallyAbove - nMismatch;


      nScore[nAcross] = MAX( nScore[nAcross] - nGap, // move down
                             MAX( nScore[nAcross - 1] - nGap, // move right
                                  nScoreByDiagonal // move down right
				)
                           );


    } /* for( nAcross = 1 ... */


    // we are only interested in the best score on the right edge
    // since we want the entire query to be used
    // We are now on the right edge since nAcross is at its max

    
    if (nScore[nSeqAcrossLength] > nBestScore ) {
       nBestScore = nScore[nSeqAcrossLength];

       // nBestAcross = nAcross;  not needed since this will always
       // be nSeqAcrossLength--ouch!  This is beyond the end of the
       // sequence!  Yes, nBestDown is not a sequence position but
       // a position within the SW matrix.  The subject sequence ends
       // at nBestDown - 1.


       nBestDown = nDown;

    }


  } /* for( nDown = 1 ... */


  nSubjectEnd = nBestDown - 1; // this is because if the best path
  // are in the NW matrix is from (a,b) to (c,d), the aligned
  // sequences are from (a,b) to (c-1,d-1)

  free( (char*) nScore );
}


static void findStartOfAlignment( const RWCString& soQueryAcross,
                                  const RWCString& soSubjectDown,

// char* szText,
//                                   const int nTextLength,
//                                   char* szQuery,
//                                   const int nQueryLength,
                                  const int n0TrySubjectStart,
                                  const int n0SubjectEnd,
                                  const int nGapPenalty, // positive
                                  const int nMatchBoost, // positive
                                  const int nMismatchPenalty, // positive
                                  int&  n0SubjectStart,
                                  int& nBestScore ) {


   int nQueryAcrossLength = soQueryAcross.length();

   char* szQuery = soQueryAcross.data();
   char* szSubjectDown = soSubjectDown.data();

   // we need to find the beginning of the alignment in the
   // consensus (or read).  There may be insertion or deletion errors
   // so the number of consensus bases in the alignment may be more 
   // or less than the number of bases in the query.  However, using
   // the nMismatches, we know the furthest left it can start.

   // query------>
   // subject
   // |? (does alignment start here?)
   // |? (does alignment start here?)
   // |? (does alignment start here?)
   // |
   // |
   // |
   // |
   // |
   // v         
   

   // keep scores for one row across
   // The +1 is due to the fact that the NW matrix is 1 greater
   // than that of the sequence in either dimension
   // Thus the index as we move from right to left will be
   // from soQuery.length() to 0

   int* nScore = (int*) calloc( nQueryAcrossLength + 1, sizeof( int ) );

   // compute scores for bottom row, starting on right hand side.
   // The bottom row is nDown = n0SubjectEnd + 1 

   int nAcross;
   for( nAcross = nQueryAcrossLength; nAcross >= 0; --nAcross ) {
      nScore[ nAcross ] = -nGapPenalty * ( nQueryAcrossLength - nAcross );
   }


   int nBestScoreOnLeftEdge = nScore[0];
   int nBestDownOnLeftEdge = n0SubjectEnd + 1; 

   // I think the +1 is correct because the NW matrix goes from 0 to 
   // subject_length+1 

  // the above corresponds to a path like this:

  // -----------
  // |         |
  // |         |
  // |         |
  // |         |
  // |         |
  // |         |
  // |         |
  // <<<<<<<<<<<

  // That is, just across the bottom of the matrix.


   // now compute scores for the other rows, starting with the row
   // just above the bottom row

   int nToBeScoreDiagonallyBelowRight;
   int nScoreDiagonallyBelowRight;
   int nScoreByDiagonal;


   // is there any +1 here? --no, since we already (above) considered
   // the row nDown = n0SubjectEnd + 1

   for( int nDown = n0SubjectEnd;
        nDown >= n0TrySubjectStart;
        --nDown ) {

      nToBeScoreDiagonallyBelowRight = nScore[ nQueryAcrossLength ];

      // case of rightmost column.  The MAX with 0 allows us to start
      // the alignment somewhere besides at the very bottom.  Thus we
      // may find a better alignment than the one that ended at the
      // position agrep gave us.  This handles
      // the case of agrep giving several ending positions of the alignments
      // that are all variations of the same alignment.
      
      nScore[ nQueryAcrossLength ] = nScore[ nQueryAcrossLength ] - nGapPenalty;
      
      // now handle other columns

      for( nAcross = nQueryAcrossLength - 1; nAcross >= 0; --nAcross ) {

         nScoreDiagonallyBelowRight = nToBeScoreDiagonallyBelowRight;
         nToBeScoreDiagonallyBelowRight = nScore[ nAcross ];
         
         if ( szQuery[nAcross] == szSubjectDown[nDown] )
            nScoreByDiagonal = nScoreDiagonallyBelowRight + nMatchBoost;
         else
            nScoreByDiagonal = nScoreDiagonallyBelowRight - nMismatchPenalty;

         nScore[ nAcross ] =
            MAX( nScore[nAcross] - nGapPenalty, // move straight up
                 MAX( nScore[ nAcross + 1 ] - nGapPenalty, // move straight left
                      nScoreByDiagonal ) ); // move diagonally up left

         
      }

      // now reached left edge

      if ( nScore[0] > nBestScoreOnLeftEdge ) {
         nBestScoreOnLeftEdge = nScore[0];
         nBestDownOnLeftEdge = nDown;
      }
   } //   for( int nDown = nConsensusZeroBasedEndOfMatch - 1; ...


   free( nScore );

   n0SubjectStart = nBestDownOnLeftEdge; 
   nBestScore = nBestScoreOnLeftEdge;
}


// this uses the entire query
// but it can start and end anywhere within the subject


// need to sort out which of these penalties/boosts are positive and
// which negative--they are all positive just as in
// extendAlignment.cpp

void findQueryWithinSubject( const RWCString& soQueryAcross,
                             const RWCString& soSubjectDown,
                             const int nGap,
                             const int nMatch,
                             const int nMismatch,
                             const int nMinScore,
                             const int nMaxIndels,
                             bool& bFoundMatch,
                             int& n0SubjectStart,
                             int& n0SubjectEnd,
                             int& nScore ) {
                             

// void findQueryWithinSubject( 
//     char* pQueryAcross, 
//     char* pSubjectDown, 
//     int nSeqDownLength, 
// 	int nSeqAcrossLength,  
//     int* pnBestDown, 
//     int* pnBestAcross, 
// 	int* pnBestScore,
//     const int nGap,
//     const int nMatch,
//     const int nMismatch,
//     const int nMinScore,
//     const int nMaxIndels,
//     bool* pFoundMatch ) {

   findQueryWithinSubjectDownRight( soQueryAcross,
                                    soSubjectDown,
                                    nGap,
                                    nMatch,
                                    nMismatch,
                                    n0SubjectEnd,
                                    nScore );


   if ( nScore < nMinScore ) {
      bFoundMatch = false;
      //      cerr << "failed findQueryWithinSubjectDownRight nScore = " << nScore << endl;
      return;
   }
   
   bFoundMatch = true;

   // if reached here, we've found a match and it ends
   // at position n0SubjectEnd within soSubjectDown

   int n0TrySubjectStart = 
      n0SubjectEnd - soQueryAcross.length() - nMaxIndels + 1; 

                    // the -1 is for good-measure

   if ( n0TrySubjectStart < 0 )
      n0TrySubjectStart = 0;

   int n0SubjectStartReturned;
   int nBestScore2;
   findStartOfAlignment( soQueryAcross,
                                 soSubjectDown,
                                 n0TrySubjectStart,
                                 n0SubjectEnd,
                                 nGap,
                                 nMatch,
                                 nMismatch,
                                 n0SubjectStartReturned,
                                 nBestScore2 );


   assert( nScore == nBestScore2 );
   
   n0SubjectStart = n0SubjectStartReturned;

   // just to test

   RWCString soMatchingPartOfSubject = 
      soSubjectDown( n0SubjectStart,
                     n0SubjectEnd - n0SubjectStart + 1 );

   RWCString soAlignmentTop;
   RWCString soAlignmentBottom;
   int nScore3;

   getAlignment( soQueryAcross.data(),
                 soMatchingPartOfSubject.data(),
                 nGap,
                 nMatch,
                 nMismatch,
                 soAlignmentTop,
                 soAlignmentBottom,
                 nScore3 );

                 
   assert( nScore == nScore3 );

   assert( soAlignmentTop.length() == soAlignmentBottom.length() );

   RWCString soAlignmentMiddle( (size_t) soAlignmentTop.length() );

   for( int n = 0; n < soAlignmentTop.length(); ++n ) {
      if ( soAlignmentTop[n] == soAlignmentBottom[n] ) 
         soAlignmentMiddle += " ";
      else {
         soAlignmentMiddle += "x";
         
//          if ( soAlignmentTop[n] == '*' ) {
//             ++pCP->nDebugging_;
//          }
//          else if ( soAlignmentBottom[n] == '*' ) {
//             ++pCP->nDebugging2_;
//          }
//          else {
//             ++pCP->nDebugging3_;
//          }
      }
   }

   
//    if ( pCP->soDebuggingString_ == "FGXQUUK01BJZM7" ) {

//          cerr << soAlignmentTop << endl;
//          cerr << soAlignmentMiddle << endl;
//          cerr << soAlignmentBottom << endl;
//    }

}