/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "primerType.h"
#include    "matchScores.h"
#include    "assert.h"
#include    "bIntersect.h"
#include    "consedParameters.h"


static int nScoreOfPairOfBases_[ 26 ][ 26 ];
static bool bScoreOfPairOfBasesIsSet = false;


static void setScoreOfPairOfBases() {

   // set it to zero initially

   for( int nBase1 = 0; nBase1 < 26; ++nBase1 ) {
      for( int nBase2 = 0; nBase2 < 26; ++nBase2 ) {
         nScoreOfPairOfBases_[ nBase1 ][ nBase2 ] = 0;
      }
   }

   int nA = 'a' - 'a';
   int nC = 'c' - 'a';
   int nG = 'g' - 'a';
   int nT = 't' - 'a';

   nScoreOfPairOfBases_[ nA ][ nA ] = nMismatch;
   nScoreOfPairOfBases_[ nA ][ nC ] = nMismatch;
   nScoreOfPairOfBases_[ nA ][ nG ] = nMismatch;
   nScoreOfPairOfBases_[ nA ][ nT ] = nATMatch;

   nScoreOfPairOfBases_[ nC ][ nA ] = nMismatch;
   nScoreOfPairOfBases_[ nC ][ nC ] = nMismatch;
   nScoreOfPairOfBases_[ nC ][ nG ] = nGCMatch;
   nScoreOfPairOfBases_[ nC ][ nT ] = nMismatch;

   nScoreOfPairOfBases_[ nG ][ nA ] = nMismatch;
   nScoreOfPairOfBases_[ nG ][ nC ] = nGCMatch;
   nScoreOfPairOfBases_[ nG ][ nG ] = nMismatch;
   nScoreOfPairOfBases_[ nG ][ nT ] = nMismatch;

   nScoreOfPairOfBases_[ nT ][ nA ] = nATMatch;
   nScoreOfPairOfBases_[ nT ][ nC ] = nMismatch;
   nScoreOfPairOfBases_[ nT ][ nG ] = nMismatch;
   nScoreOfPairOfBases_[ nT ][ nT ] = nMismatch;

}


bool bDoPCRPrimersStickToEachOther( 
                                   primerType* pFirstPCRPrimer,
                                   primerType* pSecondPCRPrimer ) {

   if ( !bScoreOfPairOfBasesIsSet ) {
      bScoreOfPairOfBasesIsSet = true;
      setScoreOfPairOfBases();
   }

   // -----------> 3' (first primer)
   //       3'   <---- 5' (second primer )
   //  ----------->  
   //            <----
   //   ----------->  
   //            <----
   //    ----------->  
   //            <----
   //     ----------->  
   //            <----
   //      ----------->  
   //            <----
   //       ----------->  
   //            <----
   //        ----------->  
   //            <----
   //         ----------->  
   //            <----
   //         ----------->  
   //            <----
   //          ----------->  
   //            <----
   //           ----------->  
   //            <----
   //            ----------->  
   //            <----
   //             ----------->  
   //            <----
   //              ----------->  
   //            <----
   //               ----------->  
   //            <----
   //                -----------> 3' (first primer)
   //       3'   <---- 5' (second primer )
   //            ^
   //            0
   //                ^
   //                pSecondPCRPrimer->nUnpaddedLength_ - 1
   //                           ^
   //                           pSecondPCRPrimer->nUnpaddedLength_ - 1 +
   //                           pFirstPCRPrimer->nUnpaddedLength_ - 1

   // Notice that the 3' end of the first primer moves from
   // position 0 in the coordinates of the reversed 2nd primer to position


   int nMaxScore = -66666;
   int nMax3PrimeEndOfFirstPCRPrimer = -2;
   int nMaxSubOligoEnd = -2;
   bool bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer;

   int nSecondPCRPrimerRight = pSecondPCRPrimer->nUnpaddedLength_ - 1;

   int nSecondPCRPrimerLeft =  0;

   // nFirstPCRPrimerRight and nFirstPCRPrimerLeft are in coordinates
   // of the 2nd PCR primer.  That is, 
   //
   //   xxxxxxxxxx
   //            yyyyyyy   then nFirstPCRPrimerRight is 0 
   //                      and nFirstPCRPrimerLeft -9
   //    xxxxxxxxxx
   //            yyyyyyy   then nFirstPCRPrimerRight is 1
   //                      and nFirstPCRPrimerLeft -8
   //     xxxxxxxxxx
   //            yyyyyyy   then nFirstPCRPrimerRight is 2
   //                      and nFirstPCRPrimerLeft -7
      

   for( int nFirstPCRPrimerRight = 0;
        nFirstPCRPrimerRight <= pSecondPCRPrimer->nUnpaddedLength_ - 1
           + pFirstPCRPrimer->nUnpaddedLength_ - 1;
        ++nFirstPCRPrimerRight ) {
      
      int nFirstPCRPrimerLeft = nFirstPCRPrimerRight -
         pFirstPCRPrimer->nUnpaddedLength_ + 1;


      // The primers are like this:
      //     ---------->
      //  <--------
      // with the 3' ends exposed
      // or like this:
      //   ------------>
      //       <-------------
      // with the 3' ends covered.
      // Our score system is an indication of how much the priming
      // efficiency is inhibited, and the greater the score, the
      // worse things are.
      // Thus we need to separately calculate the inhibition for
      // each primer.  First we do it for the 2nd primer and then
      // for the 1st primer.
      // For the second primer, we calculate the score starting at
      // the 3' end of the second primer--this is the left end.
      // If the 3' end is exposed (not covered by the first primer),
      // that is good--the score is reduced by a mismatch score for
      // every 3' end base that is exposed.  

      int nScoreFrom3PrimeEndOfSecondPrimer = 0;
      if ( nFirstPCRPrimerLeft > 0 ) 
         nScoreFrom3PrimeEndOfSecondPrimer = nFirstPCRPrimerLeft * nMismatch;

      int nOverlapRegionLeft;
      int nOverlapRegionRight;

      assert( bIntersect( nFirstPCRPrimerLeft,
                          nFirstPCRPrimerRight,
                          nSecondPCRPrimerLeft,
                          nSecondPCRPrimerRight,
                          nOverlapRegionLeft,
                          nOverlapRegionRight ) );

      // nFirstPCRPrimerLeft,
      // nFirstPCRPrimerRight,
      // nSecondPCRPrimerLeft,
      // nSecondPCRPrimerRight,
      // are in the 0-based coordinates of the 2nd pcr primer, hence
      // nOverlapRegionLeft,
      // nOverlapRegionRight
      // are, too.


      // Hence nSubOligoEnd are in the coordinate system of the
      // 2nd PCR primer.

      int nSubOligoEnd;
      for( nSubOligoEnd = nOverlapRegionLeft;
           nSubOligoEnd <= nOverlapRegionRight;
           ++nSubOligoEnd ) {

         // convert nSubOligoEnd to the coordinate system of the
         // 1st primer by subtracting the offset nFirstPCRPrimerLeft

         char cFirstPrimerBase = pFirstPCRPrimer->szPrimer_[
              nSubOligoEnd - nFirstPCRPrimerLeft ];

         char cSecondPrimerBase = pSecondPCRPrimer->szPrimer_[ 
              pSecondPCRPrimer->nUnpaddedLength_ - 1 - nSubOligoEnd ];

         nScoreFrom3PrimeEndOfSecondPrimer += 
            nScoreOfPairOfBases_[ cSecondPrimerBase - 'a' ][ cFirstPrimerBase - 'a' ];

         if ( nScoreFrom3PrimeEndOfSecondPrimer > nMaxScore ) {
            nMaxScore = nScoreFrom3PrimeEndOfSecondPrimer;
            nMax3PrimeEndOfFirstPCRPrimer = nFirstPCRPrimerRight;
            nMaxSubOligoEnd = nSubOligoEnd;
            bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer = false;
         }
      }


      // now consider the same pair in the same orientations, but consider
      // suboligos from the 3' end of the 1st primer, hence suboligos
      // from the right of the first primer


      //         ----------->  (first PCR primer)
      //                  <----      (second PCR primer )

      int nScoreFrom3PrimeEndOfFirstPrimer = 0;
      
      if ( nFirstPCRPrimerRight > nSecondPCRPrimerRight  )
         nScoreFrom3PrimeEndOfFirstPrimer = 
            ( nFirstPCRPrimerRight - nSecondPCRPrimerRight ) * nMismatch;

      for( nSubOligoEnd = nOverlapRegionRight;
           nSubOligoEnd >= nOverlapRegionLeft; --nSubOligoEnd ) {

         char cFirstPrimerBase = pFirstPCRPrimer->szPrimer_[
              nSubOligoEnd - nFirstPCRPrimerLeft ];

         char cSecondPrimerBase = pSecondPCRPrimer->szPrimer_[
              pSecondPCRPrimer->nUnpaddedLength_ - 1 - nSubOligoEnd ];
         

         nScoreFrom3PrimeEndOfFirstPrimer += 
            nScoreOfPairOfBases_[ cSecondPrimerBase - 'a' ][ cFirstPrimerBase - 'a' ];


         if ( nScoreFrom3PrimeEndOfFirstPrimer > nMaxScore ) {
            nMaxScore = nScoreFrom3PrimeEndOfFirstPrimer;
            nMax3PrimeEndOfFirstPCRPrimer = nFirstPCRPrimerRight;
            nMaxSubOligoEnd = nSubOligoEnd;
            bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer = true;
         }
      }

   } //    for( int nFirstPCRPrimerRight = 0; ...

//    int nMaxFirstPCRPrimerLeft = nMax3PrimeEndOfFirstPCRPrimer -
//       pFirstPCRPrimer->nUnpaddedLength_ + 1;

//    int nBias = MIN( nMaxFirstPCRPrimerLeft, nSecondPCRPrimerLeft );


//    // print 1st PCR primer

//    int nNumberOfBlanks = nMaxFirstPCRPrimerLeft - nBias;
//    for( int n = 0; n < nNumberOfBlanks; ++n )
//       printf( " " );
   
//    for( n = 0; n < pFirstPCRPrimer->nUnpaddedLength_; ++n )
//       printf( "%c", pFirstPCRPrimer->szPrimer_[n] );
   
//    printf( "\n" );
   
//    nNumberOfBlanks = nSecondPCRPrimerLeft - nBias;
//    for( n = 0; n < nNumberOfBlanks; ++n )
//       printf( " " );
   
//    for( n = 0; n < pSecondPCRPrimer->nUnpaddedLength_; ++n )
//       printf( "%c", 
//               pSecondPCRPrimer->szPrimer_[ pSecondPCRPrimer->nUnpaddedLength_ - 1 - n ] );
   
//    printf( "\n" );

//    nNumberOfBlanks = nMaxSubOligoEnd - nBias;
//    for( n = 0; n < nNumberOfBlanks; ++n )
//       printf( " " );
//    printf( "^\n" );

   
//    printf( "maxscore so far = %d with nMax3PrimeEndOfFirstPCRPrimer = %d and nMaxSubOligoEnd = %d %s\n",
//            nMaxScore,
//            nMax3PrimeEndOfFirstPCRPrimer,
//            nMaxSubOligoEnd,
//            ( bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer ? "from 3' end of first primer" :
//              "from 3' end of second primer" ) );
   //   if ( nMaxScore > pCP->nPrimersMaxPrimerDimerScoreForPCR_ ) {
   //      printf( "%d ", nMaxScore );
   //   }

   if ( nMaxScore > pCP->nPrimersMaxPrimerDimerScoreForPCR_ )
      return( true );
   else
      return( false );
}