/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "contigMatchTablesType.h"
#include    "lookForImperfect3Prime4MerFalsePrimerMatchesOfOnePrimerInOneContig.h"
#include    "contig.h"
#include    "nLengthOf4Mer.h"
#include    "nFindScoreOfMatch.h"
#include    "consedParameters.h"
#include    "whyIsPrimerNotAcceptableTypes.h"
#include    "nConvertFrom4Mer.h"


// We added this to handle the case in which there was a nearly perfect
// false match like this:
//  acgggaaaaaactctta
//  acgggaaaaaactgtta
// This false match would not be found by the routine
// lookForPerfect4MerFalsePrimerMatchesOfOnePrimerInOneContig
// since the latter will only find a false match if it matches perfectly
// in the last 4 bases.  Thus we added this routine, which will find
// such matches.

// This routine looks for perfect 6 base matches.  Of those, it looks
// for those which score above the nPrimersMaxMatchElsewhereScore_
// threshold.  These primers are considered unacceptable and are
// excluded.


void lookForImperfect3Prime4MerFalsePrimerMatchesOfOnePrimerInOneContig( 
                             primerType* pPrimer,
                             Contig* pContigOfTarget,
                             const bool bComplementedContig,
                             const bool bLookEverywhereInContig,
                             const int nUnpaddedConsPosOfTargetStart,
                             const int nUnpaddedConsPosOfTargetEnd,
                             const bool bForwardNotReversePrimer ) {


   contigMatchTablesType* pUnpaddedContig = bComplementedContig ?
      pContigOfTarget->pUnpaddedContigComplemented_ :
      pContigOfTarget->pUnpaddedContig_;

   int nUnpaddedConsPosOfTargetStartPossiblyComplemented;
   int nUnpaddedConsPosOfTargetEndPossiblyComplemented;

   if ( ! bLookEverywhereInContig ) {
      if ( bComplementedContig ) {
         nUnpaddedConsPosOfTargetStartPossiblyComplemented =
            pContigOfTarget->nComplementUnpaddedIndex( nUnpaddedConsPosOfTargetEnd );
         nUnpaddedConsPosOfTargetEndPossiblyComplemented =
            pContigOfTarget->nComplementUnpaddedIndex( nUnpaddedConsPosOfTargetStart );
      }
      else {
         nUnpaddedConsPosOfTargetStartPossiblyComplemented =
            nUnpaddedConsPosOfTargetStart;
         nUnpaddedConsPosOfTargetEndPossiblyComplemented =
            nUnpaddedConsPosOfTargetEnd;
      }
   }

   const int nSix = 6;


   // xxxxxxxxxxxxxxxxxxxxxxx
   //                 ^    ^ ^
   //                 a    b c
   // start here and back up
   // c = szPrimer_ + nUnpaddedLength_
   // so a = szPrimer_ + nUnpaddedLength_ - 7
   
   // I'm not sure that saving any subtotals will help since we are 
   // not even calculating every subtotal--just the ones in which
   // the end has a perfect 4-mer match within the range of consensus
   // positions we are looking for a false match

   for(    char* p6MerLeftEnd = 
      pPrimer->szPrimer_ + pPrimer->nUnpaddedLength_ - 7;
           p6MerLeftEnd >= pPrimer->szPrimer_;
           --p6MerLeftEnd ) {

      // look at last 4 bases of the 6mer

      int n4MerToLookup = nConvertFrom4Mer( p6MerLeftEnd + 2 );
      
      blockOfLocationsOfA4MerType* pBlockOfLocationsOfA4Mer =
         &( pUnpaddedContig->blockOfLocationsOfA4Mer_[ n4MerToLookup ] );

      bool bAgain = true;
      while( bAgain ) {
         for( int nIndexWithinBlock = 0;
              nIndexWithinBlock < pBlockOfLocationsOfA4Mer->nCurrentLength_;
              ++nIndexWithinBlock ) {
            
            int nUnpaddedPosOfMatchingContig4Mer =
               pBlockOfLocationsOfA4Mer->nUnpaddedPosition_[ nIndexWithinBlock ];
            // avoid segmentation faults
            if ( nUnpaddedPosOfMatchingContig4Mer < 3 ) continue;

            char* sz66Bases =
               pUnpaddedContig->szUnpaddedBases_
               + nUnpaddedPosOfMatchingContig4Mer - 3;

            // see diagram below and subtract 1 to change to zero-based
            // so nUnpaddedPosOfMatchingContig4Mer - 3 is 
            // zero-based unpadded of 66 bases


            // check that previous 2 bases match precisely
            // if not, ignore this match since we are only interested
            // in perfect 6 base matches.
            if ( memcmp( p6MerLeftEnd, sz66Bases, 2 ) != 0 ) 
               continue;

            // xxxxxxxxxxxx66mmmmxxxxxxxxxxx
            //               ^ this is nUnpaddedPosOfMatchingContig4Mer
            //             ^ is nUnpaddedPosOfMatchingContig4Mer - 2
            // Distance to leftmost x is p6MerLeftEnd - pPrimer->szPrimer_
            // So unpadded cons pos of leftmost x is
            // nUnpaddedPosOfMatchingContig4Mer - 2 -
            //        ( p6MerLeftEnd - pPrimer->szPrimer_ )
            // and then subtract 1 to get the position in zero based 
            // coordinates:

            int nContigZeroBasedUnpaddedOfFirstBase =
               nUnpaddedPosOfMatchingContig4Mer - 2 - 
               (p6MerLeftEnd - pPrimer->szPrimer_ ) - 1;


            if ( !bLookEverywhereInContig ) {
               if ( ! (
                       ( nUnpaddedConsPosOfTargetStartPossiblyComplemented <=
                         nContigZeroBasedUnpaddedOfFirstBase )
                       &&
                       ( nContigZeroBasedUnpaddedOfFirstBase <=
                         nUnpaddedConsPosOfTargetEndPossiblyComplemented )
                       ) ) {

                  // the match is outside the subclone so it will not 
                  // cause mispriming
                  continue;
               }
            }
      
            // this could happen if, for example, we have 4 perfectly 
            // matching bases right at the beginning of the contig.
            // In such a case, would the primer misprime?  We don't 
            // really know--perhaps we should exclude it.  For now, 
            // I'm ignoring it.

            if ( nContigZeroBasedUnpaddedOfFirstBase < 0 )
               continue;


            int nContigZeroBasedUnpaddedOfLastBaseOfSequenceToCompareToPrimer 
               = nContigZeroBasedUnpaddedOfFirstBase + 
               pPrimer->nUnpaddedLength_ - 1;


            // This check below insures we don't get subscript errors
            // by having a false match to very close to the end
            // of the pUnpaddedContig->szUnpaddedBases_ such that
            // when nFindScoreOfMatch runs, it runs off the end of
            // pUnpaddedContig->szUnpaddedBases_ (szFalseMatch is
            // just a pointer to within this structure )

            // pUnpaddedContig->nUnpaddedBases_ - 1 is the index in
            // unpadded zero-based coordinates of the last base in
            // pUnpaddedContig->szUnpaddedBases

            if ( nContigZeroBasedUnpaddedOfLastBaseOfSequenceToCompareToPrimer
                 >
                 ( pUnpaddedContig->nUnpaddedBases_ - 1  ) ) {
//                printf("ran off end since nContigZeroBasedUnpaddedOfFirstBase = %d and pUnpaddedContig->nUnpaddedBases_ = %d and pPrimer->nUnpaddedLength_ = %d\n",
//                       nContigZeroBasedUnpaddedOfFirstBase,
//                       pUnpaddedContig->nUnpaddedBases_,
//                       pPrimer->nUnpaddedLength_ );
                      
               continue;

            }

            
            if ( bForwardNotReversePrimer &&
                 !bComplementedContig &&
                 ( pContigOfTarget == pPrimer->pContig_ ) &&
                 ( nContigZeroBasedUnpaddedOfFirstBase ==
                   ( pPrimer->nUnpaddedStart_ - 1 ) )
                 )
               continue;  // primer is the same as the target

            if ( !bForwardNotReversePrimer &&
                 bComplementedContig &&
                 ( pContigOfTarget == pPrimer->pContig_ ) ) {

               int nComplementedStartOfPrimer =
                  pPrimer->pContig_->nComplementUnpaddedIndex( 
                           pPrimer->nUnpaddedEnd_ );

               if ( nContigZeroBasedUnpaddedOfFirstBase ==
                    nComplementedStartOfPrimer - 1 )
                  continue; // primer is the same as the target
            }


            char* szFalseMatch =
               pUnpaddedContig->szUnpaddedBases_
               + nContigZeroBasedUnpaddedOfFirstBase;

            int nPosOfMaxCumulativeScore;

            int nScoreOfFalseMatch = nFindScoreOfMatch(
                                                  pPrimer->nUnpaddedLength_,
                                                  szFalseMatch,
                                                  nPosOfMaxCumulativeScore );

            // at least the last 4 bases must match--bullshit
            // As a matter of fact, in general the last 4 bases 
            // will not match--that is the purpose of this routine.


            if ( nScoreOfFalseMatch > pPrimer->nScoreOfStickiestFalseMatch_ ) {
               pPrimer->nScoreOfStickiestFalseMatch_ = nScoreOfFalseMatch;
               pPrimer->pContigOfStickiestFalseMatch_ = pContigOfTarget;
               pPrimer->nUnpaddedOfStickiestFalseMatch_ =
                  nContigZeroBasedUnpaddedOfFirstBase + 1;
               pPrimer->bContigOfStickiestFalseMatchIsComplemented_ =
                  bComplementedContig;

               if (pPrimer->nScoreOfStickiestFalseMatch_ >
                   pCP->nPrimersMaxMatchElsewhereScoreToUse_ ) {

                  pPrimer->bAcceptable_ = false;
                  pPrimer->nWhyIsPrimerNotAcceptable_ = BAD_PRIMER_STICKS_TO_SOMEWHERE_ELSE;
                  // this saves checking for other false matches
                  // Note that they may be worse, but this one is bad
                  // enough so don't even show it.
                  return;
               }
            }
         } //       for( int nIndexWithinBlock = 0; 

            
         if ( pBlockOfLocationsOfA4Mer->pNextBlockOfLocationsOfA4Mer_ )
            pBlockOfLocationsOfA4Mer =
               pBlockOfLocationsOfA4Mer->pNextBlockOfLocationsOfA4Mer_;
         else
            bAgain = false;
      } // while( bAgain )
   } //    for(    char* p6MerLeftEnd = 
}