/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "pickCombinationsOfPCRPrimers.h"
#include    "primerType.h"
#include    "contig.h"
#include    "assembly.h"
#include    "consed.h"
#include    "fileDefines.h"
#include    "consedParameters.h"
#include    "bIsPCRPrimerPairOK.h"
#include    "exclusive_or.h"
#include    "oligoTag.h"
#include    "pickPCRPrimersForOneEndOfContig.h"
#include    "regionToAmplify.h"



void pickCombinationsOfPCRPrimers :: suggestPCRIfNecessaryForClosingGaps(
      RWTPtrOrderedVector<Contig>* pContigEndsForMultiplexPCR,
      RWTValOrderedVector<unsigned char>* pContigEndsForMultiplexPCRWhichEnd )

{


   Assembly* pAssembly = ConsEd::pGetAssembly();


   // put these contig ends into the class used by pickCombinationsOfPCRPrimers

   assert( pContigEndsForMultiplexPCR->length() ==
           pContigEndsForMultiplexPCRWhichEnd->length() );

   int nContig;
   for( nContig = 0; nContig < pContigEndsForMultiplexPCR->length();
        ++nContig ) {
      Contig* pContig = (*pContigEndsForMultiplexPCR)[ nContig ];
      int nWhichEnd = (*pContigEndsForMultiplexPCRWhichEnd)[ nContig ];

      contigEndd* pCE = new contigEndd();
      pCE->pContig_ = pContig;
      pCE->nWhichEnd_ = nWhichEnd;
      pCE->bOtherEndIsCloneEnd_ =
         pContig->bIsThisEndTheEndOfTheClone( nOppositeGap( nWhichEnd ) );

      aContigEnds_.insert( pCE );
   }

   // now let's find pcr primers for each contig end
   // If there is some contigEnd for which Consed cannot find any primers,
   // skip it and consider pcr amongst the remaining contigEnds.

   int nEnd;
   for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) {
      contigEndd* pCE = aContigEnds_[ nEnd ];

      cerr << "picking pcr primer candidates for " <<
         szWhichGap( pCE->nWhichEnd_ ) <<
         " of " << pCE->pContig_->soGetName() << "..." <<
         endl;

      pickPCRPrimersForOneEndOfContig(
                      pCE->pPrimerArray_,
                      pCE->nPrimersInPrimerArray_,
                      pCE->pContig_,
                      pCE->nWhichEnd_,
                      pCE->nAcceptablePrimers_ );

   }

   // remove any contig-ends for which we could find no primers:

   for( nEnd = aContigEnds_.length() - 1; nEnd >= 0; --nEnd ) {
      contigEndd* pCE = aContigEnds_[ nEnd ];

      if ( pCE->nAcceptablePrimers_ == 0 ) {
         // there are no acceptable primers for this contig end
         // so just don't try to do pcr off this contig end

         fprintf( pAO, "could find no pcr primers for %s of %s so excluding this contig end from pcr\n",
                  szWhichGap( pCE->nWhichEnd_ ),
                  pCE->pContig_->soGetName().data() );

         aContigEnds_.removeAt( nEnd );
      }
   }


   // now we need a list of *pairs* of ends, since not every end can
   // go with every other end.  For example, we are not interested
   // in doing pcr between the 2 ends of the same contig.  Nor are we
   // interested in doing pcr between these contigs:
   //   (clone end)----------   ------        --------(clone end)
   //               contigA     contigB        contigC


   aPairsOfEnds_.resize( aContigEnds_.length() * aContigEnds_.length() + 1 );

   // fixed a very interesting bug:  if aContigEnds_.length() == 0, then
   // aContigEnds_.length() - 1 == 4294967296 (or something like that)
   // since this is unsigned and this loop runs for hours

   for( int nEnd1 = 0; nEnd1 < ( (int) aContigEnds_.length() - 1 ); ++nEnd1 ) {

      for( int nEnd2 = nEnd1 + 1; nEnd2 < aContigEnds_.length(); ++nEnd2 ) {
         contigEndd* pCE1 = aContigEnds_[ nEnd1 ];
         contigEndd* pCE2 = aContigEnds_[ nEnd2 ];

         if ( pCE1->pContig_ == pCE2->pContig_ )
            continue;

         // case in which there are more than 2 contigs so we don't
         // want to do pcr between the clone ends
         if ( pCE1->bOtherEndIsCloneEnd_ && pCE2->bOtherEndIsCloneEnd_ &&
              ( aContigEnds_.length() > 2 ) )
            continue;

         // Consider such a case:
         // -----   ------   ------- contigs
         //   A        B        C
         //     ----      ----       (subclones fwd/rev pairs linking)

         // in such a case, we would not want to do pcr between the
         // left end of contig A and the right end of contig C

         if ( pCE1->pContig_->bPlacedInAScaffold_ &&
              pCE2->pContig_->bPlacedInAScaffold_ ) {
            
            
            if ( pAssembly->nGetScaffoldForContig( pCE1->pContig_ )
                 ==
                 pAssembly->nGetScaffoldForContig( pCE2->pContig_ )
                 ) {
                    // they are in the same scaffold (presumably the
                    // ends of the scaffold) so do not make a pair out of them

                    fprintf( pAO, 
                             "contig-ends %s of %s and %s of %s are in the same scaffold so not considering pcr for them\n",
                             szWhichGap( pCE1->nWhichEnd_ ),
                             pCE1->pContig_->soGetName().data(),
                             szWhichGap( pCE2->nWhichEnd_ ),
                             pCE2->pContig_->soGetName().data() );
                    continue;
            }
         }
         




         pairOfEnds* pPOE = new pairOfEnds();
         pPOE->nEnd1_ = nEnd1;
         pPOE->nEnd2_ = nEnd2;

         aPairsOfEnds_.insert( pPOE );

      } //  for( int nEnd2 = nEn...
   } //  for( int nEnd1 = 0; nEn...

   if ( aPairsOfEnds_.length() == 0 ) {
      fprintf( pAO, "There were no pairs of ends available for pcr.\n" );
      return;
   }

   if ( aPairsOfEnds_.length() >= pCP->nAutoFinishDoNotDoUnorientedPCRIfThisManyOrMoreUnorientedPCRReactions_ ) {
      fprintf( pAO, "Autofinish could have done %d unoriented pcr reactions, but\nconsed.autoFinishDoNotDoUnorientedPCRIfThisManyOrMoreUnorientedPCRReactions: %d\nso not doing any\n",
               aPairsOfEnds_.length(),
               pCP->nAutoFinishDoNotDoUnorientedPCRIfThisManyOrMoreUnorientedPCRReactions_ );

      fprintf( pAO, "In case you are wondering, Autofinish was considering the following unoriented pcr reactions:\n" );
      for( int nPair = 0; nPair < aPairsOfEnds_.length(); ++nPair ) {
         pairOfEnds* pPOE = aPairsOfEnds_[ nPair ];
         contigEndd* pCE1 = aContigEnds_[ pPOE->nEnd1_ ];
         contigEndd* pCE2 = aContigEnds_[ pPOE->nEnd2_ ];
         fprintf( pAO, "    between %s of %s and %s of %s\n",
                  szWhichGap( pCE1->nWhichEnd_ ),
                  pCE1->pContig_->soGetName().data(),
                  szWhichGap( pCE2->nWhichEnd_ ),
                  pCE2->pContig_->soGetName().data() );
      }

      return;
   }


   if ( aPairsOfEnds_.length() == 1 ) {
      pairOfEnds* pPairOfEnds = aPairsOfEnds_[ 0 ];
      contigEndd* pContigEndA = aContigEnds_[ pPairOfEnds->nEnd1_ ];
      contigEndd* pContigEndB = aContigEnds_[ pPairOfEnds->nEnd2_ ];

      fprintf( pAO, "contig ends were unoriented, but only 2 of them so using method of finding pcr primers for oriented pair of contigs:\n" );

      regionToAmplify regionGap;
      regionGap.autoFinishPCRToCloseAGap( pContigEndA->pContig_,
                                          pContigEndA->nWhichEnd_,
                                          pContigEndB->pContig_,
                                          pContigEndB->nWhichEnd_ );
                                          
      return;
   }



   // try combinations of primers

   for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) {
      aContigEnds_[ nEnd ]->nCurrentlyTryingThisPrimer_ = 0;
   }


   bool bFoundSetOfPrimers;
   try {
      bFoundSetOfPrimers =
         bConsiderPrimerForOneContigEndWithPrecedingContigEnds( 0 );
   }
   catch(InputDataError ide ) {
      fprintf( pAO, "could not find compatible set of pcr primers\n" );
      fprintf( pAO, "%s\n", ide.szGetDesc() );
      return;
   }


   if ( !bFoundSetOfPrimers ) {
      fprintf( pAO, "could not find an acceptable combination of primers that works for all contigs ends at once.\n" );
   }
   else {
      fprintf( pAO, "Please make the following PCR primers:\n" );
      int nEnd;
      for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) {
         contigEndd* pCE = aContigEnds_[ nEnd ];
         primerType* pPrimer =
            pCE->pPrimerArray_ + pCE->nCurrentlyTryingThisPrimer_;

         pCE->soOligoNameOfChosenPrimer_ = pAssembly->soGetNextOligoName();

         fprintf( pAO, "   %s: ",  pCE->soOligoNameOfChosenPrimer_.data() );
         for( int nBase = 0; nBase < pPrimer->nUnpaddedLength_; ++nBase ) {
            putc( pPrimer->szPrimer_[ nBase ], pAO );
         }

         fprintf( pAO, " %d to %d (%s) for %s of %s melt: %5.1f\n",
                  pPrimer->nUnpaddedStart_,
                  pPrimer->nUnpaddedEnd_,
                  ( pPrimer->bTopStrandNotBottomStrand_ ? "top strand" : "bottom strand" ),
                  szWhichGap( pCE->nWhichEnd_ ),
                  pPrimer->pContig_->soGetName().data(),
                  pPrimer->dMeltingTemperature_ );

      }

      // now list combinations of primers

      RWCString soPCRInstructions( (size_t) 3000 );
      soPCRInstructions += "Do PCR reactions with the following pairs of primers:\n";

      for( int nPair = 0; nPair < aPairsOfEnds_.length(); ++nPair ) {
         pairOfEnds* pPOE = aPairsOfEnds_[ nPair ];

         contigEndd* pCE1 = aContigEnds_[ pPOE->nEnd1_ ];
         contigEndd* pCE2 = aContigEnds_[ pPOE->nEnd2_ ];

         primerType* pPrimer1 =
            pCE1->pPrimerArray_ + pCE1->nCurrentlyTryingThisPrimer_;

         primerType* pPrimer2 =
            pCE2->pPrimerArray_ + pCE2->nCurrentlyTryingThisPrimer_;

         soPCRInstructions += "   ";
         soPCRInstructions += pCE1->soOligoNameOfChosenPrimer_;
         soPCRInstructions += ": ";

         int nBase;
         for( nBase = 0; nBase < pPrimer1->nUnpaddedLength_; ++nBase ) {
            soPCRInstructions.append( pPrimer1->szPrimer_[ nBase ] );
         }

         soPCRInstructions += "   ";
         soPCRInstructions += pCE2->soOligoNameOfChosenPrimer_;
         soPCRInstructions += ": ";
         for( nBase = 0; nBase < pPrimer2->nUnpaddedLength_; ++nBase ) {
            soPCRInstructions.append( pPrimer2->szPrimer_[ nBase ] );
         }
         soPCRInstructions.append( '\n' );
      } //  for( int nPair = 0; nPair ...

      fprintf( pAO, "%s", soPCRInstructions.data() );

      if ( pCP->bWillDoExperiments_ &&
           pCP->bAutoFinishTagOligosWhenDoExperiments_ ) {
         // need to create oligo tags for all these

         for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) {
            contigEndd* pCE = aContigEnds_[ nEnd ];
            primerType* pPrimer =
               pCE->pPrimerArray_ + pCE->nCurrentlyTryingThisPrimer_;

            bool bComplementedWithRespectToWayPhrapCreatedContig =
               exclusive_or(
                  pPrimer->pContig_->bComplementedFromWayPhrapCreated_,
                  !pPrimer->bTopStrandNotBottomStrand_ );

            oligoTag* pOligoTag = new oligoTag(
                  pPrimer,
                  true, // clone template, not subclone template
                  bComplementedWithRespectToWayPhrapCreatedContig,
                  666, // primer id--just prevent ctor from creating new name
                  soPCRInstructions );

            // fill in real name
            pOligoTag->soName_ = pCE->soOligoNameOfChosenPrimer_;

            pAssembly->pChosenPrimersForAutofinishPCR_->insert( pOligoTag );
         }
      } //  if ( pCP->bWillDoExperiments_ && ...
   } //  if ( !bFoundSetOfPrimers ) { else ...
}



// recursive
bool pickCombinationsOfPCRPrimers :: bConsiderPrimerForOneContigEndWithPrecedingContigEnds( const int nContigEnd ) {

   // try to find a primer on contig end nContigEnd that is compatible with
   // set of primers already chosen.  If cannot find *any*, then return false.
   // If we *can* find such a primer, then set pCE->nCurrentlyTryingThisPrimer_


   contigEndd* pCE = aContigEnds_[ nContigEnd ];

   for( int nPrimer = 0; nPrimer < pCE->nPrimersInPrimerArray_; ++nPrimer ) {
      primerType* pPrimer = pCE->pPrimerArray_ + nPrimer;
      if ( !pPrimer->bAcceptable_ ) continue;

      if ( bIsThisPrimerCompatibleWithSetOfPrimersChosenSoFar( pPrimer, nContigEnd ) ) {


         pCE->nCurrentlyTryingThisPrimer_ = nPrimer;

         // great!  Now consider the next contigEnd and see if we
         // can find a primer that works with the set we've found
         // so far.

         // check if we already have considered all contig ends
         if ( nContigEnd == ( aContigEnds_.length() - 1 ) )
            return( true );

         else if (
            bConsiderPrimerForOneContigEndWithPrecedingContigEnds( nContigEnd + 1 ) )
            return( true );

         // if bConsiderPrimerForOneContigEndWithPrecedingContigEnds(
         // nContigEnd + 1 ) returns false, that means that there was
         // no full set of primers that worked with the ones already
         // chosen, including the nPrimer in this routine.  Thus we
         // must try ++nPrimer and see if we can make it all work with
         // that primer.
      }
   }

   // if reached here, then we were unable to find a set of primers that
   // included the ones chosen so far.  Thus we should return false so
   // that the calling program can increment nPrimer and thus try a different
   // set

   return( false );
}




bool pickCombinationsOfPCRPrimers :: bIsThisPrimerCompatibleWithSetOfPrimersChosenSoFar(
        primerType* pPrimerToTry, const int nContigEndOfCurrentPrimer ) {

   // note that if nContigEndOfCurrentPrimer == 0, this returns true

   for( int nContigEnd = 0; nContigEnd < nContigEndOfCurrentPrimer;
        ++nContigEnd ) {

      ++dNumberOfPrimerPairsCompared_;
      if ( dNumberOfPrimerPairsCompared_ > pCP->dAutoFinishDoNotComparePCRPrimersMoreThanThisManyTimes_ ) {
         THROW_ERROR( "could not find a compatible set of PCR primers after trying consed.autoFinishDoNotComparePCRPrimersMoreThanThisManyTimes" );
      }

      contigEndd* pCE = aContigEnds_[ nContigEnd ];
      primerType* pAlreadyChosenPrimer =
         pCE->pPrimerArray_ + pCE->nCurrentlyTryingThisPrimer_;

      bool bTempDiffTooGreat = false;
      bool bPrimersStickToEachOther = false;

      if ( !bIsPCRPrimerPairOK( pAlreadyChosenPrimer,
                                pPrimerToTry,
                                bTempDiffTooGreat,
                                bPrimersStickToEachOther ) ) {
         return( false );
      }
   }

   return( true );
}