/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "sysdepend.h"
#include    "highQualityDiscrepancyGotoList.h"
#include    "quality.h"
#include    "contig.h"
#include    "locatedFragment.h"
#include    "consedParameters.h"
#include    "assert.h"


// Description text attached to every gotoItem created in this file.
// Held in an array initialised from the literal rather than in a `char*`
// pointing at the literal: converting a string literal to `char*` is not
// valid ISO C++ (C++11 onward).  Left non-const so that it still converts
// to either `char*` or `const char*` where it is passed on.
static char szGotoItemDesc[] = "high quality base disagrees with consensus";


// ctor walks every read of the passed contig and adds a gotoItem to
// the list wherever a read base disagrees with the consensus base at
// the same padded consensus position, provided the read base's quality
// is >= nQualityThresholdForFindingHighQualityDiscrepancies_ and is
// not ucQualityLowEdited.
//
// A disagreement is left out of the list when any of these holds:
//   - the position is not in the aligned part of the read
//   - the position is less than
//     nIgnoreHighQualityDiscrepanciesThisManyBasesFromEndOfAlignedRegion_
//     past nGetAlignClipStart(), or more than nGetAlignClipEnd() minus
//     that same amount
//   - bExcludeCompressionOrG_dropoutTags is true and the read reports
//     the position as within a compression or G_dropout tag
//   - bExcludeSpuriousPads is true, the read base is a pad ('*'), and
//     the contig says the pad does not indicate a possible misassembly
//
// Disagreements at consecutive consensus positions of the same read
// are folded into one gotoItem.  The list is sorted by position
// before the ctor returns.
highQualityDiscrepancyGotoList :: highQualityDiscrepancyGotoList(
                                           Contig* pContig,
                                           const bool bExcludeCompressionOrG_dropoutTags,
                                           const bool bExcludeSpuriousPads )
{

   // read both settings once, up front
   const int nMinQuality =
      consedParameters::pGetConsedParameters()->nQualityThresholdForFindingHighQualityDiscrepancies_;
   const int nEndMargin =
      consedParameters::pGetConsedParameters()->nIgnoreHighQualityDiscrepanciesThisManyBasesFromEndOfAlignedRegion_;

   // The item most recently created or extended, the read it belongs
   // to, and the last consensus position it covers.  Every item built
   // here comes from pContig, so the read and the position are enough
   // to tell whether the next discrepancy continues the same run.
   gotoItem* pRunItem = NULL;
   LocatedFragment* pRunFrag = NULL;
   int nRunLastConsPos = 0;

   const int nFragCount = pContig->nGetNumberOfFragsInContig();
   for ( int nFragIndex = 0; nFragIndex < nFragCount; ++nFragIndex ) {

      LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nFragIndex );

      for ( int nConsPos = pLocFrag->nGetAlignStart();
            nConsPos <= pLocFrag->nGetAlignEnd();
            ++nConsPos ) {

         // only bases at or above the threshold count, and a base
         // marked as edited-low-quality never counts
         const bool bHighQuality =
            ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() >=
              nMinQuality ) &&
            ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() !=
              ucQualityLowEdited );
         if ( !bHighQuality )
            continue;

         // Pads are deliberately not excluded at this point:  Phil
         // said on Nov 26, 1997 to allow pads as
         // highQualityDiscrepancies.
         const char cReadBase =
            pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase();
         if ( cReadBase == pContig->cGetConsensusBase( nConsPos ) )
            continue;

         // Phil decided Dec 27, 1997 to only show high quality
         // discrepancies if in the aligned part of the read
         if ( !pLocFrag->bIsInAlignedPartOfRead( nConsPos ) )
            continue;

         // too close to the start of the aligned region
         if ( nConsPos < ( pLocFrag->nGetAlignClipStart() + nEndMargin ) )
            continue;

         // too close to the end of the aligned region
         if ( ( pLocFrag->nGetAlignClipEnd() - nEndMargin ) < nConsPos )
            continue;

         if ( bExcludeCompressionOrG_dropoutTags &&
              pLocFrag->bIsWithinACompressionOrG_dropoutTag( nConsPos ) )
            continue;

         // When the discrepancy is a pad, ask the contig whether the
         // pad indicates a possible misassembly.  If it does, it stays
         // in the hqd list; if it is a spurious hqd pad, it is dropped.
         if ( bExcludeSpuriousPads &&
              cReadBase == '*' &&
              !pContig->bDoesPadIndicateAPossibleMisassemblyHere(
                                pLocFrag,
                                nConsPos ) )
            continue;

         const int nUnpaddedPos = pContig->nUnpaddedIndex( nConsPos );

         const bool bContinuesRun =
            ( pRunFrag == pLocFrag ) &&
            ( ( nRunLastConsPos + 1 ) == nConsPos );

         if ( bContinuesRun ) {
            // directly follows the previous discrepancy in this read:
            // stretch the existing item instead of adding a new one
            pRunItem->setNewEnd( nConsPos, nUnpaddedPos );
         }
         else {
            pRunItem = new gotoItem(
                          pContig,
                          pLocFrag,
                          nConsPos,      // start padded
                          nConsPos,      // end padded
                          nUnpaddedPos,  // start unpadded
                          nUnpaddedPos,  // end unpadded
                          szGotoItemDesc,
                          true,          // bPrefixContigToDescription
                          NULL );        // pOtherData

            addToList( pRunItem );

            pRunFrag = pLocFrag;
         }

         nRunLastConsPos = nConsPos;
      }
   }

   sortByPosition();

}  // highQualityDiscrepancyGotoList