/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "testReadPhdBall.h"
#include    <time.h>
#include    "mbt_exception.h"
#include    "soLine.h"
#include    "readsSortedByReadName.h"
#include    <math.h>
#include    "locatedFragment.h"
#include    <unistd.h>
#include    "findQueryWithinSubject.h"


// Entry point of the test.
//
// As currently written this runs one hard-coded findQueryWithinSubject()
// call, prints the outcome to cerr and then terminates the process with
// _exit( 0 ).  Everything after the _exit() call (loading the read list
// and timing a scan of the phd ball) is therefore never executed; it is
// kept so that the timing test can be re-enabled by removing the _exit().
void testReadPhdBall :: doIt() {



   RWCString soQuery = "ttcaaagcaccat";
   //   RWCString soSubject = "ttcaaggcaccat";
   RWCString soSubject = "ttcaaagcacttcacaatgggtcccacacacctcctaattcaaggcaccattttcagagtccaacccccagcgggctgcatcctgccctggggcccactaagtagctacttcaaagcaccat";


   // The three result variables are printed unconditionally below, so give
   // them defined values in case findQueryWithinSubject() leaves them
   // untouched when it finds no match (its behavior in that case is not
   // visible from this file).
   bool bFoundMatch = false;
   int n0SubjectStart = -1;
   int n0SubjectEnd = -1;
   int nScore = 0;

   findQueryWithinSubject( soQuery,
                           soSubject,
                           3, // gap
                           1, // match
                           2, // mismatch
                           5, // minscore
                           10, // max indels
                           bFoundMatch,
                           n0SubjectStart,
                           n0SubjectEnd,
                           nScore );
                           


   cerr << "bFoundMatch = " << szPrintBool( bFoundMatch ) <<
      " n0SubjectStart = " << n0SubjectStart << " n0SubjectEnd = " <<
      n0SubjectEnd << " nScore = " << nScore << endl;


   // Terminates the process immediately; nothing below this line runs.
   _exit( 0 );

   readReadList();


   // time (in whole seconds, via time()) how long scanning the phd ball takes
   time_t timeStart = time( NULL );

   pPhdBall_ = fopen( filPhdBall_.data(), "r" );
   if ( !pPhdBall_ ) {
      THROW_FILE_ERROR( filPhdBall_ );
   }

   nLine_ = 0;

   justReadForBeginSequence();

   fclose( pPhdBall_ );

   time_t timeEnd = time( NULL );

   double dTotalTime = difftime( timeEnd, timeStart );

   cerr << "time: " << dTotalTime << endl;
}



// Reads pPhdBall_ (which the caller must already have opened) line by line
// into soLine.  For every line whose first 14 bytes are "BEGIN_SEQUENCE",
// the token obtained from soGetNextToken() starting at position 15 is taken
// as the read name.  Two tallies are kept and printed to cerr at the end:
// how many such lines were seen, and how many of those read names are
// contained in aReadList_.
void testReadPhdBall :: justReadForBeginSequence() {

   int nSeen = 0;
   int nMatched = 0;

   while( fgets( soLine.data(), nMaxLineSize, pPhdBall_ ) != NULL ) {

      // only BEGIN_SEQUENCE lines are of interest
      if ( memcmp( soLine.data(), "BEGIN_SEQUENCE", 14 ) != 0 )
         continue;

      // fgets() filled the buffer directly, so the stored length has to
      // be brought up to date before the line is tokenized
      soLine.nCurrentLength_ = strlen( soLine.data() );

      // presumably the first character after "BEGIN_SEQUENCE "
      size_t nTokenStart = 15;

      RWCString soReadName = soLine.soGetNextToken( nTokenStart );
      ++nSeen;

      if ( aReadList_.bContains( soReadName ) )
         ++nMatched;
   }

   cerr << "read read: " << nSeen << " found: " << nMatched << endl;
}



void testReadPhdBall :: readReadList() {

   FileName filReadList = "read_list.txt";
   
   FILE* pReadList = fopen( filReadList.data(), "r" );
   if ( !pReadList ) {
      THROW_FILE_ERROR( filReadList );
   }

   while( fgets( soLine.data(), nMaxLineSize, pReadList ) != NULL ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );
      soLine.stripTrailingWhitespaceFast();

      aReadList_.insert( soLine );
   }

   fclose( pReadList );

   aReadList_.resort();

   cerr << "done sorting read list" << endl;
}



// Timing experiment for readsSortedByReadName::resort().
//
// Creates nNumberOfReads LocatedFragment objects whose names have the form
// "fixed<rand()>_<index>", inserts each into a readsSortedByReadName
// container, and then prints to cerr the number of seconds (as measured by
// time()/difftime()) that resort() takes.  Progress is reported on cerr
// every 1000000 reads.  The LocatedFragment objects are not deleted here.
void testReadPhdBall :: timeSort() {

   const int nNumberOfReads = 10000000;
   readsSortedByReadName aManyReads( (size_t) nNumberOfReads );

   // The formatted name is "fixed" (5) + up to 10 digits of rand() + "_"
   // + up to 8 digits of nRead + terminating NUL, i.e. up to ~25 bytes.
   // A capacity of 10 was too small for that, so reserve a comfortably
   // larger buffer and bound the write with snprintf().
   // (Assumes, as the original code did, that the size_t constructor
   // argument is the capacity of the buffer returned by data().)
   const size_t nNameBufferSize = 64;
   RWCString soReadName( nNameBufferSize );
   for( int nRead = 0; nRead < nNumberOfReads; ++nRead ) {

      int nRandom = rand();

      // snprintf() returns the length of the formatted name, which always
      // fits in the buffer here, so it is also the new string length
      soReadName.nCurrentLength_ = 
         snprintf( soReadName.data(), nNameBufferSize,
                   "fixed%.4d_%d", nRandom, nRead );

      LocatedFragment* pLocFrag = new LocatedFragment( soReadName,
                                                       0,
                                                       false,
                                                       NULL );

      if ( nRead % 1000000 == 0 ) {
         cerr << "created " << nRead << " reads " << endl;
         cerr.flush();
      }

      aManyReads.insert( pLocFrag );
   }

   cerr << "about to sort" << endl;
   cerr.flush();


   // only the resort() call itself is timed
   time_t timeStart = time( NULL );

   aManyReads.resort();

   time_t timeEnd = time( NULL );

   double dTotalTime = difftime( timeEnd, timeStart );

   cerr << "time: " << dTotalTime << endl;
   cerr.flush();
}