/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "removeContigs.h"
#include    "consed.h"
#include    "locatedFragment.h"
#include    "contig.h"
#include    "automatedConsedInit.h"
#include    "soLine.h"
#include    "guiRemoveReads.h"


static int nCompareContigsByPointer( Contig** ppContig1,
                                     Contig** ppContig2 ) {

   if ( *ppContig1 < 
        *ppContig2 )
      return( -1 );
   else if ( *ppContig1 ==
             *ppContig2 ) {
      
      return( 0 );
   }
   else {
      return( 1 );
   }
}



void removeContigs :: doIt() {

   automatedConsedInit( filAceFileToOpen_ );

   guiRemoveReads myGuiRemoveReads;

   readFileOfContigsToRemove( myGuiRemoveReads.aReadsToRemove_ );

   RWCString soMessage;
   myGuiRemoveReads.doItNoGui( pCP->bRemoveReadsDeleteNotJustPutInOwnContig_,
                               soMessage );

   cerr << soMessage << endl;

   ConsEd::pGetAssembly()->saveAssemblyToUserSpecifiedOrNextAvailableVersionOfAceFile( filNewAceFile_ );
}


void removeContigs :: readFileOfContigsToRemove(
             RWTPtrOrderedVector<LocatedFragment>& aReadsToRemove ) {

   FILE* pContigsToRemove = fopen( filContigsToBeRemoved_.data(), "r" );
   if ( !pContigsToRemove ) {
      RWCString soMessage = "file of contigs to be removed: " +
         filContigsToBeRemoved_;
      THROW_FILE_ERROR( soMessage );
   }

   Assembly* pAssembly = ConsEd::pGetAssembly();

   int nNumberOfContigs = 0;
   while( fgets( soLine.data(), nMaxLineSize, pContigsToRemove ) != NULL ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      // there might be leading and/or trailing whitespace
      soLine.stripAllWhitespaceExceptInternal();

      // see if we can find this contig

      Contig* pContig = pAssembly->pGetContigByName( soLine );

      if ( !pContig ) {
         cerr << "couldn't find contig " << soLine << endl;
         continue;
      }

      ++nNumberOfContigs;
   }

   RWTPtrOrderedVector<Contig> aContigs( (size_t) nNumberOfContigs );

   rewind( pContigsToRemove );

   while( fgets( soLine.data(), nMaxLineSize, pContigsToRemove ) != NULL ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      // there might be leading and/or trailing whitespace
      soLine.stripAllWhitespaceExceptInternal();

      // see if we can find this contig

      Contig* pContig = pAssembly->pGetContigByName( soLine );

      if ( !pContig ) {
         cerr << "couldn't find contig " << soLine << endl;
         continue;
      }

      aContigs.insert( pContig );
   }
   
   fclose( pContigsToRemove );

   // remove any duplicates

   removeDuplicatedContigs( aContigs );


   // now add reads

   int nNumberOfReads = 0;
   int nContig;
   for( nContig = 0; nContig < aContigs.length(); ++nContig ) {
      Contig* pContig = aContigs[ nContig ];
      nNumberOfReads += pContig->nGetNumberOfReads();
   }

   aReadsToRemove.resize( (size_t) nNumberOfReads );


   for( nContig = 0; nContig < aContigs.length(); ++nContig ) {
      Contig* pContig = aContigs[ nContig ];

      for( int nRead = 0; nRead < pContig->nGetNumberOfReads(); ++nRead ) {
         LocatedFragment* pLocFrag = pContig->pGetLocatedFragment( nRead );

         aReadsToRemove.insert( pLocFrag );
      }
   }
}




void removeContigs :: removeDuplicatedContigs( RWTPtrOrderedVector<Contig>& aContigs ) {
   
   
   void* pArray = aContigs.data();
   size_t nNumberOfElements = aContigs.length();
   size_t nSizeOfAnElement = sizeof( Contig* );

   qsort( pArray, nNumberOfElements, nSizeOfAnElement,
          ( int(*) ( const void*, const void*) ) nCompareContigsByPointer );

   // check they are in order

   bool bInOrder = true;
   int nContig;
   for( nContig = 1; nContig < aContigs.length(); ++nContig ) {
      if ( aContigs[ nContig - 1 ] > aContigs[ nContig ] ) {
         bInOrder = false;
         cerr << "out of order " << nContig - 1 << " and " << nContig << endl;
      }
   }

   if ( !bInOrder) {
      THROW_ERROR( "qsort failed" );
   }

   // remove any duplicates from the contigs

   for( nContig = aContigs.length() - 2; nContig >= 0; --nContig ) {
      Contig* pContigA = aContigs[ nContig ];
      Contig* pContigB = aContigs[ nContig + 1 ];

      if ( pContigA == pContigB ) {
         aContigs.removeAt( nContig + 1 );
      }
   }
   
}