/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "removeContigs.h" #include "consed.h" #include "locatedFragment.h" #include "contig.h" #include "automatedConsedInit.h" #include "soLine.h" #include "guiRemoveReads.h" static int nCompareContigsByPointer( Contig** ppContig1, Contig** ppContig2 ) { if ( *ppContig1 < *ppContig2 ) return( -1 ); else if ( *ppContig1 == *ppContig2 ) { return( 0 ); } else { return( 1 ); } } void removeContigs :: doIt() { automatedConsedInit( filAceFileToOpen_ ); guiRemoveReads myGuiRemoveReads; readFileOfContigsToRemove( myGuiRemoveReads.aReadsToRemove_ ); RWCString soMessage; myGuiRemoveReads.doItNoGui( pCP->bRemoveReadsDeleteNotJustPutInOwnContig_, soMessage ); cerr << soMessage << endl; ConsEd::pGetAssembly()->saveAssemblyToUserSpecifiedOrNextAvailableVersionOfAceFile( filNewAceFile_ ); } void removeContigs :: readFileOfContigsToRemove( RWTPtrOrderedVector& aReadsToRemove ) { FILE* pContigsToRemove = fopen( filContigsToBeRemoved_.data(), "r" ); if ( !pContigsToRemove ) { RWCString soMessage = "file of contigs to be removed: " + filContigsToBeRemoved_; THROW_FILE_ERROR( soMessage ); } Assembly* pAssembly = ConsEd::pGetAssembly(); int nNumberOfContigs = 0; while( fgets( soLine.data(), nMaxLineSize, pContigsToRemove ) != NULL ) { soLine.nCurrentLength_ = strlen( soLine.data() ); // there might be leading and/or trailing whitespace soLine.stripAllWhitespaceExceptInternal(); // see if we can find this contig Contig* pContig = pAssembly->pGetContigByName( soLine ); if ( !pContig ) { cerr << "couldn't find contig " << soLine << endl; continue; } ++nNumberOfContigs; } RWTPtrOrderedVector aContigs( (size_t) nNumberOfContigs ); rewind( pContigsToRemove ); while( fgets( soLine.data(), nMaxLineSize, pContigsToRemove ) != NULL ) { soLine.nCurrentLength_ = strlen( soLine.data() ); // there might be leading and/or trailing whitespace soLine.stripAllWhitespaceExceptInternal(); // see if we can find this contig Contig* pContig = pAssembly->pGetContigByName( soLine ); if ( !pContig ) { cerr << "couldn't find contig " << soLine << endl; continue; } aContigs.insert( pContig ); } fclose( pContigsToRemove ); // remove any duplicates removeDuplicatedContigs( aContigs ); // now add reads int nNumberOfReads = 0; int nContig; for( nContig = 0; nContig < aContigs.length(); ++nContig ) { Contig* pContig = aContigs[ nContig ]; nNumberOfReads += pContig->nGetNumberOfReads(); } aReadsToRemove.resize( (size_t) nNumberOfReads ); for( nContig = 0; nContig < aContigs.length(); ++nContig ) { Contig* pContig = aContigs[ nContig ]; for( int nRead = 0; nRead < pContig->nGetNumberOfReads(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pGetLocatedFragment( nRead ); aReadsToRemove.insert( pLocFrag ); } } } void removeContigs :: removeDuplicatedContigs( RWTPtrOrderedVector& aContigs ) { void* pArray = aContigs.data(); size_t nNumberOfElements = aContigs.length(); size_t nSizeOfAnElement = sizeof( Contig* ); qsort( pArray, nNumberOfElements, nSizeOfAnElement, ( int(*) ( const void*, const void*) ) nCompareContigsByPointer ); // check they are in order bool bInOrder = true; int nContig; for( nContig = 1; nContig < aContigs.length(); ++nContig ) { if ( aContigs[ nContig - 1 ] > aContigs[ nContig ] ) { bInOrder = false; cerr << "out of order " << nContig - 1 << " and " << nContig << endl; } } if ( !bInOrder) { THROW_ERROR( "qsort failed" ); } // remove any duplicates from the contigs for( nContig = aContigs.length() - 2; nContig >= 0; --nContig ) { Contig* pContigA = aContigs[ nContig ]; Contig* pContigB = aContigs[ nContig + 1 ]; if ( pContigA == pContigB ) { aContigs.removeAt( nContig + 1 ); } } }