/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #ifndef addNewReads_included #define addNewReads_included #include "bool.h" #include "filename.h" #include using namespace std; #include "rwtptrorderedvector.h" #include "rwtvalorderedvector.h" #include "readsSortedByReadName.h" #include "mbtValOrderedVectorOfRWCString.h" #include "mbtPtrOrderedVector.h" #include "filename.h" #define SZLINE_SIZE 500 class Contig; class LocatedFragment; class crossMatchInfoForRead; class addNewReadsWithExistingAlignments; class regionOfSequence; class regionOfSequenceArray; class contigEndReadList; class contigInterval { public: Contig* pContig_; int nConsPosLeft_; int nConsPosRight_; bool bInUse_; public: bool operator==( const contigInterval& CI ) const { return( this == &CI ); } contigInterval( Contig* pContig, const int nConsPosLeft, const int nConsPosRight ) : pContig_( pContig ), nConsPosLeft_( nConsPosLeft ), nConsPosRight_( nConsPosRight ), bInUse_( true ) {} }; #define INITIALIZE_ADDNEWREADS \ nLastSizeOfCrossMatchInfoForReads_( 0 ), \ pAddNewReadsWithExistingAlignments_( NULL ), \ paContigEndReadList_( NULL ) #define INITIALIZE_ADDNEWREADS_ARRAYS \ aContigIntervals_.soName_ = "aContigIntervals_"; \ aLocatedFragmentsThatDidNotAlign_.soName_ = "aLocatedFragmentsThatDidNotAlign_"; \ aCrossMatchInfoForReads_.soName_ = "aCrossMatchInfoForReads_"; \ aTryingToAddTheseReads_.soName_ = "aTryingToAddTheseReads_"; class addNewReads { public: addNewReads( const int nMode = nAddReads2ConsedScript, const FileName& filAceFileToOpen = "" ) : nMode_( nMode ), filAceFileToOpen_( filAceFileToOpen ), INITIALIZE_ADDNEWREADS { INITIALIZE_ADDNEWREADS_ARRAYS } // nMode = nAddNewReadsToSelectedRegions, addNewReads( const FileName& filAceFileToOpen, const FileName& filRegions, const FileName& filAlignmentsFOF ) : nMode_( nAddNewReadsToSelectedRegions ), filAceFileToOpen_( filAceFileToOpen ), filRegions_( filRegions ), filAlignmentsFOF_( filAlignmentsFOF ), INITIALIZE_ADDNEWREADS { INITIALIZE_ADDNEWREADS_ARRAYS } ~addNewReads(); void doIt(); bool bCheckThatAceFileOnDiskIsCurrent(); bool bCheckThatAllExecutablesExist(); bool bAskUserForNameOfFileWithReadNames(); bool bCheckThatFOFExists(); bool bCheckThatFOFExistsNotSolexa(); bool bAreThereAnyDuplicates( RWCString& soErrorMessage ); bool bRunAddReads2Consed(); bool bRunStatusWasOK(); bool bParseAlignmentsAndAddReads(); void parseAlignmentsAndAddReads2( FILE* pAlignmentsFile ); void tellUserAllAboutIt(); void tellUserAllAboutIt2( RWCString& soMessage ); void parseAlignmentHeaderLine( crossMatchInfoForRead*& pCrossMatchInfoForRead ); void parseOneDiscrepancyForOneAlignment( crossMatchInfoForRead* pCrossMatchInfoForRead ); void parseOneDiscrepancyForOneAlignmentSelectRegions( RWCString& soLine, regionOfSequence* pRegion, crossMatchInfoForRead* pCrossMatchInfoForRead ); void parseOneLineOfReadBases(); void parseOneLineOfContigBases(); void readPHDFilesForNewReads(); void readPHDFilesJustForTagsAndWholeReadItems(); void insertPadsInContigs(); void insertPadsInOneContig( Contig* pContig ); bool bThereIsAnAlignmentWithThisContig( Contig *pContig, int& nIndexOfFirstAlignmentAgainstThisContig ); void insertPadsInNewReadsAndSetReadBases(); void insertPadsInOneNewReadAndSetReadBases( crossMatchInfoForRead* pCrossMatchInfoForRead ); void updateListOfReadsInTopLevelWindow(); void updateFirstAndLastDisplayableContigPositions(); void makeThisReadIntoItsOwnContig( const RWCString& soReadName ); void makeReadsThatDidNotAlignIntoTheirOwnContigs(); bool bWasThisReadAdded( const RWCString& soReadName ); void convertVectorTagsToXsInNewReads(); void recalculateConsensusQualityValues(); bool bEndOrAdvanceToNextUnused( int& nContigInterval ); void createNavigatorWindow(); void createNavigationFile(); FileName filMakeAlignmentsFilename(); void readAlignmentFile( const FileName& filAlignmentFile ); void readAlignmentFileHeaderToGetPhdBalls( FILE* pAlignmentsFile ); void readBasesAndQualitiesForAddedReads(); void readBasesAndQualitiesForSelectRegions(); void checkThatCrossMatchRunWithCorrectParameters( FILE* pAlignmentsFile ); void doItSelectRegions(); void readAndProcessRegions(); void parseAndSelectAlignments(); void parseAndSelectAlignmentsFromOneFile( const RWCString& filAlignmentsFile ); void parseAlignmentHeaderLineSelectRegions( RWCString& soLine, crossMatchInfoForRead*& pCrossMatchInfoForRead, regionOfSequence*& pRegion, bool& bOKToLookForDiscrepancyLines ); void openNewPhdBall( FILE*& pNewPhdBall ); int nGetNumberOfAlignments( FILE* pAlignmentsFile ); void runMiniAssembliesOnProtrudingReads(); void openAceFileAndAlignReads( const FileName& filAceFileName, contigEndReadList* pCERL ); void fixCrossMatchOutputCoordinates(); public: enum { nAddReads2ConsedScript = 1, nAddNewReadsWithExistingAlignments = 2, nAddNewReadsToSelectedRegions = 3 }; int nMode_; // nAddReads2ConsedScript, nAddNewReadsWithExistingAlignments, or nAddNewReadsToSelectedRegions bool bIfAReadDoesNotFitPutItIntoItsOwnContig_; bool bRecalculateConsensusQualityValues_; char szLine_[ SZLINE_SIZE ]; int nCurrentLine_; RWCString soCurrentReadLineEndPos_; RWCString soCurrentContigLineEndPos_; FileName filReadsToAddFOF_; FileName filAlignments_; RWCString soCurrentRead_; LocatedFragment* pCurrentLocFrag_; bool bCurrentReadIsComplemented_; Contig* pCurrentContig_; mbtValOrderedVectorOfRWCString aTryingToAddTheseReads_; RWTPtrOrderedVector aLocatedFragmentsThatDidNotAlign_; RWTPtrOrderedVector aContigIntervals_; FileName filContigFastaFile_; FileName filReadFastaFile_; // sorted by read name mbtPtrOrderedVector aCrossMatchInfoForReads_; int nLastSizeOfCrossMatchInfoForReads_; addNewReadsWithExistingAlignments* pAddNewReadsWithExistingAlignments_; // moved here from addNewReadsWithExistingAlignments RWTValOrderedVector aArrayOfPhdBallsForOneAlignment_; // used by selectRegions: RWTValOrderedVector aArrayOfPhdBallsAll_; // for selectRegions regionOfSequenceArray* pRegionsArray_; FileName filAceFileToOpen_; FileName filRegions_; FileName filAlignmentsFOF_; // for extending consensus using phrap mbtPtrOrderedVector* paContigEndReadList_; }; #endif