/*****************************************************************************
# Copyright (C) 1994-2008 by David Gordon.
# All rights reserved.
#
# This software is part of a beta-test version of the Consed/Autofinish
# package. It should not be redistributed or
# used for any commercial purpose, including commercially funded
# sequencing, without written permission from the author and the
# University of Washington.
#
# This software is provided ``AS IS'' and any express or implied
# warranties, including, but not limited to, the implied warranties of
# merchantability and fitness for a particular purpose, are disclaimed.
# In no event shall the authors or the University of Washington be
# liable for any direct, indirect, incidental, special, exemplary, or
# consequential damages (including, but not limited to, procurement of
# substitute goods or services; loss of use, data, or profits; or
# business interruption) however caused and on any theory of liability,
# whether in contract, strict liability, or tort (including negligence
# or otherwise) arising in any way out of the use of this software, even
# if advised of the possibility of such damage.
#
# Building Consed from source is error prone and not simple which is
# why I provide executables. Due to time limitations I cannot
# provide any assistance in building Consed. Even if you do not
# modify the source, you may introduce errors due to using a
# different version of the compiler, a different version of motif,
# different versions of other libraries than I used, etc. For this
# reason, if you discover Consed bugs, I can only offer help with
# those bugs if you first reproduce those bugs with an executable
# provided by me--not an executable you have built.
#
# Modifying Consed is also difficult. Although Consed is modular,
# some modules are used by many other modules. Thus making a change
# in one place can have unforeseen effects on many other features.
# It may takes months for you to notice these other side-effects
# which may not seen connected at all. It is not feasable for me to
# provide help with modifying Consed sources because of the
# potentially huge amount of time involved.
#
#*****************************************************************************/

#include "fixContigEnds.h"
#include "consed.h"
#include "assembly.h"
#include "mbt_exception.h"
// NOTE(review): the names of the two system headers below were missing from
// the collapsed text of this file; they are restored from what the code uses
// (cerr/endl and fprintf/fopen/fgets/fclose) -- confirm against the original.
#include <iostream>
using namespace std;
#include <stdio.h>
#include "consedParameters.h"
#include "terminateIfNoPhdDir.h"
#include "maybeTerminateIfAnotherReadWriteConsed.h"
#include "fileDefines.h"
#include "listOfLibraries.h"
#include "szGetTime.h"
#include "soGetDateTime.h"
#include "openLogFiles.h"
#include "fixContigEnd.h"
#include "soLine.h"
#include "rwctokenizer.h"


// Top-level driver: turns the GUI off, opens the log files and the ace
// file named by filAceFileToOpen_, loads the optional list of wanted
// contig ends, works on the contig ends (doItMore) and finally writes
// a new ace file (saveAssembly).  Closes pAO when done.
void fixContigEnds :: doIt() {

    pCP->bOKToUseGui_ = false;

    // create the ConsEd object, which owns and manages the
    // Assembly data structure(s) as well as the lists of
    // currently existing ContigWin and Teditor objects.
    // there is now a global pointer to this sole instance,
    // set in the ctor, accessible as ConsEd::global()
    ConsEd* pConsed = new ConsEd();

    openLogFiles( filAceFileToOpen_ );

    terminateIfNoPhdDir();

    maybeTerminateIfAnotherReadWriteConsed();

    // I want to read the list of libraries before reading the ace file
    // to make debugging by the user easier
    pCP->pListOfLibraries_ = new listOfLibraries();
    pCP->pListOfLibraries_->aLibraries_.soName_ =
        "pCP->pListOfLibraries_->aLibraries_ in autoEdit.cpp";
    pCP->pListOfLibraries_->parseLibraryFile();

    // this does "new Assembly"
    ConsEd::pGetConsEd()->openAssemblyFile( filAceFileToOpen_ );

    ConsEd::pGetConsEd()->whatToDoBeforeModifyAssembly();

    saveDesiredContigEnds();

    doItMore();

    saveAssembly();

    cerr << "Wrote new ace file: " << filNewAceFile_ << endl;
    cerr << "See output in " << pCP->filAutoFinishFullOutput_ << endl;

    fclose( pAO );
}


// Chooses the output ace file name and saves the assembly to it.
// If no name was supplied (filNewAceFile_ is null) the starting point is
// the next version of the input ace file; in either case the name is then
// passed through filFindOneHigherThanHighestVersion before writing.
void fixContigEnds :: saveAssembly() {

    if ( filNewAceFile_.bIsNull() ) {
        filNewAceFile_ = filAceFileToOpen_.filGetNextVersion();
    }

    filNewAceFile_ =
        filNewAceFile_.filFindOneHigherThanHighestVersion(
            filNewAceFile_.soGetDirectory(),
            1 ); // version if none already exists

    fprintf( pAO, "writing new ace file %s\n", filNewAceFile_.data() );

    ConsEd::pGetAssembly()->saveToFile( filNewAceFile_,
                                        false ); // ace file format2
}


// Walks a copy of the assembly's contig list and, for each contig that
// has more than one read, runs fixContigEnd on the right end and then on
// the left end -- each only if bWantThisContigEnd() says so.  An
// ExceptionBase raised while working on one contig is logged to pAO and
// that contig is skipped.
void fixContigEnds :: doItMore() {

    // NOTE(review): the template argument was missing from the collapsed
    // text of this file; <Contig> is restored because the elements are
    // read back as Contig* below -- confirm against the original.
    RWTPtrOrderedVector<Contig> aCopyOfListOfContigs(
        ConsEd::pGetAssembly()->dapContigs_ );

    // use the same string for all files in this run
    pCP->soDateTimeDotInMiddle_ = soGetDateTime( nDotInMiddle );

    for( int nContig = 0; nContig < aCopyOfListOfContigs.length();
         ++nContig ) {

        Contig* pContig = aCopyOfListOfContigs[ nContig ];

        if ( pContig->nGetNumberOfReads() == 1 ) continue;

        RWCString soOldName = pContig->soGetName();

        // doing this so that if a few contigs have problems,
        // the other thousands can still be fixed
        try {

            // handed to fixContigEnd::doIt(), which presumably may change
            // which Contig it points to -- TODO confirm against
            // fixContigEnd.h
            Contig* pContigAfterFixingOnRight = pContig;

            if ( bWantThisContigEnd( soOldName, nRightGap, pContig ) ) {

                cerr << "working on " << soOldName << " which is "
                     << nContig << " out of "
                     << aCopyOfListOfContigs.length() << " "
                     << szWhichGap( nRightGap ) << endl;

                fprintf( pAO, "fixing right end of %s\n",
                         soOldName.data() );

                fixContigEnd fixContigEndRight( pContig, nRightGap );
                fixContigEndRight.doIt( pContigAfterFixingOnRight );
            }

            Contig* pContigAfterFixingBothEnds = pContigAfterFixingOnRight;

            if ( bWantThisContigEnd( soOldName, nLeftGap,
                                     pContigAfterFixingOnRight ) ) {

                fprintf( pAO, "fixing left end of %s\n",
                         soOldName.data() );

                cerr << "working on " << soOldName << " which is "
                     << nContig << " out of "
                     << aCopyOfListOfContigs.length() << " "
                     << szWhichGap( nLeftGap ) << endl;

                fixContigEnd fixContigEndLeft( pContigAfterFixingOnRight,
                                               nLeftGap );
                fixContigEndLeft.doIt( pContigAfterFixingBothEnds );
            }
        } // try
        catch( ExceptionBase& eb ) {
            // caught by reference so the exception is neither copied
            // nor sliced
            fprintf( pAO,
                     "error with contig %s %s so ignoring this contig\n",
                     soOldName.data(),
                     eb.szGetDesc() );
        }
    }
}


// Reads the optional file filContigEndsFOF_.  Each line must be
// "(contig) (left or right)"; it is stored in aDesiredContigEnds_ as
// "<contig> <end>" with the end lower-cased, and the array is resorted
// once the whole file has been read.  Does nothing when no file was
// given.  Raises via THROW_FILE_ERROR if the file cannot be opened and
// via THROW_ERROR if the second word of a line is not left/right.
void fixContigEnds :: saveDesiredContigEnds() {

    if ( filContigEndsFOF_.bIsNull() ) {
        return;
    }

    FILE* pContigEnds = fopen( filContigEndsFOF_.data(), "r" );
    if ( !pContigEnds ) {
        THROW_FILE_ERROR( filContigEndsFOF_ );
    }

    while( fgets( soLine.data(), nMaxLineSize, pContigEnds ) != NULL ) {
        soLine.nCurrentLength_ = strlen( soLine.data() );

        // this eliminates multiple spaces in between and any trailing
        // whitespace.  It also converts a tab to a space
        RWCTokenizer tok( soLine );

        RWCString soContig = tok();
        RWCString soEnd = tok();

        soEnd.toLower();

        if ( soEnd != "left" && soEnd != "right" ) {
            // release the file handle before leaving through the throw
            fclose( pContigEnds );
            THROW_ERROR( "in file " + filContigEndsFOF_ + " line " +
                         soLine +
                         " should have (contig) (left or right) " +
                         " but second word is " + soEnd );
        }

        RWCString soNewLine = soContig + " " + soEnd;
        aDesiredContigEnds_.append( soNewLine );
    }

    fclose( pContigEnds );

    aDesiredContigEnds_.resort();
}


// Decides whether the given end of a contig should be worked on.
//   soOldName  contig name used for logging and for the lookup key
//   nWhichEnd  which end is being asked about (callers in this file
//              pass nLeftGap or nRightGap)
//   pContig    contig whose read count is checked
// Returns false (and logs the reason to pAO) when the contig has fewer
// reads than pCP->nFixContigEndsMinNumberOfReadsInContig_.  Otherwise
// returns true when no contig-ends file was given, else whether
// "<name> <szLeftOrRight(end)>" is present in aDesiredContigEnds_.
bool fixContigEnds :: bWantThisContigEnd( const RWCString& soOldName,
                                          const int nWhichEnd,
                                          Contig* pContig ) {

    if ( pContig->nGetNumberOfReads() <
         pCP->nFixContigEndsMinNumberOfReadsInContig_ ) {
        fprintf( pAO,
                 "ignoring %s %s because there are only %d reads which is less than minimum %d\n",
                 soOldName.data(),
                 szWhichGap( nWhichEnd ),
                 pContig->nGetNumberOfReads(),
                 pCP->nFixContigEndsMinNumberOfReadsInContig_ );
        return false;
    }

    if ( filContigEndsFOF_.bIsNull() ) return true;

    RWCString soKey = soOldName + " " + szLeftOrRight( nWhichEnd );

    return aDesiredContigEnds_.bContains( soKey );
}