/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include using namespace std; #include "mbt_exception.h" #include "addNewReadsAutomated.h" #include "consed.h" #include "rwcstring.h" #include "consedParameters.h" #include "terminateIfNoPhdDir.h" #include "popupErrorMessage.h" #include "maybeTerminateIfAnotherReadWriteConsed.h" #include "rwcregexp.h" #include "filGetUniqueFilename.h" #include "fileDefines.h" #include "addNewReads.h" #include "soGetErrno.h" static RWCRegexp regPhdFile( "\\.phd\\.[0-9]+$" ); void addNewReadsAutomated :: doIt( ) { pCP->bOKToUseGui_ = false; pCP->filAutoFinishFullOutput_ = filGetUniqueFilename( filAceFileToOpen_.soGetProjectFromAceFilename() ) + ".out"; pAO = fopen( pCP->filAutoFinishFullOutput_.data(), "w" ); if ( !pAO ) { RWCString soError = "could not open log file "; soError += pCP->filAutoFinishFullOutput_; soError += " for writing."; THROW_ERROR( soError ); } pFILE = pAO; // Nov 2006--record a little file saying what the name of the file // with the addNewReads output FILE* pFOF = fopen( "addNewReads.fof", "w" ); if ( pFOF ) { // just ignore problem if can't write this file fprintf( pFOF, "%s\n", pCP->filAutoFinishFullOutput_.data() ); fclose( pFOF ); } try { // use some of the code from addNewReads--the interactive // method of adding new reads pAddNewReads_ = new addNewReads(); // create the ConsEd object, which owns and manages the // Assembly data structure(s) as well as the lists of // currently existing ContigWin and Teditor objects. // there is now a global pointer to this sole instance, // set in the ctor, accessible as ConsEd::global() ConsEd* pConsed = new ConsEd(); // this uses the autofinish output files to report the error // so it must have openAutoFinishOutputFile before it terminateIfNoPhdDir(); maybeTerminateIfAnotherReadWriteConsed(); parseFileOfReads(); // this does "new Assembly" ConsEd::pGetConsEd()->openAssemblyFile( filAceFileToOpen_ ); if ( pCP->soAddNewReadsPutReadIntoItsOwnContig_.returnToLower() == "always" ) { for( int nRead = 0; nRead < pAddNewReads_->aTryingToAddTheseReads_.length(); ++nRead ) { FileName filPhdFile = pAddNewReads_->aTryingToAddTheseReads_[ nRead ]; if ( filPhdFile.index(regPhdFile ) == RW_NPOS ) { // the user has entered a list of reads rather // than a list of phd files. So convert them to phd files // for use by addSingleRead filPhdFile += ".phd.1"; } addSingleRead( filPhdFile ); } } else { // aReads_ must really be reads runAddReads2Consed(); // now use the addNewReads parser parseAlignmentsAndAddNewReads(); pAddNewReads_->readPHDFilesForNewReads(); pAddNewReads_->insertPadsInContigs(); pAddNewReads_->insertPadsInNewReadsAndSetReadBases(); pAddNewReads_->readPHDFilesJustForTagsAndWholeReadItems(); pAddNewReads_->convertVectorTagsToXsInNewReads(); if ( pCP->soAddNewReadsPutReadIntoItsOwnContig_.returnToLower() == "ifunaligned" ) { fprintf( pAO, "if any read did not align, putting it into its own contig\n" ); pAddNewReads_->makeReadsThatDidNotAlignIntoTheirOwnContigs(); } else { fprintf( pAO, "not putting reads into own contigs because %s\n", pCP->soAddNewReadsPutReadIntoItsOwnContig_.data() ); } ConsEd::pGetAssembly()->setNumberOfReadsInAssemblyAccurate(); pAddNewReads_->updateFirstAndLastDisplayableContigPositions(); if ( pCP->bAddNewReadsRecalculateConsensusQuality_ ) { pAddNewReads_->recalculateConsensusQualityValues(); } RWCString soMessage; pAddNewReads_->tellUserAllAboutIt2( soMessage ); fprintf( pAO, "%s", soMessage.data() ); pAddNewReads_->createNavigationFile(); } saveAssembly(); cerr << "Wrote new ace file: " << filNewAceFile_.soGetBasename() << endl; fprintf( pAO, "Wrote new ace file: %s\n", filNewAceFile_.data() ); } catch( ExceptionBase eb ) { if ( !eb.bUserNotified() ) { fprintf( pAO, "Fatal: exception thrown: %s\n", eb.szGetDesc() ); } fclose( pAO ); cerr << "See log file: " << pCP->filAutoFinishFullOutput_ << endl; exit( 0 ); } fclose( pAO ); cerr << "See log file: " << pCP->filAutoFinishFullOutput_ << endl; } static int nLine; const size_t nMaxLineSize = 1000; static RWCString soLine( (size_t) nMaxLineSize ); static FILE* pFileOfReads; #define PARSE_PANIC( message ) \ { ostringstream ost; \ ost << "error in file of phd files " << filFileOfReads_ << \ " at line " << \ nLine << \ ": \n" << \ soLine << endl << \ message << endl << \ "Error detected from source file " << \ __FILE__ << " at " << __LINE__ << endl << ends; \ InputDataError ide(ost.str().c_str()); \ throw ide; } void addNewReadsAutomated :: parseFileOfReads() { pFileOfReads = fopen( filFileOfReads_.data(), "r" ); if ( !pFileOfReads ) { if ( errno == 2 ) { PANIC_OST( ost ) << "Error: there is no file " << filFileOfReads_ << " so terminating" << ends; SysRequestFailed srf( ost.str().c_str() ); srf.includeErrnoDescription(); throw srf; } else { PANIC_OST( ost ) << "Error: could not open file of phd files " << filFileOfReads_ << ends; SysRequestFailed srf( ost.str().c_str() ); srf.includeErrnoDescription(); throw srf; } } while(1) { if ( fgets( soLine.sz_, nMaxLineSize, pFileOfReads ) == NULL ) break; ++nLine; soLine.nCurrentLength_ = strlen( soLine.sz_ ); // changed Nov 2004 soLine.stripAllWhitespaceExceptInternal(); if ( soLine.isNull() ) continue; pAddNewReads_->aTryingToAddTheseReads_.insert( soLine ); } fclose( pFileOfReads ); RWCString soErrorMessage; if ( pAddNewReads_->bAreThereAnyDuplicates( soErrorMessage ) ) { THROW_ERROR( soErrorMessage ); } } void addNewReadsAutomated :: addSingleRead( const FileName& filPhdFile ) { RWCString soReadName = filPhdFile.soGetReadNameFromPhdFileName(); RWCString soNewContigName = ConsEd::pGetConsEd()->pGetAssembly()->soGetNewContigName(); Contig* pContig = new Contig( (char*) soNewContigName.data(), 1, // new number of reads 1, // new number of base segments false, // complemented from the way // phrap created ConsEd::pGetAssembly() ); LocatedFragment* pLocFrag = new LocatedFragment( soReadName, 1, // position within contig false, // complemented pContig ); pLocFrag->setPHDFilename( filPhdFile ); pLocFrag->setChromatFilename( soReadName ); // we also need to read tags and whole read items // It appears that readPHDFileForAddedRead does not do this. // So this should be fixed. pLocFrag->readPHDFileForAddedRead(); if ( pLocFrag->pSeqPhredCalledBases_->length() == 0 ) { popupErrorMessage( "read %s doesn't have any bases", (const char*) soReadName ); // zero length read so don't make a contig out of it // or else bGetDataStructureOK will complain return; } pContig->resize( pLocFrag->pSeqPhredCalledBases_->length() ); ConsEd::pGetConsEd()->pGetAssembly()->addContig( pContig ); for( int nPos = pLocFrag->pSeqPhredCalledBases_->nGetStartIndex(); nPos <= pLocFrag->pSeqPhredCalledBases_->nGetEndIndex(); ++nPos ) { pLocFrag->append( (*(pLocFrag->pSeqPhredCalledBases_))[ nPos ] ); pContig->append( (*(pLocFrag->pSeqPhredCalledBases_))[ nPos ] ); } pLocFrag->nAlignClipStart_ = pContig->nGetStartIndex(); pLocFrag->nAlignClipEnd_ = pContig->nGetEndIndex(); pLocFrag->nQualClipStart_ = pContig->nGetStartIndex(); pLocFrag->nQualClipEnd_ = pContig->nGetEndIndex(); pContig->addLocatedFragment( pLocFrag ); pContig->updateLastDisplayableContigPos(); pContig->setUnpaddedPositionsArray(); pContig->setPaddedPositionsArray(); BaseSegment* pBaseSeg = new BaseSegment( pLocFrag, pContig->nGetStartIndex(), pContig->nGetEndIndex() ); pContig->baseSegArray_.addPtrToNewBaseSeg( pBaseSeg ); assert( pContig->baseSegArray_.bGetDataStructureOk() ); } void addNewReadsAutomated :: saveAssembly() { if ( filNewAceFile_.isNull() ) { filNewAceFile_ = filAceFileToOpen_.filGetNextVersion(); // find next version that doesn't already exist while( filNewAceFile_.bFileByThisNameExists() ) { filNewAceFile_ = filNewAceFile_.filGetNextVersion(); } } ConsEd::pGetAssembly()->saveToFile( filNewAceFile_, false ); // ace file format2 } void addNewReadsAutomated :: runAddReads2Consed() { RWCString soCommand = consedParameters::pGetConsedParameters()->filFullPathnameOfAddReads2ConsedScript_; soCommand += " "; soCommand += ConsEd::pGetAssembly()->soGetAceFileName(); if ( !filFileOfReads_.isNull() ) { soCommand += " "; soCommand += filFileOfReads_; } filCrossMatchOutput_ = filGetUniqueFilename( "addNewReadsAutomated" ) + ".cross"; soCommand += " "; soCommand += filCrossMatchOutput_; fprintf( pAO, "about to execute: %s\n", soCommand.data() ); fflush( pAO ); int nRetStat = system( (char*) soCommand.data() ); if ( nRetStat != 0 ) { RWCString soError = "Something went wrong running "; soError += soCommand; soError += " Gave error code "; soError += soGetErrno(); soError += " See xterm for more details on what went wrong."; THROW_ERROR( soError ); } } void addNewReadsAutomated :: parseAlignmentsAndAddNewReads() { FILE* pAlignmentsFile = fopen( filCrossMatchOutput_.data(), "r" ); if ( !pAlignmentsFile ) { RWCString soError = "could not open crossmatch output file: "; soError += filCrossMatchOutput_; soError += " error: "; soError += soGetErrno(); THROW_ERROR( soError ); } pAddNewReads_->nCurrentLine_ = 0; pAddNewReads_->parseAlignmentsAndAddReads2( pAlignmentsFile ); }