/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "tagSNPs.h" #include "guiRemoveReads.h" #include "addNewReads.h" #include "locatedFragment.h" #include "wholeReadItem.h" #include "mbt_exception.h" #include "automatedConsedInit.h" #include "assembly.h" #include "consed.h" #include "contig.h" #include "consedParameters.h" #include "filGetUniqueFilename.h" #include "fileDefines.h" #include "soGetErrno.h" #include "crossMatchInfoForRead.h" void tagSNPs :: doIt() { automatedConsedInit( filAceFileToOpen_ ); pAddNewReads_ = new addNewReads(); pAddNewReads_->bRecalculateConsensusQualityValues_ = false; pAddNewReads_->bIfAReadDoesNotFitPutItIntoItsOwnContig_ = false; runAddReads2Consed(); parseAlignmentsAndAddNewReads(); pAddNewReads_->readPHDFilesForNewReads(); pAddNewReads_->insertPadsInContigs(); pAddNewReads_->insertPadsInNewReadsAndSetReadBases(); pAddNewReads_->readPHDFilesJustForTagsAndWholeReadItems(); pAddNewReads_->convertVectorTagsToXsInNewReads(); // find the read that should receive the tags // look for the read that has a genomeRegion wholeReadItem pLocFragToTag_ = pGetLocFragToTag(); tagRead(); removeAllNewReads(); saveAssembly(); } LocatedFragment* tagSNPs :: pGetLocFragToTag() { Assembly* pAssembly = ConsEd::pGetAssembly(); LocatedFragment* pDesiredLocFrag = NULL; for( int nContig = 0; nContig < pAssembly->nNumContigs(); ++nContig ) { Contig* pContig = pAssembly->pGetContig( nContig ); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); for( int nWRI = 0; nWRI < pLocFrag->aWholeReadItems_.length(); ++nWRI ) { wholeReadItem* pWRI = pLocFrag->aWholeReadItems_[ nWRI ]; if ( pWRI->soType_ == "genomeRegion" ) { return( pLocFrag ); } } } } if ( !pDesiredLocFrag ) { THROW_ERROR( "could not find the read to use to transfer all SNP tags to because there was no read with a genomeRegion WR item" ); } } void tagSNPs :: tagRead() { for( int nSNPFakeRead = 0; nSNPFakeRead < pAddNewReads_->aCrossMatchInfoForReads_.length(); ++nSNPFakeRead ) { LocatedFragment* pSNPFakeRead = pAddNewReads_->aCrossMatchInfoForReads_[ nSNPFakeRead ]->pLocFrag_; // look for SNP tags for( int nTag = 0; nTag < pSNPFakeRead->aTags_.length(); ++nTag ) { tag* pTag = pSNPFakeRead->aTags_[ nTag ]; if ( pTag->soType_ == "polymorphism" ) { transferTag( pTag ); } } } } void tagSNPs :: transferTag( tag* pTag ) { if ( ! ( pLocFragToTag_->nGetAlignStart() <= pTag->nPaddedConsPosStart_ && pTag->nPaddedConsPosStart_ <= pLocFragToTag_->nGetAlignEnd() ) ) return; tag* pNewTag = new tag( pLocFragToTag_, pLocFragToTag_->pContig_, pTag->soType_, // "polymorphism" pTag->nPaddedConsPosStart_, pTag->nPaddedConsPosEnd_, true, // bWriteToPhdFileNotAceFile pTag->soComment_, pTag->soSource_, pTag->soDate_, false ); // bNoTrans pNewTag->soMiscData_ = pTag->soMiscData_; pLocFragToTag_->addTag( pNewTag ); // change the base to the ambiguity code char cBase = pTag->pLocFrag_->ntGetFragFromConsPos( pTag->nPaddedConsPosStart_ ).cGetBase(); ((Sequence*)pLocFragToTag_)->setBaseAtPos( pLocFragToTag_->nGetFragIndexFromConsPos( pTag->nPaddedConsPosStart_ ), cBase ); pLocFragToTag_->setChanged(); } void tagSNPs :: removeAllNewReads() { guiRemoveReads* pGuiRemoveReads = new guiRemoveReads(); for( int nSNPFakeRead = 0; nSNPFakeRead < pAddNewReads_->aCrossMatchInfoForReads_.length(); ++nSNPFakeRead ) { LocatedFragment* pSNPFakeRead = pAddNewReads_->aCrossMatchInfoForReads_[ nSNPFakeRead ]->pLocFrag_; pGuiRemoveReads->aReadsToRemove_.insert( pSNPFakeRead ); } pGuiRemoveReads->doItAlmostNoGui( 0, true ); // bDeleteNotJustPutInOwnContig // isn't there some fixing up of the contigs that occurs // after removing the reads? sorting the list? updating // the visible positions and the number of reads in the contig? } void tagSNPs :: runAddReads2Consed() { RWCString soCommand = pCP->filFullPathnameOfAddReads2ConsedScript_; soCommand += " "; soCommand += ConsEd::pGetAssembly()->soGetAceFileName(); soCommand += " "; soCommand += filFileOfReads_; filCrossMatchOutput_ = filGetUniqueFilename( "tagSNPs" ) + ".cross"; soCommand += " "; soCommand += filCrossMatchOutput_; fprintf( pAO, "about to execute %s\n", soCommand.data() ); fflush( pAO ); int nRetStat = system( soCommand.data() ); if ( nRetStat != 0 ) { RWCString soError = "Something went wrong running "; soError += soCommand; soError += " Gave error code "; soError += soGetErrno(); soError += " See xterm for more details on what went wrong."; THROW_ERROR( soError ); } } void tagSNPs :: parseAlignmentsAndAddNewReads() { FILE* pAlignmentsFile = fopen( filCrossMatchOutput_.data(), "r" ); if ( !pAlignmentsFile ) { RWCString soError = "could not open crossmatch output file: "; soError += filCrossMatchOutput_; soError += " error: "; soError += soGetErrno(); THROW_ERROR( soError ); } pAddNewReads_->nCurrentLine_ = 0; pAddNewReads_->parseAlignmentsAndAddReads2( pAlignmentsFile ); } void tagSNPs :: saveAssembly() { if ( filNewAceFile_.isNull() ) { filNewAceFile_ = filAceFileToOpen_.filGetNextVersion(); // find next version that doesn't already exist while( filNewAceFile_.bFileByThisNameExists() ) { filNewAceFile_ = filNewAceFile_.filGetNextVersion(); } } ConsEd::pGetAssembly()->saveToFile( filNewAceFile_, false ); // ace file format 2 cerr << "Wrote new ace file: " << filNewAceFile_ << endl; }