/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "batchChangeConsensus.h" #include "automatedConsedInit.h" #include "soLine.h" #include "mbt_exception.h" #include "contig.h" #include "rwctokenizer.h" #include "consed.h" #include "soAddCommas.h" #include "bIsNumericMaybeWithWhitespace.h" #include "writeNewAceFileNameToFile.h" #define THROW_ERR( szError ) { ostringstream ost; \ ost << szError << endl << "line: " << nLine << endl << soLine << ends; \ InputDataError ide( ost.str().c_str() ); \ throw ide; } void batchChangeConsensus :: doIt() { automatedConsedInit( filAceFileToOpen_ ); FILE* pInput = fopen( filChangeConsensus_.data(), "r" ); if ( !pInput ) { RWCString soError = "file of locations to change: " + filChangeConsensus_; THROW_FILE_ERROR( soError ); } RWCString soLastContig; Contig* pLastContig = NULL; int nLine = 0; while( fgets( soLine.data(), nMaxLineSize, pInput ) != NULL ) { ++nLine; soLine.nCurrentLength_ = strlen( soLine.data() ); if ( soLine.bIsWhitespace() ) continue; // looks like: // Contig27 28-30 a RWCTokenizer tok( soLine ); RWCString soContig = tok(); RWCString soRange = tok(); RWCString soBase = tok(); if ( soBase.bIsNull() ) { RWCString soError = "the line must look like Contig27 28-30 a where Contig27 is the contig, 28-30 is the range in unpadded positions, and a is the new base, but your line doesn't contain enough words: "; THROW_ERR( soError ); } Contig* pContig = NULL; if ( soContig == soLastContig ) { pContig = pLastContig; } else { pContig = ConsEd::pGetAssembly()->pGetContigByVariousNamesCaseInsensitive( soContig ); pLastContig = pContig; soLastContig = soContig; } if ( !pContig ) { RWCString soError = "contig " + soContig + " doesn't exist"; THROW_ERR( soError ); } // I'll allow a single position int nPaddedLeft; int nPaddedRight; if ( soRange.bContainsChar( '-' ) ) { // split into 2 parts RWCTokenizer tokRange( soRange ); RWCString soLeft = tokRange('-' ); RWCString soRight = tokRange('-' ); if ( !soLeft.bStartsWithAndRemove("*" ) ) { int nUnpaddedLeft; if (!bIsNumericMaybeWithWhitespace( soLeft, nUnpaddedLeft ) ) { RWCString soError = "the line must look like Contig27 28-30 a where Contig27 is the contig, 28-30 is the range in unpadded positions, and a is the new base, but your line, " + soLine + " has range " + soRange + " with left position " + soLeft + " which isn't numeric"; THROW_ERR( soError ); } nPaddedLeft = pContig->nPaddedIndexFast( nUnpaddedLeft ); } else { // this is a padded position already if ( !bIsNumericMaybeWithWhitespace( soLeft, nPaddedLeft ) ) { RWCString soError = "the line must look like Contig27 *28-*30 a where Contig27 is the contig, *28-*30 is the range in padded positions, and a is the new base, but your line, " + soLine + " has range " + soRange + " with left padded position " + soLeft + " which isn't numeric"; THROW_ERR( soError ); } } // now check the right position if ( !soRight.bStartsWithAndRemove("*" ) ) { int nUnpaddedRight; if ( !bIsNumericMaybeWithWhitespace( soRight, nUnpaddedRight ) ) { RWCString soError = "the line must look like Contig27 28-30 a where Contig27 is the contig, 28-30 is the range in unpadded positions, and a is the new base, but your line, " + soLine + " has range " + soRange + " with right unpadded position " + soRight + " which isn't numeric"; THROW_ERR( soError ); } nPaddedRight = pContig->nPaddedIndexFast( nUnpaddedRight ); } else { // this is a padded position already if ( !bIsNumericMaybeWithWhitespace( soRight, nPaddedRight ) ) { RWCString soError = "the line must look like Contig27 *28-*30 a where Contig27 is the contig, *28-*30 is the range in padded positions, and a is the new base, but your line, " + soLine + " has range " + soRange + " with right padded position " + soRight + " which isn't numeric"; THROW_ERR( soError ); } } } else { // not a range--just a single base. OK. What a pain... if ( !soRange.bStartsWithAndRemove("*" ) ) { int nUnpadded; if ( !bIsNumericMaybeWithWhitespace( soRange, nUnpadded ) ) { RWCString soError = "the line must look like Contig27 28 a where Contig27 is the contig, 28 is the position (or a range of positions), and a is the new base, but your line, " + soLine + " has position " + soRange + " which isn't numeric"; THROW_ERR( soError ); } nPaddedRight = pContig->nPaddedIndexFast( nUnpadded ); nPaddedLeft = nPaddedRight; } else { if ( !bIsNumericMaybeWithWhitespace( soRange, nPaddedRight ) ) { RWCString soError = "the line must look like Contig27 *28 a where Contig27 is the contig, *28 is the padded position (or a range of positions), and a is the new base, but your line, " + soLine + " has position " + soRange + "which isn't numeric"; THROW_ERR( soError ); } nPaddedLeft = nPaddedRight; } } // got contig, got range of positions. Shall we do any checking on // the base itself? if ( soBase.length() != 1 ) { RWCString soError = "the line must look like Contig27 28-30 a where Contig27 is the contig, 28-30 is the range in unpadded positions, and a is the new base, but your line, " + soLine + " has base " + soBase + " which is more than one character"; THROW_ERR( soError ); } char cNewBase = soBase[0]; if ( !isalpha( cNewBase ) && ( cNewBase != '*' ) ) { RWCString soError = "base must be a letter or * but is " + RWCString( cNewBase ); THROW_ERR( soError ); } // check that the base is within the contig if ( nPaddedRight < nPaddedLeft ) { int nTemp = nPaddedRight; nPaddedRight = nPaddedLeft; nPaddedLeft = nTemp; } if ( nPaddedLeft < pContig->nGetStartIndex() ) { RWCString soError = pContig->soGetName() + " goes from " + soAddCommas( pContig->nGetUnpaddedStartIndex() ) + " to " + soAddCommas( pContig->nGetUnpaddedEndIndex() ) + " but range " + soRange + " is outside this"; THROW_ERR( soError ); } if ( pContig->nGetEndIndex() < nPaddedRight ) { RWCString soError = pContig->soGetName() + " goes from " + soAddCommas( pContig->nGetUnpaddedStartIndex() ) + " to " + soAddCommas( pContig->nGetUnpaddedEndIndex() ) + " but range " + soRange + " is outside this"; THROW_ERR( soError ); } for( int nConsPos = nPaddedLeft; nConsPos <= nPaddedRight; ++nConsPos ) { pContig->Sequence::setBaseAtPos( nConsPos, cNewBase ); pContig->Sequence::setQualityAtSeqPos( nConsPos, ucQualityLowEdited ); } // need to add edit tag here. Also when editing the consensus. tag* pEditTag = new tag( NULL, // LocatedFragment pContig, "edit", nPaddedLeft, nPaddedRight, false, // bWriteToPhdFileNotAceFile "", // soComment "changeConsensus", // soSource soEmptyString, // current date/time false ); // bNoTrans pContig->addConsensusTag( pEditTag ); } // while( fgets( soLine.data(), nMaxLineSize, pInput ) != NULL ) { fclose( pInput ); pCP->filUserWantsToSaveToThisAceFile_ = filNewAceFile_; FileName filNewAceFile = ConsEd::pGetAssembly()->filSaveAssemblyToUserSpecifiedOrNextAvailableVersionOfAceFile(); writeNewAceFileNameToFile( filNewAceFile ); } // void batchChangeConsensus :: doIt() {