/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "filename.h" #include "autoPCRAmplify.h" #include "rwcstring.h" #include "consed.h" #include "consedParameters.h" #include "regionToAmplify.h" #include "assembly.h" #include "contig.h" #include "bIsNumericMaybeWithWhitespace.h" #include "rwctokenizer.h" #include "fileDefines.h" #include "rwcregexp.h" #include #include #include "printAutoFinishMiscInfo.h" #include "terminateIfNoPhdDir.h" autoPCRAmplify :: autoPCRAmplify( const FileName& filAceFileToOpen, const FileName& filFileOfPrimerRegions ) : filAceFileToOpen_( filAceFileToOpen ), filFileOfPrimerRegions_( filFileOfPrimerRegions ) {} void autoPCRAmplify :: doIt() { pCP->bOKToUseGui_ = false; ConsEd* pConsed = new ConsEd(); openAutoPCRAmplifyOutputFile( filAceFileToOpen_ ); // this uses the output file to report the error // so it must have openAutoFinishOutputFile before it terminateIfNoPhdDir(); printAutoFinishMiscInfo( filAceFileToOpen_, "" ); // tried moving this to above to before openAutoFinishOutputFiles // so that, if // the user specifies a bogus ace file, such as -ace -ace, // we don't create several zero-length files that have filenames // that the shell can't handle. However, if I do that, then any // error encountered by openAssemblyFile will try to write to // pAO and give a segmentation fault // this does "new Assembly" ConsEd::pGetConsEd()->openAssemblyFile( filAceFileToOpen_ ); Assembly* pAssembly = ConsEd::pGetAssembly(); pAssembly->setContigMatchTablesInAllContigs(); pAssembly->setAllPaddedPositionsArrays(); pCP->setParametersForSequencingPrimersNotPCRPrimers( false ); // this must come after opening the ace file since the code // checks each contig/region to make sure it makes sense readFileOfPrimerRegions(); for( int nRegion = 0; nRegion < aRegionsToAmplify_.length(); ++nRegion ) { regionToAmplify* pRegion = aRegionsToAmplify_[ nRegion ]; pRegion->findPCRPrimerPair(); delete pRegion; } fclose( pAO ); } static const RWCString soUsage = "(region id) (contig1) (left position-right position) (arrow) (contig2) (left position-right position) (arrow) (biggest/smallest)"; void autoPCRAmplify :: readFileOfPrimerRegions() { FILE* pFileOfPrimerRegions = fopen( filFileOfPrimerRegions_.data(), "r" ); if ( !pFileOfPrimerRegions ) { if ( errno == 2 ) { fprintf( stderr, "There is no file %s containing the primer regions", filFileOfPrimerRegions_.data() ); exit( 1 ); } else { fprintf( stderr, "Could not open file %s due to error %d which means %s", filFileOfPrimerRegions_.data(), errno, strerror( errno ) ); exit( 1 ); } } const int nMaxLineSize = 300; RWCString soLine( (size_t) ( nMaxLineSize + 1 ) ); while( fgets( soLine.sz_, nMaxLineSize, pFileOfPrimerRegions ) != NULL ) { soLine.nCurrentLength_ = strlen( soLine.sz_ ); RWCTokenizer tok( soLine ); // region5 Contig345 28974-29584 -> Contig345 40154-40937 <- RWCString soRegionID = tok(); RWCString soContig1 = tok(); RWCString soRange1 = tok(); RWCString soArrow1 = tok(); RWCString soContig2 = tok(); RWCString soRange2 = tok(); RWCString soArrow2 = tok(); RWCString soBiggestOrSmallest = tok(); if ( soRegionID.isNull() || soContig1.isNull() || soRange1.isNull() || soArrow1.isNull() || soContig2.isNull() || soRange2.isNull() || soArrow2.isNull() || soBiggestOrSmallest.isNull() ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line:%s\nwas not of the form %s\nso ignoring this line", soLine.data(), soUsage.data() ); THROW_ERROR( soError ); } Contig* pContig1 = ConsEd::pGetAssembly()->pGetContigByName( soContig1 ); if ( !pContig1 ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line:%s\nbut contig %s was not found in assembly\nso not using this line", soLine.data(), soContig1.data() ); THROW_ERROR( soError ); } Contig* pContig2 = ConsEd::pGetAssembly()->pGetContigByName( soContig2 ); if ( ! pContig2 ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line:%s\nbut contig %s was not found in assembly\nso not using this line", soLine.data(), soContig2.data() ); THROW_ERROR( soError ); } regionToAmplify* pRegion = new regionToAmplify(); pRegion->soRegionID_ = soRegionID; pRegion->region1_.pContig_ = pContig1; pRegion->region2_.pContig_ = pContig2; int nUnpaddedLeft; int nUnpaddedRight; bool bError; parseRange( soLine, soRange1, pRegion->region1_.nUnpaddedLeft_, pRegion->region1_.nUnpaddedRight_, bError ); if ( bError ) continue; if ( ! ( pRegion->region1_.pContig_->nGetUnpaddedStartIndex() <= pRegion->region1_.nUnpaddedLeft_ && pRegion->region1_.nUnpaddedLeft_ <= pRegion->region1_.nUnpaddedRight_ && pRegion->region1_.nUnpaddedRight_ <= pRegion->region1_.pContig_->nGetUnpaddedEndIndex() ) ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line:%s\nbut range in first contig is not between contig unpadded start = %d and end = %d and in correct order\n", soLine.data(), pRegion->region1_.pContig_->nGetUnpaddedStartIndex(), pRegion->region1_.pContig_->nGetUnpaddedEndIndex() ); THROW_ERROR( soError ); } parseRange( soLine, soRange2, pRegion->region2_.nUnpaddedLeft_, pRegion->region2_.nUnpaddedRight_, bError ); if ( bError ) continue; if ( ! ( pRegion->region2_.pContig_->nGetUnpaddedStartIndex() <= pRegion->region2_.nUnpaddedLeft_ && pRegion->region2_.nUnpaddedLeft_ <= pRegion->region2_.nUnpaddedRight_ && pRegion->region2_.nUnpaddedRight_ <= pRegion->region2_.pContig_->nGetUnpaddedEndIndex() ) ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line:%s\nbut range in second contig is not between contig unpadded start = %d and end = %d and in correct order\n", soLine.data(), pRegion->region2_.pContig_->nGetUnpaddedStartIndex(), pRegion->region2_.pContig_->nGetUnpaddedEndIndex() ); THROW_ERROR( soError ); } if ( soArrow1 == "<-" ) { pRegion->bPrimersInRegion1PointRightNotLeft_ = false; } else if ( soArrow1 == "->" ) { pRegion->bPrimersInRegion1PointRightNotLeft_ = true; } else { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line: %s should be of form:\n%s\nwhere arrow is <- or -> but instead had %s so not using this line", soLine.data(), soUsage.data(), soArrow1.data() ); THROW_ERROR( soError ); } if ( soArrow2 == "<-" ) { pRegion->bPrimersInRegion2PointRightNotLeft_ = false; } else if ( soArrow2 == "->" ) { pRegion->bPrimersInRegion2PointRightNotLeft_ = true; } else { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line: %s should be of form:\n%s\nwhere arrow is <- or -> but instead had %s so not using this line", soLine.data(), soUsage.data(), soArrow2.data() ); THROW_ERROR( soError ); } soBiggestOrSmallest.toLower(); if ( soBiggestOrSmallest == "biggest" ) pRegion->bBiggestNotSmallest_ = true; else if ( soBiggestOrSmallest == "smallest" ) pRegion->bBiggestNotSmallest_ = false; else { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line: %s should be of form:\n%s\n where (biggest/smallest) is biggest or smallest instead of %s so not using this line", soLine.data(), soUsage.data(), soBiggestOrSmallest.data() ); THROW_ERROR( soError ); } aRegionsToAmplify_.insert( pRegion ); } // while( fgets ... } void autoPCRAmplify :: parseRange( const RWCString& soLine, const RWCString& soRange, int& nUnpaddedLeft, int& nUnpaddedRight, bool& bError ) { RWCTokenizer tokColon( soRange ); RWCString soFirst = tokColon('-'); RWCString soSecond = tokColon('-'); if ( soFirst.isNull() || soSecond.isNull() ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line: %s\nshould be of form:\n%s\nwhere (#-#) is number-number but instead found %s so not using this line", soLine.data(), soUsage.data(), soRange.data() ); THROW_ERROR( soError ); bError = true; return; } if ( !bIsNumericMaybeWithWhitespace( soFirst, nUnpaddedLeft ) ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line: %s\nshould be of form:\n%s\nwhere (#-#) is number-number but the first number %s was not numeric so not using this line", soLine.data(), soUsage.data(), soFirst.data() ); THROW_ERROR( soError ); bError = true; return; } if ( !bIsNumericMaybeWithWhitespace( soSecond, nUnpaddedRight ) ) { RWCString soError( (size_t) 1000 ); soError.nCurrentLength_ = sprintf( soError.data(), "line: %s\nshould be of form:\n%s\nwhere (#-#) is number-number but the second number %s was not numeric so not using this line", soLine.data(), soUsage.data(), soSecond.data() ); THROW_ERROR( soError ); bError = true; return; } bError = false; } void autoPCRAmplify :: openAutoPCRAmplifyOutputFile( const FileName& filAceFileToOpen) { // find project name RWCString soProjectName = filAceFileToOpen; RWCRegexp regFasta("\\.fasta\\.screen\\..*$"); soProjectName( regFasta ) = ""; time_t timee; time( &timee ); char szDate[8]; char szTime[8]; int nNumberOfChars; // for some reason, on HP if you change the 8 to a 7 you // get garbage characters like this: // standard.990924Ð.102551.customPrimers nNumberOfChars = strftime( szDate, 8, "%y%m%d", localtime( &timee ) ); // all this funny business is just to hide this from SCCS #define quote( A ) #A // for some reason, on HP if you change the 8 to a 7 you get // garbage characters like this: // standard.990924Ð.102551.customPrimers nNumberOfChars = strftime( szTime, 8, quote(%H)quote(%M)quote(%S), localtime( &timee ) ); pCP->filAutoFinishFullOutput_ = soProjectName + "." + szDate + "." + szTime + ".out"; pAO = fopen( (const char*) pCP->filAutoFinishFullOutput_, "w" ); if ( ! pAO ) { RWCString soError = "cannot open file: " + pCP->filAutoFinishFullOutput_; THROW_ERROR( soError ); } pFILE = pAO; cerr << "opened file " << pCP->filAutoFinishFullOutput_ << " for output" << endl; // open autoPCRAmplify.fof to record the name of the most recent // .out file FILE* pAutoPCRAmplifyFOF = fopen( "autoPCRAmplify.fof", "w" ); if ( ! pAutoPCRAmplifyFOF ) { RWCString soError = "cannot open file autoPCRAmplify.fof because of "; soError += RWCString( (long) errno ); soError += " which means "; soError += strerror( errno ); THROW_ERROR( soError ); } fprintf( pAutoPCRAmplifyFOF, "%s\n", pCP->filAutoFinishFullOutput_.data() ); fclose( pAutoPCRAmplifyFOF ); }