/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "consedParameters.h" #include "restrictionEnzymes.h" #include "popupErrorMessage.h" #include "rwctokenizer.h" #include "restrictionEnzyme.h" #include "assert.h" #define SZLINE_SIZE 500 static char szLine[ SZLINE_SIZE ]; void consedParameters :: makeListOfRestrictionEnzymes() { aRestrictionEnzymes_.clearAndDestroy(); aCommonRestrictionEnzymes_.clear(); int nNumberOfAdditionalEnzymes = 0; FILE *pFil = NULL; if ( !pCP->filFileOfAdditionalRestrictionEnzymes_.isNull() ) { pFil = fopen( pCP->filFileOfAdditionalRestrictionEnzymes_.data(), "r" ); if ( !pFil ) { popupErrorMessage( "the resource consed.fileOfAdditionalRestrictionEnzymes: %s could not open %s for reading so not including these resources", pCP->filFileOfAdditionalRestrictionEnzymes_.data() ); } else { // count the lines while( fgets( szLine, SZLINE_SIZE, pFil ) ) ++nNumberOfAdditionalEnzymes; rewind( pFil ); } } // count the default enzymes int nNumberOfDefaultEnzymes = 0; while( aszRestrictionEnzymes[ nNumberOfDefaultEnzymes ] ) ++nNumberOfDefaultEnzymes; aRestrictionEnzymes_.resize( nNumberOfDefaultEnzymes + nNumberOfAdditionalEnzymes ); for( int n = 0; n < nNumberOfDefaultEnzymes; ++n ) { RWCTokenizer soTok( aszRestrictionEnzymes[ n ] ); restrictionEnzyme* pEnz = new restrictionEnzyme(); pEnz->soName_ = soTok(); pEnz->soBases_ = soTok(); assert( !pEnz->soName_.isNull() ); assert( !pEnz->soBases_.isNull() ); aRestrictionEnzymes_.insert( pEnz ); } if ( pFil ) { int nLine = 0; while( fgets( szLine, SZLINE_SIZE, pFil ) != NULL ) { RWCTokenizer soTok( szLine ); restrictionEnzyme* pEnz = new restrictionEnzyme(); pEnz->soName_ = soTok(); pEnz->soBases_ = soTok(); if ( pEnz->soName_.isNull() || pEnz->soBases_.isNull() ) { popupErrorMessage( "line %d in file %s contains %s but should be of the form \"EcoRI GAATTC\" with just 2 words separated by whitespace", nLine, pCP->filFileOfAdditionalRestrictionEnzymes_.data(), szLine ); continue; } pEnz->soBases_.toLower(); RWCString soAllowedBases( "acgkmnrtwy" ); if ( strspn( pEnz->soBases_.data(), soAllowedBases.data() ) != pEnz->soBases_.length() ) { popupErrorMessage( "line %d in file %s contains %s but should be of the form \"EcoRI GAATTC\" with just 2 words separated by whitespace where the second word should contist just of the characters %s", nLine, pCP->filFileOfAdditionalRestrictionEnzymes_.data(), szLine, soAllowedBases.data() ); continue; } aRestrictionEnzymes_.insert( pEnz ); } // while( fgets( szLine, SZLINE_SIZE, pFil ) != NULL ) { } // if ( pFil ) { aRestrictionEnzymes_.resort(); // now parse the list of commonly used restriction enzymes RWCTokenizer tokCommonEnzymes( pCP->soCommonRestrictionEnzymes_ ); RWCString soCommonEnzyme; while( !(soCommonEnzyme = tokCommonEnzymes()).isNull() ) { restrictionEnzyme res; res.soName_ = soCommonEnzyme; int nIndex = aRestrictionEnzymes_.nFindIndexOfMatchOrSuccessor( &res ); // cerr << soCommonEnzyme << " " << nIndex << endl; if ( nIndex != RW_NPOS && aRestrictionEnzymes_[ nIndex ]->soName_ != soCommonEnzyme ) { popupErrorMessage( "consed.commonRestrictionEnzymes: %s but %s was not found in complete list of enzymes, neither in Consed's internal list nor in the file supplied with consed.fileOfAdditionalRestrictionEnzymes (if any)", pCP->soCommonRestrictionEnzymes_.data(), soCommonEnzyme.data() ); continue; } aCommonRestrictionEnzymes_.insert( soCommonEnzyme ); } }