/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include #include #include #include #include "consedResources.h" #include "filename.h" #include "rwcregexp.h" #include #include "assert.h" #include "fallbackConsedResources.h" #include "bIsNumericMaybeWithWhitespace.h" #include "bIsNumericDouble.h" #include #include "consed_version.h" consedResources* consedResources::pDefaultResources_ = NULL; consedResources* consedResources::pEnvVariableResources_ = NULL; consedResources* consedResources::pUsersHomeResources_ = NULL; consedResources* consedResources::pCurrentDirectoryResources_ = NULL; consedResources* consedResources::pCurrentResources_ = NULL; RWCString consedResources::soConsedHome_; #define PARSE_PANIC( szMessage ) \ { \ cerr << szMessage << endl \ << "Error in file " << soFilenameStatic << " at line " \ << nCurrentLineStatic << ". Line:\n" \ << soResourceLine << endl \ << "or:" << endl \ << szLine << endl \ << " consed resources error detected from source file " \ << __FILE__ << " at " << __LINE__ << endl \ << szConsedVersion << endl \ << ends; \ exit( EXIT_FAILURE ); \ } #define BAD_VALUE( szMessage, szValue ) \ { \ cerr << szMessage << endl \ << "Value: \"" << szValue << "\"" \ << "Error in file " << soFilenameStatic << " at line " \ << nCurrentLineStatic << ". Line (soResourceLine):\n" \ << soResourceLine << endl \ << "or (szLine):" << endl \ << szLine << endl \ << " source file: " \ << __FILE__ << " at " << __LINE__ << endl \ << szConsedVersion << endl \ << ends; \ exit( EXIT_FAILURE ); \ } #define SZLINE_SIZE 100000 char szLine[SZLINE_SIZE]; RWCString soFilenameStatic; int nCurrentLineStatic; void consedResources :: setConsedHome() { char* pConsedHome = getenv( "CONSED_HOME" ); if ( !pConsedHome ) { // cerr << "CONSED_HOME is not set--that is ok. Will use /usr/local/genome" << endl; soConsedHome_ = "/usr/local/genome"; } else { soConsedHome_ = pConsedHome; } } void consedResources :: getConsedResources( ) { setConsedHome(); // put the fallback resources in an instanciation of the resources object pDefaultResources_ = new consedResources(); loadDefaultResources(); // added 12/08 so doesn't print out resources just because // they contain $CONSED_HOME in the default pDefaultResources_->translateCONSED_HOMEForAllResources(); // if global variable is set, open that file and set the global resources pEnvVariableResources_ = new consedResources(); (*pEnvVariableResources_) = (*pDefaultResources_ ); loadEnvVariableResources(); // load resources from ~/.consedrc or ~/.Xdefaults (if the former doesn't // exist) pUsersHomeResources_ = new consedResources(); (*pUsersHomeResources_ ) = (*pEnvVariableResources_ ); loadUsersHomeResources(); // load resources from ./.consedrc pCurrentDirectoryResources_ = new consedResources(); (*pCurrentDirectoryResources_) = (*pUsersHomeResources_); loadCurrentDirectoryResources(); pCurrentResources_ = pCurrentDirectoryResources_; pCurrentResources_->translateCONSED_HOMEForAllResources(); pCurrentResources_->additionalChecksOnResources(); } // this is useful for doing checks that depend on combinations // of resources, rather than just one resource at a time void consedResources :: additionalChecksOnResources() { if ( ! ( nAutoFinishPotentialHighQualityPartOfReadStart_ < nAutoFinishPotentialHighQualityPartOfReadEnd_ ) ) { cerr << "Error: you must have consed.autoFinishPotentialHighQualityPartOfReadStart < consed.autoFinishPotentialHighQualityPartOfReadEnd but consed.autoFinishPotentialHighQualityPartOfReadStart = " << nAutoFinishPotentialHighQualityPartOfReadStart_ << " and consed.autoFinishPotentialLowQualityPartOfReadEnd = " << nAutoFinishPotentialHighQualityPartOfReadEnd_ << endl; exit( EXIT_FAILURE ); } if ( dAutoFinishRedundancy_ <= 0 ) { cerr << "Error: consed.autoFinishRedundancy must be > 0 but instead is " << dAutoFinishRedundancy_ << endl; exit( EXIT_FAILURE ); } if ( nAutoFinishCallHowManyReversesToFlankGaps_ <= 0 ) { cerr << "consed.autoFinishCallHowManyReversesToFlankGaps must be positive. If you want to turn off gap-flanking reverses, do it by using consed.autoFinishCallReversesToFlankGaps: false" << endl; cerr << "consed.autoFinishCallHowManyReversesToFlankGaps is used to determine how many fwd/rev pairs are required to be confident in a particular order/orientation of two contigs so clearly it must be at least 1." << endl; exit( EXIT_FAILURE ); } if ( nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ < 1 ) { cerr << "consed.autoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions must be at least 1" << endl; cerr << "if you are attempting to shut off custom primer subclone reactions, do it by using consed.autoFinishAllowCustomPrimerSubcloneReads: false" << endl; exit( EXIT_FAILURE ); } if ( bShowReadsAlphabetically_ && bShowReadsInAlignedReadsWindowOrderedByFile_ ) { cerr << "consed.showReadsAlphabetically and consed.showReadsInAlignedReadsWindowOrderGivenByFile must not both be true" << endl; exit( EXIT_FAILURE ); } if ( soAlwaysRunProgramToGetChromats_ != "true" && soAlwaysRunProgramToGetChromats_ != "false" && soAlwaysRunProgramToGetChromats_ != "last" ) { cerr << "consed.alwaysRunProgramToGetChromats was " << soAlwaysRunProgramToGetChromats_ << " but must be either true, false or last. last means that the program will first try to find the chromat in chromat_dir and then, if the read is a 454 read, will try to use sff2scf. If all of these fail, it will then try the program consed.programToRunToGetChromats" << endl; exit( EXIT_FAILURE ); } } void consedResources :: loadDefaultResources() { int n = 0; for( n = 0; aszFallbackConsedResources[ n ] != NULL; ++n ) { RWCString soResourceLine = aszFallbackConsedResources[n]; parseResourceLine( pDefaultResources_, soResourceLine, true, false ); } } void consedResources :: loadEnvVariableResources( ) { char* pConsedParameters = getenv( "CONSED_PARAMETERS" ); if ( !pConsedParameters ) { // cerr << "CONSED_PARAMETERS is not set--that is ok" << endl; return; } soFilenameStatic = pConsedParameters; FILE* pEnvVariableFile = fopen( pConsedParameters, "r" ); if ( !pEnvVariableFile ) { cerr << "Error: CONSED_PARAMETERS is set to " << pConsedParameters << " but couldn't open that file due to error " << errno << " " << strerror( errno ) << endl; exit( EXIT_FAILURE ); } nCurrentLineStatic = 0; bool bEndOfFile = false; do { if ( fgets( szLine, SZLINE_SIZE, pEnvVariableFile ) == NULL ) bEndOfFile = true; else { ++nCurrentLineStatic; parseResourceLine( pEnvVariableResources_, szLine, true, false ); } } while( !bEndOfFile ); fclose( pEnvVariableFile ); } void consedResources :: loadUsersHomeResources() { char* pHOME = getenv( "HOME" ); if ( !pHOME ) { cerr << "something is really screwed up because HOME is not set. Thus no user-defined resources can be set" << endl; return; } FileName filUserResources = pHOME; filUserResources += "/.consedrc"; bool bUsingConsedrc = true; if ( !filUserResources.bFileByThisNameExists() ) { bUsingConsedrc = false; filUserResources = pHOME; filUserResources += "/.Xdefaults"; // if neither file exists, just return if ( !filUserResources.bFileByThisNameExists() ) { cerr << "no ~/.consedrc file so no user resources will be used--that's ok" << endl; return; } } soFilenameStatic = filUserResources; FILE* pUserResourcesFile = fopen( (char*) filUserResources.data(), "r" ); if ( !pUserResourcesFile ) { cerr << "Error: couldn't open user resources file " << (char*) filUserResources.data() << " due to error " << errno << " " << strerror( errno ) << endl; exit( EXIT_FAILURE ); } // if we are parsing .Xdefaults, then allow resources from elsewhere // than consed bool bAllowResourcesNotFromConsed = ( bUsingConsedrc ? false : true ); nCurrentLineStatic = 0; bool bEndOfFile = false; do { if ( fgets( szLine, SZLINE_SIZE, pUserResourcesFile ) == NULL ) bEndOfFile = true; else { ++nCurrentLineStatic; parseResourceLine( pUsersHomeResources_, szLine, true, bAllowResourcesNotFromConsed ); } } while( !bEndOfFile ); fclose( pUserResourcesFile ); } void consedResources :: loadCurrentDirectoryResources() { const int nBigNumber = 500; char szCurrentDirectory[ nBigNumber+10 ]; getcwd( szCurrentDirectory, nBigNumber ); FileName filCurrentDirectoryResourcesFile = szCurrentDirectory; filCurrentDirectoryResourcesFile += "/.consedrc"; if ( !filCurrentDirectoryResourcesFile.bFileByThisNameExists() ) { cerr << "no ./.consedrc file so no project-specific resources--that's ok" << endl; return; } soFilenameStatic = filCurrentDirectoryResourcesFile; FILE* pCurrentDirectoryResourcesFile = fopen( (char*) filCurrentDirectoryResourcesFile.data(), "r" ); if ( !pCurrentDirectoryResourcesFile ) { cerr << "Error: couldn't open current directory resources file " << (char*) filCurrentDirectoryResourcesFile.data() << " due to error " << errno << " " << strerror( errno ) << endl; exit( EXIT_FAILURE ); } nCurrentLineStatic = 0; bool bEndOfFile = false; do { if ( fgets( szLine, SZLINE_SIZE, pCurrentDirectoryResourcesFile ) == NULL ) bEndOfFile = true; else { ++nCurrentLineStatic; parseResourceLine( pCurrentDirectoryResources_, szLine, true, // allow whitespace and comments false ); // allow resources other than consed } } while( !bEndOfFile ); fclose( pCurrentDirectoryResourcesFile ); } void consedResources :: parseResourceLine( consedResources* pRes, const RWCString& soResourceLine, const bool bAllowWhitespaceAndComments, const bool bAllowResourcesOtherThanConsed ) { if ( bAllowWhitespaceAndComments ) { if ( soResourceLine.length() == 0 ) return; if ( soResourceLine[0] == '!' ) return; // check if entire line is whitespace RWCRegexp regWhitespace( "^[ \t\n\r]*$" ); if ( soResourceLine( regWhitespace ) == soResourceLine ) return; } int nTemp = soResourceLine.index( "consed."); if ( nTemp == RW_NPOS ) { if ( bAllowResourcesOtherThanConsed ) return; else PARSE_PANIC( "line doesn't begin with consed." ) } int nColon = soResourceLine.index( ":" ); if ( nColon == RW_NPOS ) PARSE_PANIC( "resource line doesn't have \":\" in it" ) // if reached here, line starts with consed.xxxx: RWCString soResourceName = soResourceLine(0, nColon ); RWCString soResourceValue; bool bThereIsAResourceValue = true; if ( nColon + 1 >= soResourceLine.length() ) bThereIsAResourceValue = false; else { soResourceValue = (soResourceLine.soGetRestOfString( nColon + 1 )).stripWhitespace( RWCString::BOTH ); if (soResourceValue.isNull() ) bThereIsAResourceValue = false; } if ( !bThereIsAResourceValue ) { // resource is not set, which normally indicates an error // Put all resources here for which it is ok to have no default value. if ( soResourceName.index( "consed.tagColorCustomTag" ) == RW_NPOS && soResourceName.index( "consed.customTag" ) == RW_NPOS && soResourceName.index( "consed.tagColorCustomConsensusTag" ) == RW_NPOS && soResourceName.index( "consed.customConsensusTag" ) == RW_NPOS && soResourceName.index( "consed.fileOfAdditionalRestrictionEnzymes" ) == RW_NPOS && soResourceName.index( "consed.defaultVectorPathnameForRestrictionFragments" ) == RW_NPOS && soResourceName.index( "consed.fileOfTagTypes" ) == RW_NPOS && soResourceName.index( "consed.dumpContigOrderAndOrientationInfoToThisFile" ) == RW_NPOS && soResourceName.index( "consed.assemblyViewOnlyShowSequenceMatchesToThisContig" ) == RW_NPOS && soResourceName.index( "consed.userWantsToSaveToThisAceFile" ) == RW_NPOS && soResourceName.index( "consed.autoReportPrintDiscrepantRegionsButIgnoreReadsContainingThis" ) == RW_NPOS ) { PARSE_PANIC( "resource line has consed.xxxx: with nothing following it" ) } else // don't set anything since no value return; } if ( soResourceName == "consed.consensusTagColorRepeat" ) { pRes->soTagColorRepeat_ = soResourceValue; return; } if ( soResourceName == "consed.nMaximumNumberOfTracesShown" ) { if ( !bIsNumericMaybeWithWhitespace( soResourceValue, pRes->nMaximumNumberOfTracesShown_ ) ) BAD_VALUE( "value for resource consed.nMaximumNumberOfTracesShown must be numeric", soResourceValue ); return; } if ( soResourceName == "consed.showProteinTranslation" ) { soResourceValue.toLower(); if ( soResourceValue == "true" ) pRes->bShowProteinTranslation_ = true; else if ( soResourceValue == "false" ) pRes->bShowProteinTranslation_ = false; else BAD_VALUE( "value for resource consed.showProteinTranslation must be either true or false", soResourceValue ) return; } if ( soResourceName == "consed.primersMinInsertSizeOfASubclone" ) { cerr << "Warning: resource consed.primersMinInsertSizeOfASubclone is no longer used. Instead, use consed.autoFinishIfNotEnoughFwdRevPairUseThisPerCentOfInsertSize for the same effect." << endl; return; } // this is for compatibility with the old name for this parameter if ( soResourceName == "consed.autoFinishCostOfUniversalPrimerSubcloneReaction" ) { cerr << "Warning: change resource name in .consedrc file from old name: \nconsed.autoFinishCostOfUniversalPrimerSubcloneReaction\nto new name\nconsed.autoFinishCostOfResequencingUniversalPrimerSubcloneReaction" << endl; if ( !bIsNumericDouble( soResourceValue, pRes->dAutoFinishCostOfResequencingUniversalPrimerSubcloneReaction_ ) ) BAD_VALUE( "value for resource consed.autoFinishCostOfUniversalPrimerSubcloneReaction must be a decimal number", soResourceValue ) return; } if ( soResourceName == "consed.primersALittleLessThanAverageInsertSizeOfASubclone" ) { cerr << "Warning: consed.primersALittleLessThanAverageInsertSizeOfASubclone is no longer used. Instead, use consed.autoFinishIfNotEnoughFwdRevPairUseThisPerCentOfInsertSize for the same effect." << endl; return; } if ( soResourceName == "consed.autoFinishCreateExpSummaryFiles" ) { cerr << "Warning: consed.autoFinishCreateExpSummaryFiles is no longer used." << endl; return; } if ( soResourceName == "consed.autoFinishMinNumberOfForwardReversePairsToCalculateAverageInsertSize" ) { cerr << "Warning: consed.autoFinishMinNumberOfForwardReversePairsToCalculateAverageInsertSize is not longer used. It is superceded by consed.autoFinishMinNumberOfForwardReversePairsInLibraryToCalculateAverageInsertSize" << endl; return; } if ( soResourceName == "consed.autoFinishIfNotEnoughFwdRevPairUseThisPerCentOfInsertSize" ) { cerr << "Warning: consed.autoFinishIfNotEnoughFwdRevPairUseThisPerCentOfInsertSize superceded by consed.autoFinishIfNotEnoughFwdRevPairsUseThisPerCentOfInsertSize (just changed Pair to Pairs)" << endl; exit( 1 ); } if ( soResourceName == "consed.autoFinishNumberOfGapClosingReadsPerContigEnd" ) { cerr << "Warning: consed.autoFinishNumberOfGapClosingReadsPerContigEnd is no longer used." << endl; return; } if ( soResourceName == "consed.autoFinishTryHarderToSuggestExperimentsToCoverLowQualityRegions" ) { cerr << "Warning: consed.autoFinishTryHarderToSuggestExperimentsToCoverLowQualityRegions is no longer used." << endl; return; } if ( soResourceName == "consed.autoFinishLookForRepeatedForwardUniversalPrimerReadThisFarAway" ) { cerr << "Warning: consed.autoFinishLookForRepeatedForwardUniversalPrimerReadThisFarAway is no longer used." << endl; return; } if ( soResourceName == "consed.autoFinishExcludeContigIfDepthOfCoverageOutOfLine" ) { cerr << "Warning: consed.autoFinishExcludeContigIfDepthOfCoverageOutOfLine is no longer used and is superceded by consed.autoFinishExcludeContigIfDepthOfCoverageGreaterThanThis" << endl; return; } if ( soResourceName == "consed.autoFinishExcludeContigIfDepthOfCoverageThisMuchMoreThanLargestContig" ) { cerr << "Warning: consed.autoFinishExcludeContigIfDepthOfCoverageThisMuchMoreThanLargestContig is no longer used and is superceded by consed.autoFinishExcludeContigIfDepthOfCoverageGreaterThanThis" << endl; return; } if ( soResourceName == "consed.autoFinishModelReadLengthForPoorQualityProjects" ) { cerr << "Warning: consed.autoFinishModelReadLengthForPoorQualityProjects is no longer used" << endl; return; } if ( soResourceName == "consed.autoFinishExcludeContigIfTooShort" ) { cerr << "Warning: consed.autoFinishExcludeContigIfTooShort is no longer used. Instead use consed.autoFinishExcludeContigIfThisManyBasesOrLess which can even be 0" << endl; return; } if ( soResourceName == "consed.showReadsSortedByQualityValuesAtCursor" ) { cerr << "Warning: consed.showReadsSortedByQualityValuesAtCursor is no longer used. Instead use consed.showReadsAtCursorSortedHow: which can take values quality, base, or none. Note that currently this only applies when the cursor is set on the consensus position. When scrolling, the reads are sorted according to consed.showReadsAlphabetically and consed.showReadsInAlignedReadsWindowOrderedByFile" << endl; return; } if ( soResourceName == "consed.addNewReadsExtendConsensusUsingProtrudingNewReads" ) { PARSE_PANIC( "FATAL: consed.addNewReadsExtendConsensusUsingProtrudingNewReads is no longer used. Instead, add new reads without extending and then use consed -ace (ace file) -fixContigEnds" ); } if ( soResourceName == "consed.addNewReadsExtendConsensusScript" ) { cerr << "Warning: consed.addNewReadsExtendConsensusScript is no longer used. If you are trying to extend the consensus, use consed (ace file) -fixContigEnds" << endl; return; }