/***************************************************************************** nucBedMain.cpp (c) 2009 - Aaron Quinlan Hall Laboratory Department of Biochemistry and Molecular Genetics University of Virginia aaronquinlan@gmail.com Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ #include "nucBed.h" #include "version.h" using namespace std; // define our program name #define PROGRAM_NAME "bedtools nuc" // define our parameter checking macro #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) // function declarations void nuc_help(void); int nuc_main(int argc, char* argv[]) { // our configuration variables bool showHelp = false; // input files string fastaDbFile; string bedFile; string pattern; // checks for existence of parameters bool haveFastaDb = false; bool haveBed = false; bool printSeq = false; bool hasPattern = false; bool forceStrand = false; bool ignoreCase = false; bool useFullHeader = false; // check to see if we should print out some help if(argc <= 1) showHelp = true; for(int i = 1; i < argc; i++) { int parameterLength = (int)strlen(argv[i]); if((PARAMETER_CHECK("-h", 2, parameterLength)) || (PARAMETER_CHECK("--help", 5, parameterLength))) { showHelp = true; } } if(showHelp) nuc_help(); // do some parsing (all of these parameters require 2 strings) for(int i = 1; i < argc; i++) { int parameterLength = (int)strlen(argv[i]); if(PARAMETER_CHECK("-fi", 3, parameterLength)) { if ((i+1) < argc) { haveFastaDb = true; fastaDbFile = argv[i + 1]; i++; } } else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { if ((i+1) < argc) { haveBed = true; bedFile = argv[i + 1]; i++; } } else if(PARAMETER_CHECK("-seq", 4, parameterLength)) { printSeq = true; } else if(PARAMETER_CHECK("-s", 2, parameterLength)) { forceStrand = true; } else if(PARAMETER_CHECK("-C", 2, parameterLength)) { ignoreCase = true; } else if(PARAMETER_CHECK("-pattern", 8, parameterLength)) { if ((i+1) < argc) { hasPattern = true; pattern = argv[i + 1]; i++; } } else if(PARAMETER_CHECK("-fullHeader", 11, parameterLength)) { useFullHeader = true; } else { cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; showHelp = true; } } if (!haveFastaDb || !haveBed) { showHelp = true; } if (!showHelp) { NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq, hasPattern, pattern, forceStrand, ignoreCase, useFullHeader); delete nuc; } else { nuc_help(); } return 0; } void nuc_help(void) { cerr << "\nTool: bedtools nuc (aka nucBed)" << endl; cerr << "Version: " << VERSION << "\n"; cerr << "Summary: Profiles the nucleotide content of intervals in a fasta file." << endl << endl; cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi -bed " << endl << endl; cerr << "Options: " << endl; cerr << "\t-fi\tInput FASTA file" << endl << endl; cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl << endl; cerr << "\t-s\tProfile the sequence according to strand." << endl << endl; cerr << "\t-seq\tPrint the extracted sequence" << endl << endl; cerr << "\t-pattern\tReport the number of times a user-defined sequence" << endl; cerr << "\t\t\tis observed (case-sensitive)." << endl << endl; cerr << "\t-C\tIgnore case when matching -pattern. By defaulty, case matters." << endl << endl; cerr << "\t-fullHeader\tUse full fasta header." << endl; cerr << "\t\t- By default, only the word before the first space or tab " << "is used." << endl << endl; cerr << "Output format: " << endl; cerr << "\tThe following information will be reported after each BED entry:" << endl; cerr << "\t 1) %AT content" << endl; cerr << "\t 2) %GC content" << endl; cerr << "\t 3) Number of As observed" << endl; cerr << "\t 4) Number of Cs observed" << endl; cerr << "\t 5) Number of Gs observed" << endl; cerr << "\t 6) Number of Ts observed" << endl; cerr << "\t 7) Number of Ns observed" << endl; cerr << "\t 8) Number of other bases observed" << endl; cerr << "\t 9) The length of the explored sequence/interval." << endl; cerr << "\t 10) The seq. extracted from the FASTA file. (opt., if -seq is used)" << endl; cerr << "\t 11) The number of times a user's pattern was observed." << endl; cerr << "\t (opt., if -pattern is used.)" << endl << endl; // end the program here exit(1); }