/**
 ** Copyright (c) 2007-2010 Illumina, Inc.
 **
 ** This software is covered by the "Illumina Genome Analyzer Software
 ** License Agreement" and the "Illumina Source Code License Agreement",
 ** and certain third party copyright/licenses, and any user of this
 ** source file is bound by the terms therein (see accompanying files
 ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
 ** Illumina_Source_Code_License_Agreement.pdf and third party
 ** copyright/license notices).
 **
 ** This file is part of the Consensus Assessment of Sequence And VAriation
 ** (CASAVA) software package.
 **
 ** \file lib/common/StringUtil.cpp
 **
 ** \brief String utilities
 **
 ** String utilities
 **
 ** \author Lukasz Szajkowski
 **/

// NOTE(review): the header names of the six #include directives were lost in
// this copy of the file. The list below is reconstructed from the standard
// facilities the code in this file uses -- confirm against the upstream source.
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>

using namespace std;

/*
 * Convert input string into vector of string tokens
 * strin   string to be parsed
 * delims  list of delimiters.
 *
 * Every character of delims is a separator and a run of separators counts as
 * one, so no empty tokens are produced. A string made only of delimiters
 * yields an empty vector.
 *
 * An empty strin or an empty delims is treated as fatal: a message is written
 * to stderr and the process terminates through exit(-1).
 */
std::vector<std::string> tokenize_str(const std::string & strin,
                                      const std::string & delims=", \t")
{
    if (strin.length() == 0) {
        fprintf(stderr, "tokenize_str: Error - empty string\n");
        exit(-1);
    }
    if (delims.length() == 0) {
        fprintf(stderr, "tokenize_str: Error - empty delims\n");
        exit(-1);
    }

    // output vector
    vector<string> tokens;

    // tokenBegin is the first character of the next token, or npos when only
    // delimiters (or nothing) remain.
    string::size_type tokenBegin = strin.find_first_not_of(delims, 0);
    while (string::npos != tokenBegin) {
        // tokenEnd may be npos for the last token; substr() then clips the
        // length to the end of the string.
        const string::size_type tokenEnd = strin.find_first_of(delims, tokenBegin);
        tokens.push_back(strin.substr(tokenBegin, tokenEnd - tokenBegin));
        tokenBegin = strin.find_first_not_of(delims, tokenEnd);
    }
    return tokens;
}

/*
 * Returns a new[]-allocated, NUL-terminated copy of the len characters that
 * start at first.
 */
static char* newTokenCopy(const char* first, size_t len)
{
    char* token = new char[len + 1];
    memcpy(token, first, len);
    token[len] = '\0';
    return token;
}

/*
 * Splits the NUL-terminated string str at every occurrence of delim and
 * stores a separately allocated copy of each non-empty field in tokenList.
 * Empty fields (leading, trailing or consecutive delimiters) are skipped.
 *
 * tokenList is cleared first; pointers it held before the call are dropped
 * without being freed. Each stored token is allocated with new[], and this
 * function never frees them.
 *
 * NOTE(review): the element type of tokenList was lost in this copy of the
 * file; char* is assumed -- confirm against the declaration in the header.
 */
void fastTokenizer(const char* str, char delim, vector<char*>& tokenList)
{
    tokenList.clear();

    size_t start = 0; // index of the first character of the current field
    size_t i = 0;
    for (; str[i]; i++) {
        if (str[i] == delim) {
            if (i > start) {
                tokenList.push_back(newTokenCopy(&str[start], i - start));
            }
            start = i + 1;
        }
    }
    // Last field, when the string does not end with a delimiter.
    if (start < i) {
        tokenList.push_back(newTokenCopy(&str[start], i - start));
    }
}

/*
 * Splits str in place: the delimiter that ends each non-empty field is
 * overwritten with '\0' and a pointer to the start of that field inside str
 * is stored in tokenList. Nothing is allocated, so the tokens are only valid
 * while the buffer behind str is. Delimiters that close an empty field are
 * left untouched.
 *
 * tokenList is cleared first.
 *
 * NOTE(review): the element type of tokenList was lost in this copy of the
 * file; char* is assumed -- confirm against the declaration in the header.
 */
void fastTokenizerInPlace(char* str, char delim, vector<char*>& tokenList)
{
    tokenList.clear();

    size_t start = 0; // index of the first character of the current field
    size_t i = 0;
    for (; str[i]; i++) {
        if (str[i] == delim) {
            if (i > start) {
                str[i] = '\0';
                tokenList.push_back(&str[start]);
            }
            start = i + 1;
        }
    }
    // Last field; it is already terminated by the string's own '\0'.
    if (start < i) {
        tokenList.push_back(&str[start]);
    }
}

/*
 * Replaces matching characters in the string with given new characters:
 * a character equal to findChars[j] becomes replChars[j].
 *
 * Each character of strin is translated at most once, using the first
 * matching entry of findChars, so mappings that swap characters (for example
 * "ACGT" -> "TGCA") behave as expected.
 *
 * return number of characters replaced, or -1 (strin untouched) when
 * findChars and replChars differ in length.
 */
int replaceTR(std::string & strin, const std::string & findChars,
              const std::string & replChars)
{
    if (findChars.length() != replChars.length()) {
        return -1;
    }

    int replCount = 0;
    for (string::size_type i = 0; i < strin.length(); i++) {
        // One lookup per character: the replacement is never re-matched
        // against later entries of findChars.
        const string::size_type j = findChars.find(strin[i]);
        if (j != string::npos) {
            strin[i] = replChars[j];
            replCount++;
        }
    }
    return replCount;
}

/*****************************************************************************/
// Returns a ptr to a newly allocated C-string that is a copy of the supplied
// string.
/*
 * Returns a pointer to a newly allocated, NUL-terminated character array
 * holding a copy of all str.size() characters of str.
 *
 * The array is allocated with new[]; this function never frees it.
 */
char* copy_to_new_cstr(const std::string& str)
{
    const std::string::size_type len = str.size();

    // Plain new[] signals failure by throwing std::bad_alloc and never
    // returns a null pointer, so no null check is needed here.
    char* buf_ptr = new char[len + 1];

    // std::string::copy transfers exactly len characters (embedded NULs
    // included) and does not append a terminator.
    str.copy(buf_ptr, len);
    buf_ptr[len] = '\0';
    return buf_ptr;
}

/*
 * If str ends with old_suffix and is strictly longer than it, stores str
 * with old_suffix replaced by new_suffix in new_str and returns true.
 * Otherwise returns false and leaves new_str untouched.
 */
bool replace_suffix(const std::string& str, const std::string& old_suffix,
                    const std::string& new_suffix, std::string& new_str)
{
    const std::string::size_type str_len(str.length());
    const std::string::size_type old_suffix_len(old_suffix.length());

    // At least one character must remain in front of the suffix.
    if (str_len <= old_suffix_len) {
        return false;
    }

    const std::string::size_type stem_len(str_len - old_suffix_len);
    if (str.compare(stem_len, old_suffix_len, old_suffix) != 0) {
        return false;
    }

    // The result is built as a temporary first, so new_str may alias str or
    // new_suffix.
    new_str = (str.substr(0, stem_len) + new_suffix);
    return true;
}

/*
 * Counts the whitespace-separated columns in the NUL-terminated string str,
 * i.e. the number of maximal runs of characters for which isspace() is
 * false. Leading and trailing whitespace does not add columns.
 */
unsigned GetStringColCount(const char* str)
{
    unsigned ncols(0);
    bool is_in_space(true);
    for (const char* cp(str); *cp != '\0'; ++cp) {
        // isspace() is only defined for values representable as unsigned
        // char (or EOF); a plain char may be negative for non-ASCII bytes.
        const bool is_space(isspace(static_cast<unsigned char>(*cp)) != 0);
        if (!is_space && is_in_space) {
            ncols++; // first character of a new column
        }
        is_in_space = is_space;
    }
    return ncols;
}

/*****************************************************************************/