/*****************************************************************************
  VectorOps.cpp

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Purpose: Container to perform various useful operations
  (e.g., min, max, count, distinct, etc.) on vectors.

  Licensed under the GNU General Public License 2.0 license.
******************************************************************************/
#include "VectorOps.h"

#include <limits>

// NOTE(review): this file arrived with its line breaks collapsed and its
// template argument lists stripped.  The element types used below
// (vector<string> for _vecs, vector<double> for _vecd, map<string,int> and
// pair<int,string> for the frequency tables) were reconstructed from how the
// values are used -- confirm them against VectorOps.h.


// Converts a single string to a double.
// Prints an error to stderr and terminates the program (exit code 1) when no
// number can be extracted from the start of `element`.
double MakeDouble(const string &element) {
    std::istringstream i(element);
    double x;
    if (!(i >> x)) {
        cerr << "Error: Could not properly convert string to numeric (\"" + element + "\")" << endl;
        exit(1);
    }
    return x;
}


// Converts a single string to the absolute value of the double it holds.
// Shares MakeDouble's error handling (message on stderr, then exit(1)).
double MakeAbsDouble(const string &element) {
    return fabs(MakeDouble(element));
}


// Orders (count, value) pairs by descending count.
struct ValueGreaterThan {
    bool operator()(const pair<int, string> &lhs,
                    const pair<int, string> &rhs) const {
        return lhs.first > rhs.first;
    }
};


// Orders (count, value) pairs by ascending count.
struct ValueLessThan {
    bool operator()(const pair<int, string> &lhs,
                    const pair<int, string> &rhs) const {
        return lhs.first < rhs.first;
    }
};


// Value returned by the numeric operations when there is nothing to compute.
static double NotANumber() {
    return std::numeric_limits<double>::quiet_NaN();
}


// Replaces the contents of `out` with every element of `in` passed through
// `convert`.  `out` is cleared first: the numeric operations all share one
// scratch vector, and appending without clearing would double count values
// whenever more than one operation is requested from the same object.
static void ConvertAll(const vector<string> &in,
                       vector<double> &out,
                       double (*convert)(const string &)) {
    out.clear();
    out.reserve(in.size());
    transform(in.begin(), in.end(), back_inserter(out), convert);
}


// Returns the sum of squared differences between each value and `mean`.
static double SumSquaredDeviations(const vector<double> &values, double mean) {
    double total = 0.0;
    for (vector<double>::const_iterator it = values.begin(); it != values.end(); ++it) {
        const double diff = *it - mean;
        total += diff * diff;
    }
    return total;
}


// Returns a sorted copy of `values` with duplicates removed.
static vector<string> SortedDistinct(const vector<string> &values) {
    vector<string> result(values);
    sort(result.begin(), result.end());
    result.erase(unique(result.begin(), result.end()), result.end());
    return result;
}


// Concatenates `values`, writing `delimiter` between consecutive elements.
static string Join(const vector<string> &values, const string &delimiter) {
    ostringstream joined;
    for (size_t i = 0; i < values.size(); ++i) {
        if (i > 0) joined << delimiter;
        joined << values[i];
    }
    return joined.str();
}


// Returns the value with the most (mostFrequent == true) or the fewest
// (mostFrequent == false) occurrences in `values`.  Ties go to the value that
// sorts first.  Returns an empty string when `values` is empty.
static string PickByRunLength(const vector<string> &values, bool mostFrequent) {
    // sort a copy so equal values become adjacent without disturbing the caller
    vector<string> sorted(values);
    sort(sorted.begin(), sorted.end());

    string best;
    size_t bestCount = 0;
    bool haveBest = false;

    vector<string>::const_iterator runStart = sorted.begin();
    const vector<string>::const_iterator end = sorted.end();
    while (runStart != end) {
        // measure the run of elements equal to *runStart
        vector<string>::const_iterator runEnd = runStart;
        size_t runLength = 0;
        while (runEnd != end && *runEnd == *runStart) {
            ++runEnd;
            ++runLength;
        }

        // strict comparisons keep the earliest run on ties
        const bool better = !haveBest ||
                            (mostFrequent ? (runLength > bestCount)
                                          : (runLength < bestCount));
        if (better) {
            best = *runStart;
            bestCount = runLength;
            haveBest = true;
        }
        runStart = runEnd;
    }
    return best;
}


// Builds the "value:count" report shared by GetFreqDesc and GetFreqAsc.
// Every entry, including the last one, is followed by `delimiter`.
// `byCount` decides the order of the (count, value) pairs.
template <typename Compare>
static string FormatFrequencies(const vector<string> &values,
                                const string &delimiter,
                                Compare byCount) {
    // compute the frequency of each unique value
    map<string, int> freqs;
    for (vector<string>::const_iterator it = values.begin(); it != values.end(); ++it) {
        freqs[*it]++;
    }

    // create a list of pairs of the occurrences (first) and the
    // observed values (second) so that it can be ordered by count
    vector< pair<int, string> > freqList;
    freqList.reserve(freqs.size());
    for (map<string, int>::const_iterator it = freqs.begin(); it != freqs.end(); ++it) {
        freqList.push_back(make_pair(it->second, it->first));
    }
    sort(freqList.begin(), freqList.end(), byCount);

    // record all of the values and their frequencies
    ostringstream buffer;
    for (vector< pair<int, string> >::const_iterator it = freqList.begin();
         it != freqList.end(); ++it) {
        buffer << it->second << ":" << it->first << delimiter;
    }
    return buffer.str();
}


// Constructor: keeps a copy of the input strings; numeric conversions are
// performed lazily by the individual operations.
VectorOps::VectorOps(const vector<string> &vec)
: _vecs(vec)
, _vecd()
, _size(vec.size())
, _delimStr(",")
{
    _vecd.reserve(vec.size());
}


// Destructor
VectorOps::~VectorOps(void) {
}


// Sum of all values interpreted as doubles (0.0 for empty input).
double VectorOps::GetSum(void) {
    ConvertAll(_vecs, _vecd, MakeDouble);
    return accumulate(_vecd.begin(), _vecd.end(), 0.0);
}


// Arithmetic mean of all values interpreted as doubles.
// Divides by the size recorded at construction, so empty input yields NaN.
double VectorOps::GetMean(void) {
    ConvertAll(_vecs, _vecd, MakeDouble);
    return accumulate(_vecd.begin(), _vecd.end(), 0.0) / _size;
}


// Population standard deviation (denominator n).
double VectorOps::GetStddev(void) {
    const double mean = GetMean();   // also fills _vecd
    const double variance = SumSquaredDeviations(_vecd, mean) / _vecd.size();
    return sqrt(variance);
}


// Sample standard deviation (denominator n - 1).
// Returns NaN when fewer than two values are available.
double VectorOps::GetSstddev(void) {
    const double mean = GetMean();   // also fills _vecd
    if (_vecd.size() < 2) {
        // n - 1 would be zero, or wrap around for an unsigned size of zero
        return NotANumber();
    }
    const double variance = SumSquaredDeviations(_vecd, mean) / (_vecd.size() - 1);
    return sqrt(variance);
}


// Median of all values interpreted as doubles; for an even number of values
// the two middle values are averaged.  Returns NaN for empty input.
double VectorOps::GetMedian(void) {
    ConvertAll(_vecs, _vecd, MakeDouble);
    if (_vecd.empty()) {
        return NotANumber();
    }

    sort(_vecd.begin(), _vecd.end());
    const size_t count = _vecd.size();
    const size_t mid = count / 2;
    if ((count % 2) != 0) {
        return _vecd[mid];
    }
    return (_vecd[mid - 1] + _vecd[mid]) / 2.0;
}


// Smallest value.  Returns NaN for empty input.
double VectorOps::GetMin(void) {
    ConvertAll(_vecs, _vecd, MakeDouble);
    if (_vecd.empty()) {
        return NotANumber();
    }
    return *min_element(_vecd.begin(), _vecd.end());
}


// Smallest absolute value.  Returns NaN for empty input.
double VectorOps::GetAbsMin(void) {
    ConvertAll(_vecs, _vecd, MakeAbsDouble);
    if (_vecd.empty()) {
        return NotANumber();
    }
    return *min_element(_vecd.begin(), _vecd.end());
}


// Largest value.  Returns NaN for empty input.
double VectorOps::GetMax(void) {
    ConvertAll(_vecs, _vecd, MakeDouble);
    if (_vecd.empty()) {
        return NotANumber();
    }
    return *max_element(_vecd.begin(), _vecd.end());
}


// Largest absolute value.  Returns NaN for empty input.
double VectorOps::GetAbsMax(void) {
    ConvertAll(_vecs, _vecd, MakeAbsDouble);
    if (_vecd.empty()) {
        return NotANumber();
    }
    return *max_element(_vecd.begin(), _vecd.end());
}


// Most frequently observed string; ties go to the value that sorts first.
// Returns an empty string for empty input.
string VectorOps::GetMode(void) {
    return PickByRunLength(_vecs, true);
}


// Least frequently observed string; ties go to the value that sorts first.
// Returns an empty string for empty input.
string VectorOps::GetAntiMode(void) {
    return PickByRunLength(_vecs, false);
}


// Number of values, duplicates included.
uint32_t VectorOps::GetCount(void) {
    return static_cast<uint32_t>(_vecs.size());
}


// Number of distinct values.  The stored values are left untouched.
uint32_t VectorOps::GetCountDistinct(void) {
    return static_cast<uint32_t>(SortedDistinct(_vecs).size());
}


// All values, in their stored order, separated by `delimiter`.
string VectorOps::GetCollapse(string delimiter) {
    return Join(_vecs, delimiter);
}


// All values, in their stored order, with nothing between them.
string VectorOps::GetConcat(void) {
    return Join(_vecs, "");
}


// The distinct values in sorted (string) order, separated by the object's
// delimiter.  The stored values are left untouched.
string VectorOps::GetDistinct(void) {
    return Join(SortedDistinct(_vecs), _delimStr);
}


// The distinct values interpreted as doubles, sorted numerically in ascending
// or descending order and separated by the object's delimiter.
string VectorOps::GetDistinctSortNum(bool ascending) {
    ConvertAll(_vecs, _vecd, MakeDouble);

    if (ascending) {
        sort(_vecd.begin(), _vecd.end(), less<double>());
    } else {
        sort(_vecd.begin(), _vecd.end(), greater<double>());
    }
    // equal values are adjacent after sorting, so unique() removes all duplicates
    _vecd.erase(unique(_vecd.begin(), _vecd.end()), _vecd.end());

    ostringstream distinct;
    for (size_t i = 0; i < _vecd.size(); ++i) {
        if (i > 0) distinct << _delimStr;
        distinct << _vecd[i];
    }
    return distinct.str();
}


// "value:count" for every distinct value, most frequent first.
// Each entry (including the last) is followed by the object's delimiter.
string VectorOps::GetFreqDesc(void) {
    return FormatFrequencies(_vecs, _delimStr, ValueGreaterThan());
}


// "value:count" for every distinct value, least frequent first.
// Each entry (including the last) is followed by the object's delimiter.
string VectorOps::GetFreqAsc(void) {
    return FormatFrequencies(_vecs, _delimStr, ValueLessThan());
}


// First stored value, or an empty string when there are no values.
string VectorOps::GetFirst(void) {
    if (_vecs.empty()) {
        return string();
    }
    return _vecs.front();
}


// Last stored value, or an empty string when there are no values.
string VectorOps::GetLast(void) {
    if (_vecs.empty()) {
        return string();
    }
    return _vecs.back();
}