// // sequenceUtils.cpp // BEDTools // // Created by Aaron Quinlan Spring 2009. // Copyright 2009 Aaron Quinlan. All rights reserved. // // Summary: Contains common functions for manipulating DNA sequences. // // Acknowledgment: I am grateful to Michael Stromberg for the code below to // reverse complement a sequence. #include "sequenceUtils.h" // Performs an in-place sequence reversal void reverseSequence(string &seq) { std::reverse(seq.begin(), seq.end()); } // Performs an in-place reverse complement conversion void reverseComplement(string &seq) { // reverse the sequence reverseSequence(seq); // swap the bases for(unsigned int i = 0; i < seq.length(); i++) { switch(seq[i]) { // nucleotides case 'A': seq[i] = 'T'; break; case 'C': seq[i] = 'G'; break; case 'G': seq[i] = 'C'; break; case 'T': seq[i] = 'A'; break; case 'a': seq[i] = 't'; break; case 'c': seq[i] = 'g'; break; case 'g': seq[i] = 'c'; break; case 't': seq[i] = 'a'; break; // unknown (N) case 'N': seq[i] = 'N'; break; case 'n': seq[i] = 'n'; break; // unknown (X) case 'X': seq[i] = 'X'; break; case 'x': seq[i] = 'x'; break; // IUPAC case 'Y': seq[i] = 'R'; break; case 'y': seq[i] = 'r'; break; case 'R': seq[i] = 'Y'; break; case 'r': seq[i] = 'y'; break; case 'S': seq[i] = 'S'; break; case 's': seq[i] = 's'; break; case 'W': seq[i] = 'W'; break; case 'w': seq[i] = 'w'; break; case 'K': seq[i] = 'M'; break; case 'k': seq[i] = 'm'; break; case 'M': seq[i] = 'K'; break; case 'm': seq[i] = 'k'; break; case 'B': seq[i] = 'V'; break; case 'b': seq[i] = 'v'; break; case 'V': seq[i] = 'B'; break; case 'v': seq[i] = 'b'; break; case 'D': seq[i] = 'H'; break; case 'd': seq[i] = 'h'; break; case 'H': seq[i] = 'D'; break; case 'h': seq[i] = 'd'; break; default: break; } } } void toLowerCase(std::string &seq) { const int length = seq.length(); for(int i=0; i < length; ++i) { seq[i] = std::tolower(seq[i]); } } void toUpperCase(std::string &seq) { const int length = seq.length(); for(int i=0; i < length; ++i) { seq[i] = std::toupper(seq[i]); } } void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other) { // swap the bases for(unsigned int i = 0; i < seq.length(); i++) { switch(seq[i]) { case 'A': case 'a': a++; break; case 'C': case 'c': c++; break; case 'G': case 'g': g++; break; case 'T': case 't': t++; break; case 'N': case 'n': n++; break; default: other++; break; } } } int countPattern(const string &seq, const string &pattern, bool ignoreCase) { string s = seq; string p = pattern; // standardize the seq and the pattern // if case should be ignored if (ignoreCase) { toUpperCase(s); toUpperCase(p); } int patternLength = p.size(); int patternCount = 0; for(unsigned int i = 0; i < s.length(); i++) { if (s.substr(i,patternLength) == p) { patternCount++; } } return patternCount; }