/*
 * Tokenizer.h
 *
 *  Created on: Apr 15, 2014
 *      Author: nek3d
 */

#ifndef TOKENIZER_H_
#define TOKENIZER_H_

#include "string.h"
#include <string>
#include <vector>

// NOTE(review): a using-directive at header scope leaks into every file that
// includes this header. It is kept here because existing includers may rely
// on it; removing it would be a breaking change.
using namespace std;

/*
 * Splits a string into elements separated by a single delimiter character
 * and stores the resulting elements for later retrieval by index.
 *
 * Only the inline accessors are defined in this header; everything else is
 * implemented in the corresponding source file.
 */
class Tokenizer {
public:
	Tokenizer();
	~Tokenizer();

	// If you know the expected number of items, set this.
	// If not, don't worry about it.
	void setNumExpectedItems(int val);

	// Tokenizes str on the given delimiter (tab by default).
	// NOTE(review): the return value is presumably the number of elements
	// found, and eofHit / isCompressed presumably describe the state of the
	// input buffer str was read from -- confirm against the implementation.
	int tokenize(const string &str, char delimiter = '\t', bool eofHit = false, bool isCompressed = true);

	// If the final element ends before a delim char, that means
	// the buffer passed in ends mid-element. The last, incomplete
	// element found can either be:
	// 1) Used now. We want it whether it's complete or not.
	// 2) Ignored altogether.
	typedef enum { USE_NOW, IGNORE } lastElemCode;
	void setKeepFinalIncompleteElem(lastElemCode code);

	// Number of elements currently stored.
	int getNumTotalElems() const { return static_cast<int>(_elems.size()); }

	// Returns a copy of the element at index i. The index is not range
	// checked, so i must satisfy 0 <= i < getNumTotalElems().
	const string getElem(int i) const { return (_elems[i]); }

	// NOTE(review): presumably counts the delimiter-separated fields in str
	// without storing them -- confirm against the implementation.
	int getNumFields(const string &str, char delimiter);

private:
	// The value is 4096; an earlier comment here said "8Kb", which did not
	// match. Units are presumably bytes (i.e. 4 KB) -- TODO confirm.
	static const int DEFAULT_PARSE_BUFFER_SIZE = 4096;
	static const int INITIAL_NUM_ELEMS = 10;

	vector<string> _elems;               // elements exposed through getElem()
	int _numExpectedElems;               // presumably set by setNumExpectedItems()
	lastElemCode _keepFinalIncElem;      // presumably set by setKeepFinalIncompleteElem()

	string fetchElem(int idx);
	void resize(int newSize);
};

#endif /* TOKENIZER_H_ */