// *************************************************************************** // FastaIndex.h (c) 2010 Erik Garrison // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- // Last modified: 5 February 2010 (EG) // --------------------------------------------------------------------------- #ifndef _FASTA_H #define _FASTA_H #include #include #include #include #include #include #include #include "LargeFileSupport.h" #include #include #include "split.h" #include #include #include using namespace std; class FastaIndexEntry { friend ostream& operator<<(ostream& output, const FastaIndexEntry& e); public: FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len, bool useFullHeader); FastaIndexEntry(void); ~FastaIndexEntry(void); string name; // sequence name int length; // length of sequence long long offset; // bytes offset of sequence from start of file int line_blen; // line length in bytes, sequence characters int line_len; // line length including newline bool useFullHeader; void clear(void); }; class FastaIndex : public map { friend ostream& operator<<(ostream& output, FastaIndex& i); public: FastaIndex(bool useFullHeader); ~FastaIndex(void); bool useFullHeader; vector sequenceNames; void indexReference(string refName); void readIndexFile(string fname); void writeIndexFile(string fname); ifstream indexFile; FastaIndexEntry entry(string key); bool chromFound(string name); void flushEntryToIndex(FastaIndexEntry& entry); string indexFileExtension(void); }; class FastaReference { public: void open(string reffilename, bool usemmap = false, bool useFullHeader = false); bool usingmmap; string filename; bool usingfullheader; FastaReference(void) : usingmmap(false), usingfullheader(false) { } ~FastaReference(void); FILE* file; void* filemm; size_t filesize; FastaIndex* index; vector findSequencesStartingWith(string seqnameStart); string getSequence(string seqname); // potentially useful for performance, investigate // void getSequence(string seqname, string& sequence); string getSubSequence(string seqname, int start, int length); string sequenceNameStartingWith(string seqnameStart); long unsigned int sequenceLength(string seqname); }; #endif