#ifndef OUR_GFA_READER_H #define OUR_GFA_READER_H #include "CLI/Timer.hpp" #include "Util.hpp" #include "cereal/types/string.hpp" #include "cereal/types/vector.hpp" //#include "sdsl/int_vector.hpp" #include "sparsepp/spp.h" #include "spdlog/spdlog.h" #include "compact_vector/compact_vector.hpp" #include "rank9sel.hpp" #include "string_view.hpp" #include "zstr/zstr.hpp" #include #include #include #include #include #include #include #include namespace pufferfish { class BinaryGFAReader { private: std::string filename_; std::unique_ptr file; size_t k; struct Contig { std::string seq; std::string id; }; pufferfish::util::PackedContigInfoVec contigid2seq; // path maps each transcript_id to a pair of // orientation : +/true main, -/false reverse spp::sparse_hash_map>> path; spp::sparse_hash_map refIDs; // spp::sparse_hash_map refMap; std::vector refMap; std::vector refLengths; uint64_t maxRefLength{0}; compact::vector seqVec_; compact::vector rankVec_; //edge table //ATGC|ATGC = 8 bits compact::vector edgeVec_; //predecessor,stores the same //transcript in reverse order //improve walkability //sdsl::int_vector<8> edgeVec2_; size_t fillContigInfoMap_(); // Avoiding un-necessary stream creation + replacing strings with string view // is a bit > than a 2x win! // implementation from : https://marcoarena.wordpress.com/tag/string_view/ std::vector split(stx::string_view str, char delims); bool buildEdgeVec_{false}; bool buildEqClses_{false}; std::shared_ptr logger_{nullptr}; std::unique_ptr> cpos_offsets{nullptr}; public: // spp::sparse_hash_map> std::vector contig2pos; BinaryGFAReader(const char* gfaFileName, size_t input_k, bool buildEqClses, bool buildEdgeVEc, std::shared_ptr logger); /*void encodeSeq(sdsl::int_vector<2>& seqVec, size_t offset, stx::string_view str); */ void encodeSeq(compact::vector& seqVec, size_t offset, stx::string_view str); //spp::sparse_hash_map& getContigNameMap(); pufferfish::util::PackedContigInfoVec& getContigNameMap(); std::vector& getRefIDs(); std::vector& getRefLengths(); compact::vector& getContigSeqVec(); compact::vector& getRankVec(); compact::vector& getEdgeVec(); void parseFile(); void mapContig2Pos(); void clearContigTable(); void serializeContigTable(const std::string& odir, const std::vector>& shortRefsNameLen, const std::vector& refIdExtensions); void deserializeContigTable(); // void writeFile(std::string fileName); }; } // end namespace pufferfish #endif