#ifndef __ALEVIN_UTILS_HPP__ #define __ALEVIN_UTILS_HPP__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "spdlog/spdlog.h" #include #include #include #include #include "cereal/archives/json.hpp" #include "metro/metrohash.h" #include "nonstd/optional.hpp" #include "AlevinOpts.hpp" #include "SingleCellProtocols.hpp" #include "BarcodeGroup.hpp" #include "SalmonDefaults.hpp" #include "SalmonConfig.hpp" #include "SalmonUtils.hpp" // #include "IndexHeader.hpp" #include "pufferfish/sparsepp/spp.h" namespace alevin{ namespace utils{ namespace apt = alevin::protocols; namespace bfs = boost::filesystem; constexpr uint32_t uint32_max = std::numeric_limits::max(); void getIndelNeighbors( const std::string& barcodeSeq, std::unordered_set& neighbors); void findNeighbors(size_t length, const std::string& barcode, std::unordered_set& neighbors); //template bool sequenceCheck(const std::string& barcode, //AlevinOpts& aopt, //std::mutex& iomutex, Sequence seq = Sequence::BARCODE); bool recoverBarcode(std::string& sequence); void readWhitelist(bfs::path& filePath, TrueBcsT& trueBarcodes); unsigned int hammingDistance(const std::string s1, const std::string s2); template bool processAlevinOpts(AlevinOpts& aopt, SalmonOpts& sopt, bool noTgMap, boost::program_options::variables_map& vm); template bool extractUMI(std::string& read, std::string& read2, ProtocolT& pt, std::string& umi); template std::string* getReadSequence(ProtocolT& pt, std::string& seq, std::string& seq2, std::string& subseq); template bool extractBarcode(std::string& read, std::string& read2, ProtocolT& pt, std::string& bc); template bool writeCmdInfo(boost::filesystem::path cmdInfoPath, OrderedOptionsT& orderedOptions) { std::ofstream os(cmdInfoPath.string()); cereal::JSONOutputArchive oa(os); oa(cereal::make_nvp("salmon_version", std::string(salmon::version))); for (auto& opt : orderedOptions.options) { if (opt.value.size() == 1) { oa(cereal::make_nvp(opt.string_key, opt.value.front())); } else { oa(cereal::make_nvp(opt.string_key, opt.value)); } } return true; } void getTxpToGeneMap(spp::sparse_hash_map& txpToGeneMap, spp::sparse_hash_map& geneIdxMap, const std::string& t2gFile, const std::string& refNamesFile, const std::string& refLengthFile, const std::string& headerFile, std::shared_ptr& jointLog, bool noTgMap); bool checkSetCoverage(std::vector>& tgroup, std::vector txps); void combinationUtil(std::vector& arr, int n, int r, int index, std::vector data, int i, std::vector>& comb); bool hasOneGene(const std::vector& txps, uint32_t& geneId, spp::sparse_hash_map& txpToGeneMap, const size_t numGenes); } } #endif // __ALEVIN_UTILS_HPP__