#ifndef __SALMON_UTILS_HPP__ #define __SALMON_UTILS_HPP__ extern "C" { #include "io_lib/os.h" #include "io_lib/scram.h" #undef min #undef max } #include #include #include #include #include #include #include #include #include #include #include #include "oneapi/tbb/task_arena.h" #include "cereal/archives/json.hpp" #include "spdlog/fmt/fmt.h" #include "SalmonMath.hpp" #include "SalmonOpts.hpp" #include "GenomicFeature.hpp" #include "LibraryFormat.hpp" #include "pufferfish/Util.hpp" #include "ReadLibrary.hpp" #include "SalmonConfig.hpp" #include "TranscriptGeneMap.hpp" template class ReadExperiment; class LibraryFormat; class FragmentLengthDistribution; class Transcript; namespace salmon { namespace utils { using std::string; using NameVector = std::vector; using IndexVector = std::vector; using KmerVector = std::vector; using MateStatus = pufferfish::util::MateStatus; // Keep track of the type of mapping that was obtained for this read enum class MappingType : uint8_t { UNMAPPED = 0, LEFT_ORPHAN = 1, RIGHT_ORPHAN = 2, BOTH_ORPHAN = 3, PAIRED_MAPPED = 4, SINGLE_MAPPED = 5, DECOY = 6 }; enum class DuplicateTargetStatus : uint8_t { UNKNOWN = 0, RETAINED_DUPLICATES = 1, REMOVED_DUPLICATES = 2 }; std::string str(const MappingType& mt); // To keep track of short fragments (shorter than the k-mer length) // on which the index was built. struct ShortFragStats { size_t numTooShort{0}; size_t shortest{std::numeric_limits::max()}; }; // An enum class for direction to avoid potential errors // with keeping everything as a bool enum class Direction { FORWARD = 0, REVERSE_COMPLEMENT = 1, REVERSE = 2 }; // Returns FORWARD if isFwd is true and REVERSE_COMPLEMENT otherwise constexpr inline Direction boolToDirection(bool isFwd) { return isFwd ? Direction::FORWARD : Direction::REVERSE_COMPLEMENT; } // Returns a uint64_t where the upper 32-bits // contain tid and the lower 32-bits contain offset uint64_t encode(uint64_t tid, uint64_t offset); // Given a uin64_t generated by encode(), return the // transcript id --- upper 32-bits uint32_t transcript(uint64_t enc); // Given a uin64_t generated by encode(), return the // offset --- lower 32-bits uint32_t offset(uint64_t enc); LibraryFormat parseLibraryFormatStringNew(std::string& fmt); std::vector extractReadLibraries(boost::program_options::parsed_options& orderedOptions); LibraryFormat parseLibraryFormatString(std::string& fmt); bool peekBAMIsPaired(const boost::filesystem::path& fname); size_t numberOfReadsInFastaFile(const std::string& fname); bool readKmerOrder(const std::string& fname, std::vector& kmers); template