/**
>HEADER
Copyright (c) 2013 Rob Patro robp@cs.cmu.edu
This file is part of Sailfish.
Sailfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Sailfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Sailfish. If not, see .
#include
#include
#include
#include
#include
class TranscriptGeneMap {
using Index = size_t;
using Size = size_t;
using NameVector = std::vector;
using IndexVector = std::vector;
using IndexVectorList = std::vector>;
private:
NameVector _transcriptNames;
NameVector _geneNames;
IndexVector _transcriptsToGenes;
IndexVectorList _genesToTranscripts;
bool _haveReverseMap;
void _computeReverseMap() {
_genesToTranscripts.resize(_geneNames.size(), {});
Index transcriptID = 0;
size_t maxNumTrans = 0;
Index maxGene;
for (transcriptID = 0; transcriptID < _transcriptsToGenes.size();
++transcriptID) {
_genesToTranscripts[_transcriptsToGenes[transcriptID]].push_back(
transcriptID);
if (maxNumTrans <
_genesToTranscripts[_transcriptsToGenes[transcriptID]].size()) {
maxNumTrans =
_genesToTranscripts[_transcriptsToGenes[transcriptID]].size();
maxGene = _transcriptsToGenes[transcriptID];
}
}
std::cerr << "max # of transcripts in a gene was " << maxNumTrans
<< " in gene " << _geneNames[maxGene] << "\n";
}
friend class cereal::access;
template
void serialize(Archive& ar, const unsigned int version) {
ar(_transcriptNames, _geneNames, _transcriptsToGenes, _genesToTranscripts,
_haveReverseMap);
}
public:
TranscriptGeneMap()
: _transcriptNames(NameVector()), _geneNames(NameVector()),
_transcriptsToGenes(IndexVector()), _haveReverseMap(false) {}
TranscriptGeneMap(const NameVector& transcriptNames,
const NameVector& geneNames,
const IndexVector& transcriptsToGenes)
: _transcriptNames(transcriptNames), _geneNames(geneNames),
_transcriptsToGenes(transcriptsToGenes), _haveReverseMap(false) {}
TranscriptGeneMap(const TranscriptGeneMap& other) = default;
TranscriptGeneMap& operator=(const TranscriptGeneMap& other) = default;
Index INVALID{std::numeric_limits::max()};
Index findTranscriptID(const std::string& tname) {
using std::distance;
using std::lower_bound;
auto it =
lower_bound(_transcriptNames.begin(), _transcriptNames.end(), tname);
if (it == _transcriptNames.end() or *it != tname) {
return INVALID;
} else {
return distance(_transcriptNames.begin(), it);
}
}
Size numTranscripts() { return _transcriptNames.size(); }
Size numGenes() { return _geneNames.size(); }
bool needReverse() {
if (_haveReverseMap) {
return false;
} else {
_computeReverseMap();
return true;
}
}
const IndexVector& transcriptsForGene(Index geneID) {
return _genesToTranscripts[geneID];
}
inline std::string nameFromGeneID(Index geneID) { return _geneNames[geneID]; }
inline Index gene(Index transcriptID) {
return _transcriptsToGenes[transcriptID];
}
inline std::string geneName(Index transcriptID) {
return _geneNames[_transcriptsToGenes[transcriptID]];
}
inline std::string geneName(const std::string& transcriptName, bool& found) {
found = false;
auto tid = findTranscriptID(transcriptName);
if (tid != INVALID) {
found = true;
return geneName(tid);
} else {
found = false;
return transcriptName;
}
}
inline std::string transcriptName(Index transcriptID) {
return _transcriptNames[transcriptID];
}
};
#endif // TRANSCRIPT_GENE_MAP_HPP