/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file BclDemultiplexer.hh ** ** \brief Top level library component to demultiplex a tile stack of bcl files. ** ** \author Roman Petrovski **/ #ifndef CASAVA_DEMULTIPLEX_BARCODE_TRANSLATION_TABLE_HH #define CASAVA_DEMULTIPLEX_BARCODE_TRANSLATION_TABLE_HH #include #include #include #include #include "common/Exceptions.hh" namespace casava { namespace demultiplex { namespace fs = boost::filesystem; namespace cc = casava::common; struct SamplePath : public fs::path { SamplePath(const std::string& sample, const std::string& barcode, const fs::path path) :fs::path(path), sample_(sample), barcode_(barcode){} // SamplePath(const SamplePath& that) // :fs::path(that), sample_(that.sample){} // SamplePath() // {} std::string sample_; std::string barcode_; }; class BarcodeTranslationTable : public std::vector { public: BarcodeTranslationTable( const fs::path &outputPath, const std::string & unknownBarcode, const std::vector &componentMaxMismatches, const std::vector &samples, const std::vector &barcodes, const std::vector &demuxDirNames) : std::vector(calculateDemuxOutputPaths( outputPath, unknownBarcode, samples, barcodes, demuxDirNames)) , unknownBarcodeOutDir_(begin()) , componentMaxMismatches_(componentMaxMismatches) , correctedBarcodeOutDir_(mapBarcodes(unknownBarcode, barcodes, begin() + 1)) { } struct CorrectedBarcodeIndex{ explicit CorrectedBarcodeIndex(uint32_t val) : index(val){} // uint32_t is required to number all the 2-error variants of 96 15-base barcodes // errors 0 1 2 3 4 5 // 15-base variants 1 61 3477 184281 9029769 406339605 // 96 samples variants 96 5856 333792 17690976 866857824 39008602080 uint32_t index; }; const CorrectedBarcodeIndex getUnknownBarcodeIndex() const { return CorrectedBarcodeIndex(0); } const CorrectedBarcodeIndex getCorrectedBarcodeIndex(const std::string& barcode) const { const std::pair p( std::equal_range(correctedBarcodeOutDir_.begin(), correctedBarcodeOutDir_.end(), BarcodeDirPair(barcode, unknownBarcodeOutDir_), barcodeLess)); // The first entry contains empty barcode pointing at "unknown" dir return (1 != p.second - p.first) ? getUnknownBarcodeIndex() : CorrectedBarcodeIndex(p.first - correctedBarcodeOutDir_.begin()); } size_type getOutDirIndex(const CorrectedBarcodeIndex &correctedBarcodeIndex) const { return correctedBarcodeOutDir_[correctedBarcodeIndex.index].second - begin(); } const std::string &getCorrectedBarcode(const CorrectedBarcodeIndex &correctedBarcodeIndex) const{ return correctedBarcodeOutDir_[correctedBarcodeIndex.index].first; } static const char barcodeComponentSeparator_ = '-'; static std::string removeComponentSeparator(const std::string &barcode); private: const const_iterator unknownBarcodeOutDir_; // one entry per barcode component const std::vector &componentMaxMismatches_; typedef std::pair BarcodeDirPair; typedef std::vector BarcodeOutDir; typedef std::map BarcodeOutDirMap; // Vector that maps barcode string to output path iterator. Ordered by the barcode string // The first entry is populated with empty barcode pointing at "unknown" dir const BarcodeOutDir correctedBarcodeOutDir_; static bool barcodeLess(const BarcodeDirPair &left, const BarcodeDirPair &right) { return left.first < right.first; } static const std::vector calculateDemuxOutputPaths( const fs::path &outputPath, const std::string &unknownBarcode, const std::vector &sampleNames, const std::vector &barcodes, const std::vector &demuxDirNames) { std::vector ret; std::vector::const_iterator itSample(sampleNames.begin()); std::vector::const_iterator itBarcode(barcodes.begin()); bool unknownBarcodeMappingFound(false); BOOST_FOREACH(const std::string &barcodeDir, demuxDirNames) { if (unknownBarcode == *itBarcode) { // Unknown barcode mapping must be the first element (see unknownBarcodeOutDir_ initialization) ret.insert(ret.begin(), SamplePath((*itSample++), (*itBarcode++), barcodeDir.empty() ? "" : outputPath / barcodeDir)); unknownBarcodeMappingFound = true; } else { ret.push_back(SamplePath((*itSample++), (*itBarcode++), barcodeDir.empty() ? "" : outputPath / barcodeDir)); } } if (!unknownBarcodeMappingFound) { // Mapping for unknown barcodes is required. BOOST_THROW_EXCEPTION(cc::InvalidParameterException("Mapping not provided for unknown barcodes.")); } return ret; } const BarcodeOutDir mapBarcodes(const std::string &unknownBarcode, const std::vector barcodes, const_iterator outDirPathIterator) const { BarcodeOutDirMap m; // kick the recursion off at the last barcode component and work backwards to the first one const unsigned startComponentIndex = componentMaxMismatches_.size() ? componentMaxMismatches_.size() - 1 : 0; BOOST_FOREACH(const std::string &barcode, barcodes) { if (unknownBarcode != barcode) { insertBarcode(barcode, startComponentIndex, barcode.length(), componentMaxMismatches_.at(startComponentIndex), outDirPathIterator++, m); } } // Corrected for unknown barcode at index 0, see getUnknownBarcodeIndex() BarcodeOutDir ret(1, BarcodeDirPair("", unknownBarcodeOutDir_)); ret.insert(ret.end(), m.begin(), m.end()); return ret; } void insertBarcode( const std::string &barcode, const unsigned currentComponentIndex, std::string::size_type pos, const unsigned int allowedMismatchesLeft, const_iterator outDirPathIterator, BarcodeOutDirMap &container) const; // Diagnostics friend std::ostream & operator <<(std::ostream &os, const BarcodeTranslationTable &btt); friend std::ostream & printStdPair(std::ostream &os, const BarcodeDirPair &kv); BarcodeTranslationTable(); BarcodeTranslationTable(const BarcodeTranslationTable &); BarcodeTranslationTable &operator=(const BarcodeTranslationTable &); }; } // namespave demultiplex } // namespace casava #endif // CASAVA_DEMULTIPLEX_BARCODE_TRANSLATION_TABLE_HH