/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file BclDemultiplexer.hh ** ** \brief Top level library component to demultiplex a tile stack of bcl files. ** ** \author Roman Petrovski **/ /* NOTE(review): this copy of the header appears to have lost its line breaks and every angle-bracket span (system #include targets, template parameter lists, template arguments); restore them from version control before building. */ #ifndef CASAVA_DEMULTIPLEX_BCL_DEMULTIPLEXER_HH #define CASAVA_DEMULTIPLEX_BCL_DEMULTIPLEXER_HH #include #include #include #include #include #include "io/StatsFileRecord.hh" #include "io/QvalReader.hh" #include "io/QvalWriter.hh" #include "io/FastqWriter.hh" #include "demultiplex/ClusterIndexWriter.hh" #include "demultiplex/CountingBclWriter.hh" #include "LanePaths.hh" #include "BarcodeTranslationTable.hh" namespace casava { namespace demultiplex { namespace fs = boost::filesystem; namespace cc = casava::common; namespace cio = casava::io; /* Demultiplexes a tile stack of bcl files (see the \brief in the file header). Every data member is const or a const reference and is set once by the constructor. */ class BclDemultiplexer { public: /* Stores every argument in the member of the same name and builds pathsHelper_ from (lane, intensitiesPath, basecallsPath). barcodeDirectories and compressor are kept as references (see the member declarations), so both objects must outlive this instance. NOTE(review): tiles is the only container parameter taken by value, so it is copied (or moved) into the parameter and then copied again into tiles_; a const reference would match the neighbouring parameters. */ BclDemultiplexer(const std::string &instrumentName, const unsigned int runNumber, const std::string &flowCellId, const unsigned int lane, const std::vector tiles, const std::vector > &readCycles, const std::vector &barcodeCycles, const BarcodeTranslationTable &barcodeDirectories, const fs::path &basecallsPath, const std::string &outputFormat, const unsigned int fastqClusterCount, const bool createMissingFolders, const bool createDemuxIndex, const fs::path &intensitiesPath, const fs::path &inputPositionsDir, const std::string &inputPositionsFileType, const fs::path &inputFilterDir, const cc::Compression &compressor) : instrumentName_(instrumentName) , 
runNumber_(runNumber) , flowCellId_(flowCellId) , lane_(lane) , tiles_(tiles) , pathsHelper_(lane, intensitiesPath, basecallsPath) , readCycles_(readCycles) , barcodeCycles_(barcodeCycles) , basecallsPath_(basecallsPath) , intensitiesPath_(intensitiesPath) , barcodeDirectories_(barcodeDirectories) , outputFormat_(outputFormat) , fastqClusterCount_(fastqClusterCount) , createMissingFolders_(createMissingFolders) , createDemuxIndex_(createDemuxIndex) , compressor_(compressor) , inputFilterDir_(inputFilterDir) , inputPositionsDir_(inputPositionsDir) , inputPositionsFileType_(inputPositionsFileType) // , clusterBarcodeIndex_(mapClusterBarcodes(tiles[0])) { } // void run() const; protected: const std::string instrumentName_; const unsigned int runNumber_; const std::string flowCellId_; const unsigned int lane_; const std::vector tiles_; const LanePaths pathsHelper_; const std::vector > readCycles_; const std::vector barcodeCycles_; typedef std::vector BclPaths; const fs::path basecallsPath_; const fs::path intensitiesPath_; const BarcodeTranslationTable &barcodeDirectories_; const std::string outputFormat_; const unsigned int fastqClusterCount_; const bool createMissingFolders_; const bool createDemuxIndex_; const cc::Compression &compressor_; const fs::path inputFilterDir_; const fs::path inputPositionsDir_; const std::string inputPositionsFileType_; /* The constructor below only reserves capacity in index_ (its size stays 0) and resizes counts_ to barcodesTotal; nothing in this header fills either vector. */ struct ClusterCorrectedBarcodeIndex { ClusterCorrectedBarcodeIndex(const size_t clustersTotal, const size_t barcodesTotal) {index_.reserve(clustersTotal); counts_.resize(barcodesTotal);} // array pointing to barcode translation table information for each cluster std::vector index_; // array with number of clusters for each barcode (including unknown); std::vector counts_; }; const ClusterCorrectedBarcodeIndex mapClusterBarcodes(const unsigned int tile) const; const std::string createStatsFile(const fs::path &intensitiesPath, const unsigned int tile, const unsigned int cycle, boost::shared_ptr writerPtr, const 
cio::StatsFileRecord &originalStats) const; /* Calls demultiplexBclsCycle(clusterBarcodeIndex, tile, cycle) once per element of cycles, in order. NOTE(review): boost::bind stores copies of its bound arguments, so clusterBarcodeIndex (both of its vectors) is copied into the functor here; wrapping it in boost::cref would avoid that - confirm and fix. */ void demultiplexBclsCycles(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, const std::vector &cycles) const {std::for_each(cycles.begin(), cycles.end(), boost::bind(&BclDemultiplexer::demultiplexBclsCycle, this, clusterBarcodeIndex, tile, _1));} void demultiplexBclsCycle(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, const unsigned int cycle) const; void demultiplexFiltersRead(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, const unsigned int readNumber) const; void demultiplexFiltersRead(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile) const; /* Throws cc::PreConditionException when reader.getClusterCount() differs from clusterBarcodeIndex.index_.size(). Otherwise, for each index_ entry in order, reads the next record with reader.get(tmp), applies converter to it and put()s the result to the writer selected by barcodeDirectories_.getOutDirIndex(entry). */ template void demultiplex(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, ReaderT &reader, const std::vector > &writers, ConverterT &converter) const { unsigned int clustersToProcess(reader.getClusterCount()); if (clusterBarcodeIndex.index_.size() != clustersToProcess) { BOOST_THROW_EXCEPTION(cc::PreConditionException( (boost::format("Incorrect clusters count in file: %s. Expected: %i. 
Actual: %i") % reader.getDescription() % clusterBarcodeIndex.index_.size() % clustersToProcess).str())); } typename ReaderT::RecordType tmp; BOOST_FOREACH(const BarcodeTranslationTable::CorrectedBarcodeIndex &i, clusterBarcodeIndex.index_) { writers[barcodeDirectories_.getOutDirIndex(i)]->put(converter(reader.get(tmp))); } } template const std::vector > demultiplex(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, ReaderT &reader, const std::string &outputFileName, const ConverterT &converter) const; template const std::vector > demultiplex(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, ReaderT &reader, const std::string &outputName) const; /* Returns the result of pathsHelper_.formatBclFilePath(tile, cycle) for each element of cycles, in the same order. */ BclPaths bclFilePaths(const unsigned int tile, const std::vector &cycles) const { BclPaths ret; std::transform(cycles.begin(), cycles.end(), std::back_inserter(ret), boost::bind(&LanePaths::formatBclFilePath, &pathsHelper_, tile, _1)); return ret; } template boost::shared_ptr createWriter(const SamplePath &samplePath, const unsigned int tile, const unsigned int expectedClusters, const std::string &outputName) const; /* Declared but not defined in this header; presumably the pre-C++11 idiom for disabling default construction and copying. NOTE(review): no private: label is visible, so these sit under protected: - confirm that is intended. */ BclDemultiplexer(); BclDemultiplexer(const BclDemultiplexer &); BclDemultiplexer &operator=(const BclDemultiplexer &); }; } // namespace demultiplex } // namespace casava #endif // CASAVA_DEMULTIPLEX_BCL_DEMULTIPLEXER_HH