/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file BclDemultiplexer.cpp ** ** \brief Top level library component to demultiplex a tile stack of bcl files. ** ** \author Roman Petrovski **/ #include #include #include #include #include #include "common/Exceptions.hh" #include "alignment/BclReader.hh" #include "demultiplex/BclDemultiplexer.hh" namespace casava { namespace demultiplex { namespace ca=casava::alignment; std::string formatWithColon(const SamplePath &path, const unsigned int val) { return path.string() + ":" + path.barcode_ + ":" + boost::lexical_cast(val); } /** * @brief Produces byte array containing index of barcodeDirectories_ entry for each cluster in the tile */ const BclDemultiplexer::ClusterCorrectedBarcodeIndex BclDemultiplexer::mapClusterBarcodes(const unsigned int tile) const { BclPaths barcodeFiles(bclFilePaths(tile, (!barcodeCycles_.empty() ? barcodeCycles_ : std::vector(1, readCycles_[0][0])) )); ca::BclReader reader(barcodeFiles); unsigned int clustersToProcess(reader.getClusterCount()); ClusterCorrectedBarcodeIndex ret(clustersToProcess, barcodeDirectories_.size()); if(barcodeCycles_.empty()) { // not a significant speed improvement, but avoids confusing non-multiplexed user with // demultiplexing debug output. std::fill_n(std::back_inserter(ret.index_), clustersToProcess, barcodeDirectories_.getUnknownBarcodeIndex()); } else { std::cerr << "Barcode BCLs:\n"; std::transform(barcodeFiles.begin(), barcodeFiles.end(), std::ostream_iterator(std::cerr, "\n"), boost::bind(&fs::path::string, _1)); std::string bases, quality; while(clustersToProcess--) { reader.getCluster(bases, quality); const BarcodeTranslationTable::CorrectedBarcodeIndex index(barcodeDirectories_.getCorrectedBarcodeIndex(bases)); ret.index_.push_back(index); ++ret.counts_[barcodeDirectories_.getOutDirIndex(index)]; } std::cerr << "mapClusterBarcodes: barcode/clusters:\n"; std::transform(barcodeDirectories_.begin(), barcodeDirectories_.end(), ret.counts_.begin(), std::ostream_iterator(std::cerr, "\n"), boost::bind(&formatWithColon, _1, _2)); } return ret; } /** * @brief Base template for various writers instantiation. There is no default implementation. * Each writer has a specialization provided below * * @param WriterT type of the writer to instantiate * * @param samplePath path to the demultiplexer output folder for the barcode * @param outputName context-specific name to form the output paths. Supplied by the caller * of demultiplex * @param expectedClusters number of clusters expected to be written through this writer * @param pathsHelper path formatting helper object */ template boost::shared_ptr BclDemultiplexer::createWriter(const SamplePath &samplePath, const unsigned int tile, const unsigned int expectedClusters, const std::string &outputName) const { BOOST_STATIC_ASSERT(sizeof(WriterT) == 0 && "Only specializations of this method are allowed to instantiate"); } template<> boost::shared_ptr BclDemultiplexer::createWriter( const SamplePath &samplePath, const unsigned int , const unsigned int expectedClusters, const std::string &filtersFileName) const { return boost::shared_ptr( new cio::FiltersWriter(pathsHelper_.formatFiltersFilePath(samplePath, filtersFileName), compressor_, expectedClusters)); } template<> boost::shared_ptr BclDemultiplexer::createWriter( const SamplePath &samplePath, const unsigned int , const unsigned int , const std::string &positionsFileName) const { return boost::shared_ptr( new cio::PositionsWriterText(pathsHelper_.formatPositionsFilePath(samplePath, positionsFileName), compressor_)); //new cio::PositionsWriterText(pathsHelper_.formatPositionsPath(samplePath) / posFileName, compressor_, expectedClusters)); } template<> boost::shared_ptr BclDemultiplexer::createWriter( const SamplePath &samplePath, const unsigned int tile, const unsigned int expectedClusters, const std::string &cycleDirName) const { return boost::shared_ptr( new CountingBclWriter( pathsHelper_.formatLanePath(samplePath) / cycleDirName / pathsHelper_.formatBclFileName(tile), expectedClusters)); } template<> boost::shared_ptr BclDemultiplexer::createWriter( const SamplePath &samplePath, const unsigned int , const unsigned int , const std::string &demuxIndexFileName) const { return boost::shared_ptr( new ClusterIndexWriter(pathsHelper_.formatDemuxIndexPath(samplePath)/ demuxIndexFileName, compressor_)); } /** * @brief Creates array of writers, reader and performs cluster-by-cluster transfer of. * data between the reader and writers depending on the barcode mapping available through * clusterBarcodeIndex_ * * @param WriterT Type for createWriter<> * @param ReaderT Type of the reader * @param ConverterT Type that performs conversion between record from reader and the record * required by the writer * * @clusterBarcodeIndex mapping between cluster numbers and indexes in error-corrected barcodes * @param reader Reader to use * @param outputName Context-specific name to form the output paths. Passed to the * creaderReader. * @param converter object performing record conversion. * * @return Vector of closed writers in the order corresponding to that of the barcodes. */ template const std::vector > BclDemultiplexer::demultiplex(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, ReaderT &reader, const std::string &outputName, const ConverterT &converter) const { std::vector > writers; std::transform(barcodeDirectories_.begin(), barcodeDirectories_.end(), clusterBarcodeIndex.counts_.begin(), std::back_inserter(writers), boost::bind(&BclDemultiplexer::createWriter, this, _1, tile, _2, outputName)); demultiplex(clusterBarcodeIndex, reader, writers, converter); std::for_each(writers.begin(), writers.end(), boost::bind(&WriterT::close, _1)); return writers; } template const std::vector > BclDemultiplexer::demultiplex(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, ReaderT &reader, const std::string &outputName) const { return demultiplex(clusterBarcodeIndex, tile, reader, outputName, boost::bind(&boost::cref, _1)); } /** * @brief Saves demultiplexed stats file * * @param intensitiesPath path to target dataset intensities folder * @param cycle 1 - based cycle to demultiplex * @param writerPtr writer that was used to store the Bcl file * @param originalStats data from the original stats file * * @return path to the saved file */ const std::string BclDemultiplexer::createStatsFile(const fs::path &samplePath, const unsigned int tile, const unsigned int cycle, boost::shared_ptr writerPtr, const cio::StatsFileRecord &originalStats) const { const std::string demultiplexedlStatsFilePath(pathsHelper_.formatStatsFilePath(samplePath, tile, cycle).string()); std::ofstream sos(demultiplexedlStatsFilePath.c_str(), std::ios_base::out | std::ios_base::binary); sos.exceptions(std::ofstream::failbit|std::ofstream::badbit); cio::StatsFileRecord demultiplexedStats(originalStats); demultiplexedStats.numA_ = writerPtr->getBaseCount('A'); demultiplexedStats.numC_ = writerPtr->getBaseCount('C'); demultiplexedStats.numG_ = writerPtr->getBaseCount('G'); demultiplexedStats.numT_ = writerPtr->getBaseCount('T'); demultiplexedStats.numX_ = writerPtr->getBaseCount('N'); if (!(sos << demultiplexedStats)) { BOOST_THROW_EXCEPTION(cc::IoException(errno, (boost::format("Unable to write data into %s") % demultiplexedlStatsFilePath).str())); } return demultiplexedlStatsFilePath; } /** * @brief Performs cycle-specific parameter setup for demultiplex and computes new stats files * * @param cycle 1 - based cycle number */ void BclDemultiplexer::demultiplexBclsCycle(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, const unsigned int cycle) const { // Demultiplex bcl file const fs::path originalBclFilePath(pathsHelper_.formatBclFilePath(intensitiesPath_, tile, cycle)); std::cerr << "demultiplexing: " << originalBclFilePath << "\n"; // std::for_each(barcodeDirectories_.begin(), barcodeDirectories_.end(), // boost::bind(&BclDemultiplexer::checkCreateCycleDirectory, this, _1, cycle)); ca::BclReader reader(std::vector(1, originalBclFilePath)); const std::vector > writers( demultiplex(clusterBarcodeIndex, tile, reader, pathsHelper_.formatCycleDirName(cycle))); // Write new stats file const std::string originalStatsFilePath(pathsHelper_.formatStatsFilePath(intensitiesPath_, tile, cycle).string()); std::cerr << "demultiplexing: " << originalStatsFilePath << "\n"; cio::StatsFileRecord originalStats; std::ifstream sis(originalStatsFilePath.c_str(), std::ios_base::in | std::ios_base::binary); if (!(sis >> originalStats)) { BOOST_THROW_EXCEPTION(cc::IoException(errno, (boost::format("Unable to read data from %s") % originalStatsFilePath).str())); } std::vector demultiplexedStatsFilePaths; std::transform(barcodeDirectories_.begin(), barcodeDirectories_.end(), writers.begin(), std::back_inserter(demultiplexedStatsFilePaths), //dummy string vector to make transform happy boost::bind(&BclDemultiplexer::createStatsFile, this, _1, tile, cycle, _2, boost::cref(originalStats))); } /** * @brief Demultiplexes single Filters file if it exists * * @param readNumber 1 - based read number. Used to compose the input and output file names * * @return nothing */ void BclDemultiplexer::demultiplexFiltersRead(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile, const unsigned int readNumber) const { const fs::path originalFiltersFilePath(pathsHelper_.formatFiltersFilePath(basecallsPath_, tile, readNumber)); std::cerr << "demultiplexing: " << originalFiltersFilePath << "\n"; ca::FiltersReader filtersReader(originalFiltersFilePath, false); demultiplex(clusterBarcodeIndex, tile, filtersReader, pathsHelper_.formatFiltersFileName(tile, readNumber)); } void BclDemultiplexer::demultiplexFiltersRead(const ClusterCorrectedBarcodeIndex &clusterBarcodeIndex, const unsigned int tile) const { const fs::path originalFiltersFilePath(pathsHelper_.formatFiltersFilePath(basecallsPath_, tile)); std::cerr << "demultiplexing: " << originalFiltersFilePath << "\n"; ca::FiltersReader filtersReader(originalFiltersFilePath, false); demultiplex(clusterBarcodeIndex, tile, filtersReader, pathsHelper_.formatFiltersFileName(tile)); } } // namespace demultiplex } // namespace casava