/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file BclToFastqDemultiplexer.cpp ** ** \brief Top level library component to demultiplex a tile stack of bcl files. ** ** \author Roman Petrovski **/ #include #include #include #include #include #include "common/Exceptions.hh" #include "alignment/BclReader.hh" #include "demultiplex/BclToFastqDemultiplexer.hh" #include "demultiplex/MultiFileFastqWriter.hh" #include "demultiplex/CompositeBclReader.hh" #include "AdapterTrimmingRecordConverter.hh" #include "DemuxSummaryXmlWriter.hh" namespace casava { namespace demultiplex { namespace ca=casava::alignment; boost::shared_ptr > BclToFastqDemultiplexer::createMultiFileWriter( const SamplePath &samplePath, const unsigned int readNumber) const { boost::shared_ptr writer; if (!samplePath.empty()) { fs::path fastqPathTemplate(pathsHelper_.formatFastqPath(samplePath) / pathsHelper_.formatFastqFileName(samplePath.sample_, samplePath.barcode_, readNumber)); writer = boost::shared_ptr( new MultiFileFastqWriter(fastqPathTemplate, fastqClusterCount_, compressor_, true)); } else { writer = boost::shared_ptr(new MultiFileWriterThatDoesNothing); } return boost::shared_ptr >( new DemuxSummaryCollector(readNumber, lane_, samplePath.sample_, samplePath.barcode_, writer, withFailedReads_)); } boost::shared_ptr BclToFastqDemultiplexer::createReader( const unsigned int tile, const size_t readIndex, const std::vector &dataCycles) const { std::vector allCycles = dataCycles; allCycles.insert(allCycles.end(), barcodeCycles_.begin(), barcodeCycles_.end()); std::vector bclPaths = bclFilePaths(tile, allCycles); fs::path filtersPath = (filterPerRead_ ? pathsHelper_.formatFiltersFilePath(inputFilterDir_, tile, inputReadNumbers_[readIndex]) : pathsHelper_.formatFiltersFilePath(inputFilterDir_, tile) ); fs::path positionsPath = pathsHelper_.formatPositionsFilePath(inputPositionsDir_, tile, inputPositionsFileType_); // BIG ASSUMPTION IN LAST PARAMETER OF THE FOLLOWING CONSTRUCTOR: // 1 filter file per read => 16-bit filter file, // otherwise, it's an 8-bit filter file // (and, yes, this IS ignored for control files) bool controlsIncluded = filterPerRead_; fs::path controlsPath = pathsHelper_.formatControlsFilePath(inputFilterDir_, tile); if (needSeparateControls_ && (fs::exists(controlsPath) || !ignoreMissingCtrl_)) { return boost::shared_ptr( new CompositeBclReader( bclPaths, filtersPath, controlsPath, positionsPath, ignoreMissingBcl_, controlsIncluded ) ); } else { return boost::shared_ptr( new CompositeBclReader( bclPaths, filtersPath, positionsPath, ignoreMissingBcl_, controlsIncluded ) ); } } void BclToFastqDemultiplexer::run() const { // Initialize writers std::vector< std::vector< boost::shared_ptr > > > readWriters(readCycles_.size()); size_t readIndex = 0; BOOST_FOREACH(const unsigned int readNumber, outputReadNumbers_) { std::transform(barcodeDirectories_.begin(), barcodeDirectories_.end(), std::back_inserter(readWriters[readIndex++]), boost::bind(&BclToFastqDemultiplexer::createMultiFileWriter, this, _1, readNumber)); } DemuxSummaryXmlWriter summaryWriter(outputSummaryFilePath_); // Demultiplex BOOST_FOREACH(const unsigned int tile, tiles_) { ClusterCorrectedBarcodeIndex clusterBarcodeIndex(mapClusterBarcodes(tile)); size_t readIndex = 0; BOOST_FOREACH(const std::vector &cycles, readCycles_) { std::cerr << "Processing BCL files for lane " << lane_ << ", tile " << tile << ", read " << outputReadNumbers_[readIndex] << "\n"; boost::shared_ptr reader(createReader(tile,readIndex,cycles)); Bcl2FastqRecordConverter bcl2FastqConverter(instrumentName_, runNumber_, flowCellId_, lane_, tile, outputReadNumbers_[readIndex], withEamss_, !needSeparateControls_, barcodeCycles_.size()); if (readAdapterSequences_[readIndex].empty()) { demultiplex(clusterBarcodeIndex, *reader, readWriters[readIndex], bcl2FastqConverter); } else { AdapterTrimmingRecordConverter adapterTrimmingConverter(readAdapterSequences_[readIndex], barcodeCycles_.size(), bcl2FastqConverter); demultiplex(clusterBarcodeIndex, *reader, readWriters[readIndex], adapterTrimmingConverter); } ++readIndex; } } // Write summary BOOST_FOREACH(const std::vector > > &writers, readWriters ) { BOOST_FOREACH(const boost::shared_ptr > &writer, writers) { BOOST_FOREACH(const DemuxSummaryCollector::PairSummaryRawPf& p, *writer) { if (p.first.readNumber_) { summaryWriter.addTileSummary(p); } } writer->close(); } } summaryWriter.close(); } } // namespace demultiplex } // namespace casava