/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file DemuxSummaryCollector.hh ** ** \brief Summary collector declaration. ** ** \author Roman Petrovski **/ #ifndef CASAVA_DEMULTIPLEX_DEMUX_SUMMARY_COLLECTOR_HH #define CASAVA_DEMULTIPLEX_DEMUX_SUMMARY_COLLECTOR_HH #include #include "io/FastqWriter.hh" #include "DemuxSummaryRecord.hh" namespace casava { namespace demultiplex { /** * @brief Collects various demultiplexing statistics and produces an xml file at close(). * * The parent vector contains pairs of Raw/Pf summary records for each tile that had at least * one cluster. If the record read number is 0, it means there has not been a single cluster * reported for that tile. **/ template class DemuxSummaryCollector : boost::noncopyable, public std::vector > { public: DemuxSummaryCollector(const unsigned int readNumber, const unsigned int lane, const std::string &sample, const std::string &barcode, const boost::shared_ptr writer, const bool withFailedReads) : readNumber_(readNumber) , lane_(lane) , sample_(sample) , barcodeWithoutSeparators_(BarcodeTranslationTable::removeComponentSeparator(barcode)) , barcode_(barcode) , writer_(writer) , withFailedReads_(withFailedReads) { } void put(const cio::FastqWriter::RecordType &read){ if (withFailedReads_ || read.filter_) { writer_->put(read); updateStats(read); } } void close() { writer_->close(); } typedef std::pair PairSummaryRawPf; private: unsigned int readNumber_; unsigned int lane_; std::string sample_; const std::string barcodeWithoutSeparators_; const std::string barcode_; const fs::path summaryFilePath_; const boost::shared_ptr writer_; const bool withFailedReads_; void updateTileStat(DemuxSummaryRecord &summary, const cio::FastqWriter::RecordType &read) { summary.yield_ += read.cluster_.first.length(); summary.yiedQ30_ += std::count_if(read.cluster_.second.begin(), read.cluster_.second.end(), boost::bind(&boost::cref, _1) >= (33u + 30u)); ++summary.clustersTotal_; if (!barcodeWithoutSeparators_.empty()) { const std::pair mm( std::mismatch(barcodeWithoutSeparators_.begin(), barcodeWithoutSeparators_.end(), read.barcode_.begin())); if (barcodeWithoutSeparators_.end() == mm.first) { ++summary.clusters0MismatchBarcode_; } else { const std::pair mm2( std::mismatch(mm.first+1, barcodeWithoutSeparators_.end(), mm.second+1)); summary.clusters1MismatchBarcode_ += (barcodeWithoutSeparators_.end() == mm2.first); } } summary.qualityScoreSum_ += std::accumulate(read.cluster_.second.begin(), read.cluster_.second.end(), 0) - read.cluster_.second.length() * 33u; } void updateStats(const cio::FastqWriter::RecordType &read) { resize(std::max(size_t(read.tile_ + 1), size())); PairSummaryRawPf &p(operator[](read.tile_)); if (!p.first.readNumber_) { //uninitialized record p.first = p.second = DemuxSummaryRecord(readNumber_, lane_, read.tile_, sample_, barcode_); } updateTileStat(p.first, read); if (0x01 & read.filter_) { updateTileStat(p.second, read); } } }; } // namespace demultiplex } // namespace casava #endif // CASAVA_DEMULTIPLEX_DEMUX_SUMMARY_COLLECTOR_HH