/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** ** \file MultiFileFastqWriter.hh ** ** \brief FastqWriter with limited number of clusters per output file. ** ** \author Roman Petrovski **/ #ifndef CASAVA_DEMULTIPLEX_MULTI_FILE_FASTQ_WRITER_HH #define CASAVA_DEMULTIPLEX_MULTI_FILE_FASTQ_WRITER_HH #include #include "io/FastqWriter.hh" namespace casava { namespace demultiplex { namespace fs = boost::filesystem; namespace cc = casava::common; namespace cio = casava::io; /** ** @brief Presumably splits output into multiple files each one containing no more than ** specified number of clusters. **/ class MultiFileWriter { public: virtual ~MultiFileWriter(){} virtual void put(const cio::FastqWriter::RecordType &read) = 0; virtual void close() = 0; }; /** ** @brief Does nothing. **/ class MultiFileWriterThatDoesNothing : public MultiFileWriter { virtual void put(const cio::FastqWriter::RecordType &){} virtual void close(){} }; /** ** @brief Splits output into multiple fastq files each one containing no more than ** specified number of clusters. **/ class MultiFileFastqWriter : boost::noncopyable, public MultiFileWriter { public: MultiFileFastqWriter(const fs::path &filePathTemplate, const unsigned int maxClustersPerFile, const cc::Compression &compressor, const bool overwrite) : compressor_(compressor) , overwrite_(overwrite) , filePathTemplate_(filePathTemplate.string()) , maxClustersPerFile_(maxClustersPerFile) , currentSet_(0) , currentCluster_(0) { // Make sure empty file gets created regarless of whether anything is written. // This is required when demultiplexer is executed from makefiles. Otherwise, repeated // invocation of make will start demultiplexer again. createNewFile(overwrite_); } private: virtual void put(const cio::FastqWriter::RecordType &read){ if (currentCluster_ && !(currentCluster_ % maxClustersPerFile_)) { createNewFile(overwrite_); } ++currentCluster_; currentWriter_->put(read); } virtual void close() { if (currentWriter_) { currentWriter_->close(); } const fs::path tmpFile = formatFilePath(filePathTemplate_, 1, true).string() + compressor_.getFileNameExtension(); const fs::path finalFile = formatFilePath(filePathTemplate_, 1, false).string() + compressor_.getFileNameExtension(); fs::remove(finalFile); fs::rename(tmpFile, finalFile); } private: void createNewFile(const bool overwrite){ if (currentWriter_) { currentWriter_->close(); } ++currentSet_; currentWriter_ = boost::shared_ptr( new cio::FastqWriter(formatFilePath(filePathTemplate_, currentSet_, 1 == currentSet_), compressor_, overwrite)); } static fs::path formatFilePath(const std::string &filePathTemplate, const unsigned int setNumber, const bool useTmpName){ static const char *tmpExtension_ = ".tmp"; return (boost::format(filePathTemplate) % setNumber).str() + (useTmpName ? tmpExtension_ : ""); } const cc::Compression &compressor_; const bool overwrite_; const std::string filePathTemplate_; const unsigned int maxClustersPerFile_; unsigned int currentSet_; unsigned int currentCluster_; boost::shared_ptr currentWriter_; MultiFileFastqWriter(); }; } // namespace demultiplex } // namespace casava #endif // CASAVA_DEMULTIPLEX_MULTI_FILE_FASTQ_WRITER_HH