/** ** Copyright (c) 2007-2009 Illumina, Inc. ** ** This source file is covered by the "Illumina Public Source License" ** agreement and bound by the terms therein. ** ** This file is part of the Off Line Base-caller (OLB) ** ** \file StatsToSignalMeansConverter.cpp ** ** Top level application to convert a set of stats files - and their associated ** filter - for a single tile into the corresponding SignalMeans files ** ** \author Come Raczy **/ #include #include #include #include #include #include "common/Exceptions.hh" #include "common/FastIo.hh" #include "alignment/BclReader.hh" #include "basecalling/StatsToSignalMeansConverter.hh" namespace casava { namespace basecalling { namespace cc = casava::common; StatsToSignalMeansConverter::StatsToSignalMeansConverter(unsigned int laneIdentifier, unsigned int repeatIdentifier, const std::vector &cycleList, const fs::path &inputDirectory, const std::string statsfileName, const fs::path &filterFilePath, const fs::path &signalMeansFilePath, const bool checkCycles, const bool ignoreMissingStats, const bool ctrlIncluded) : laneIdentifier_(laneIdentifier) , repeatIdentifier_(repeatIdentifier) , cycleList_(cycleList) , inputDirectory_(inputDirectory) , statsFileName_(statsfileName) , filterFilePath_(filterFilePath) , signalMeansFilePath_(signalMeansFilePath) , checkCycles_(checkCycles) , ignoreMissingStats_(ignoreMissingStats) , ctrlIncluded_(ctrlIncluded) { } unsigned int getNumberOfClusters(const fs::path filePath, const bool ctrlIncluded) { using boost::format; casava::alignment::FiltersReader fr(filePath, ctrlIncluded); return fr.getClusterCount(); } void StatsToSignalMeansConverter::run() { using boost::format; if (!fs::exists(inputDirectory_)) { const format message = format("Input directory %s does not exist") % inputDirectory_; BOOST_THROW_EXCEPTION(cc::IoException(ENOENT, message.str())); } const fs::path lanePath = inputDirectory_ / (format("L%03d") % laneIdentifier_).str(); if (!fs::exists(lanePath)) { const format message = format("Lane directory %s does not exist") % lanePath; BOOST_THROW_EXCEPTION(cc::IoException(ENOENT, message.str())); } std::ostringstream os; unsigned long yield = 0; unsigned int filtered = 0; for (std::vector::const_iterator cycle = cycleList_.begin(); cycleList_.end() != cycle; ++cycle) { const fs::path cyclePath = lanePath / (format("C%d.%d") % *cycle % repeatIdentifier_).str(); if (!fs::exists(cyclePath)) { const format message = format("cycle directory %s does not exist") % cyclePath; BOOST_THROW_EXCEPTION(cc::IoException(ENOENT, message.str())); } unsigned int currentFiltered = 0; const fs::path statsPath = cyclePath / statsFileName_; if (fs::exists(statsPath)) { std::ifstream is(statsPath.string().c_str()); if (!is) { const format message = format("couldn't open stats file %s: %s") % statsPath % strerror(errno); BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } os << laneIdentifier_; const int c = cc::readInteger<4>(is) + 1; if (checkCycles_ && (c < 0 || static_cast(c) != *cycle)) { const format message = format("unexpected cycle number in stats file %s: expected %d: got %d") % statsPath % *cycle % c; BOOST_THROW_EXCEPTION(cc::IoException(EINVAL, message.str())); } os << '\t' << (*cycle); //double dummy; cc::readDouble(is); // average cycle intensity is not used for (unsigned int i = 0; 8 > i; ++i) { // All_A All_C All_G All_T Call_A Call_C Call_G Call_T const double d = cc::readDouble(is); if (!is) { const format message = format("failed to read double value from stats file %s: %s") % statsPath % strerror(errno); BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } os << '\t' << d; } for (unsigned int i = 0; 5 > i; ++i) { // Num_A Num_C Num_G Num_T Num_X const int n = cc::readInteger<4>(is); if (!is) { const format message = format("failed to integer value from stats file %s: %s") % statsPath % strerror(errno); BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } if (0 > n) { const format message = format("found negative number of bases in stats file %s: %d") % statsPath % n; BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } os << '\t' << n; if (4 > i) { yield += n; } currentFiltered += n; } } else if (ignoreMissingStats_) { // stats file does not exist, but there is an option set to avoid blowing up const format message = format("Filling in zeros for missing stats file %s") % statsPath; std::cerr << message.str() << std::endl; const format iZero = format("\t%d") % 0; const format fZero = format("\t%2.1f") % 0.0; os << laneIdentifier_; os << '\t' << (*cycle); // All_A All_C All_G All_T Call_A Call_C Call_G Call_T os << fZero << fZero << fZero << fZero << fZero << fZero << fZero << fZero; // Num_A Num_C Num_G Num_T Num_X os << iZero << iZero << iZero << iZero << iZero; } else { // stats file does not exist and there is no option, so just blow up! const format message = format("stats file %s does not exist") % statsPath; BOOST_THROW_EXCEPTION(cc::IoException(ENOENT, message.str())); } if (cycleList_.begin() == cycle) { filtered = currentFiltered; } if (currentFiltered != filtered) { const format message = format("Inconsistent number of bases in %s: expected %d: got %d") % statsPath % filtered % currentFiltered; std::cerr << message.str() << std::endl; // BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } os << std::endl; } std::ofstream signalMeans(signalMeansFilePath_.string().c_str()); if (!signalMeans) { const format message = format("failed to open signal means file %s: %s") % signalMeansFilePath_ % strerror(errno); BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } const unsigned int original = getNumberOfClusters(filterFilePath_, ctrlIncluded_); signalMeans << (format("# Clusters: Filtered %d Original %d\n") % filtered % original).str(); signalMeans << (format("# Yield: Filtered %d\n") % yield).str(); signalMeans << "#\n"; signalMeans << "#Lane Cycle All A All C All G All T Call A Call C Call G Call T Num A Num C Num G Num T Num X\n"; signalMeans << os.str(); if (!signalMeans) { const format message = format("failed to write into signal means file %s: %s") % signalMeansFilePath_ % strerror(errno); BOOST_THROW_EXCEPTION(cc::IoException(errno, message.str())); } } } // namespace basecalling } // namespace casava