#include "applications/Export2Fastq.hh"
/**
 * Project : CASAVA
 * Module : $RCSfile: Export2Fastq.cpp,v $
 * @author : Roman Petrovski
 * Copyright : Copyright (c) Illumina 2008, 2009. All rights reserved.
 *
 ** This software is covered by the "Illumina Genome Analyzer Software
 ** License Agreement" and the "Illumina Source Code License Agreement",
 ** and certain third party copyright/licenses, and any user of this
 ** source file is bound by the terms therein (see accompanying files
 ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
 ** Illumina_Source_Code_License_Agreement.pdf and third party
 ** copyright/license notices).
 *
 */

#include <climits>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

#include <boost/filesystem.hpp>

#include "common/Alignment.hh"

namespace ca
{
namespace applications
{

using namespace std;
using namespace casava::common;

namespace
{

/**
 * @brief Clamps a 64-bit counter to the largest value an int can hold.
 *
 * The counters in run() are accumulated in 64 bits so that large inputs
 * cannot overflow, but writeSummary() takes int arguments.
 */
int saturateToInt(const long long value)
{
    return value > INT_MAX ? INT_MAX : static_cast<int>(value);
}

} // anonymous namespace

/**
 * @brief Opens the optional summary file and prints the conversion statistics.
 *
 * If the options provide a non-empty summary file path, the file is opened
 * uncompressed. Failing to open it is fatal: an error is printed on stderr and
 * the process exits with status 1. The statistics line is always printed on
 * stdout, whether or not a summary file was requested.
 *
 * @param inputReadsFilePath path of the input reads file (not used here)
 * @param totalBases         total base count to report
 * @param totalReads         total read count to report
 * @param minReadLen         shortest read length to report
 * @param maxReadLen         longest read length to report
 */
void Export2Fastq::writeSummary(const std::string &inputReadsFilePath,
                                int totalBases, int totalReads,
                                int minReadLen, int maxReadLen)
{
    const std::string summaryFilePath = _options.summaryFilePath();
    if (!summaryFilePath.empty())
    {
        casava::common::Ofstream summaryStream(summaryFilePath,
                                               casava::common::CompressionFactory::none());
        if (summaryStream.is_open() && summaryStream.good())
        {
            // Progress goes to stdout, like every other status message of run().
            cout << "\n[Export2Fastq] Writing " << summaryFilePath;
        }
        else
        {
            // Diagnostics go to stderr, like the other fatal errors of run().
            cerr << "\n[Export2Fastq] Error: failed to open. Summary will not be produced."
                 << summaryFilePath;
            exit(1);
        }
        // NOTE(review): only an empty string is written, so the summary file is
        // created but stays empty - confirm whether real content is expected here.
        summaryStream << "";
    }

    cout << "\n[Export2Fastq] totalReads:" << totalReads
         << " totalBases:" << totalBases
         << " minReadLen:" << minReadLen
         << " maxReadLen:" << maxReadLen;
}

/**
 * @brief Converts the input reads file into a FASTQ file.
 *
 * Reads alignments from the input path given by the options and writes one
 * four-line FASTQ record per accepted read. When no output path is configured,
 * the input path with its extension replaced by "fastq" is used. If the purity
 * filter option is set, reads whose getPassed() is false are skipped.
 * A '#' progress mark is printed on stdout each time the running base count
 * crosses another multiple of one million.
 *
 * Failure to open the input or the output file is fatal: an error is printed
 * on stderr and the process exits with status 1.
 *
 * @return 0 once the whole input has been processed
 */
int Export2Fastq::run()
{
    cout << "[Export2Fastq] Start";

    // Number of bases between two progress hash marks.
    const int hashSize = 1000000;

    const std::string inputReadsFilePath = _options.inputReadsFilePath();
    std::string outputFilePath = _options.outputFilePath();
    if (outputFilePath.empty())
    {
        // Default output: same location and name as the input, ".fastq" extension.
        boost::filesystem::path outputPath(inputReadsFilePath);
        outputPath.replace_extension("fastq");
        outputFilePath = outputPath.string();
    }

    casava::common::AlignmentReader alignmentReader(inputReadsFilePath,
                                                    casava::common::CompressionFactory::none());
    if (alignmentReader && alignmentReader.good())
    {
        cout << "\n[Export2Fastq] Reading from " << inputReadsFilePath;
    }
    else
    {
        cerr << "\n[Export2Fastq] Error: failed to open input file." << inputReadsFilePath;
        exit(1);
    }

    casava::common::Ofstream outputStream(outputFilePath,
                                          casava::common::CompressionFactory::none());
    if (outputStream.is_open() && outputStream.good())
    {
        cout << "\n[Export2Fastq] Writing " << outputFilePath
             << " printing hash mark every " << hashSize << " bases ";
    }
    else
    {
        cerr << "[Export2Fastq] Error: failed to open output file." << outputFilePath;
        exit(1);
    }

    int minReadLen = INT_MAX;
    int maxReadLen = 0;
    // 64-bit accumulators: the base count of a large input can exceed INT_MAX,
    // and signed int overflow is undefined behaviour.
    long long totalBases = 0;
    long long totalReads = 0;
    long long hashMarks = 0;

    casava::common::Alignment readAlignment;
    while (alignmentReader.get(readAlignment))
    {
        const bool accepted = alignmentReader.good()
            // purity filtering
            && (!_options.purityFilter() || readAlignment.getPassed());
        if (!accepted)
        {
            continue;
        }

        const casava::common::Spot &spot = readAlignment.getSpot();
        const casava::common::Tile &tile = spot.getTile();

        // NOTE(review): the read number is emitted twice in the header (after
        // "_R" and again after the lane number) - confirm the second one is
        // intended.
        outputStream << "@" << tile.getMachineName()
                     << "_" << tile.getRunNumber()
                     << "_R" << readAlignment.getReadNumber()
                     << "_" << tile.getLaneNumber()
                     << "_" << readAlignment.getReadNumber()
                     << "_" << spot.getX()
                     << "_" << spot.getY();
        if (!readAlignment.getIndex().empty())
        {
            outputStream << "_" << readAlignment.getIndex();
        }
        outputStream << "\n" << readAlignment.getData()
                     << "\n+\n" << readAlignment.getQuality() << "\n";

        const int readBases = static_cast<int>(readAlignment.getData().length());
        totalBases += readBases;
        ++totalReads;
        if (minReadLen > readBases)
        {
            minReadLen = readBases;
        }
        if (maxReadLen < readBases)
        {
            maxReadLen = readBases;
        }

        // One mark whenever the running total reaches a new multiple of hashSize.
        const long long currentMarks = totalBases / hashSize;
        if (hashMarks != currentMarks)
        {
            cout << "#";
            cout.flush();
            hashMarks = currentMarks;
        }
    }

    if (0 == totalReads)
    {
        // No read was accepted: report 0 rather than the INT_MAX sentinel.
        minReadLen = 0;
    }

    writeSummary(inputReadsFilePath,
                 saturateToInt(totalBases), saturateToInt(totalReads),
                 minReadLen, maxReadLen);
    cout << "\n[Export2Fastq] End\n";
    return 0;
}

} // namespace applications
} // namespace ca