//Copyright 2017 Ryan Wick //This file is part of Bandage //Bandage is free software: you can redistribute it and/or modify //it under the terms of the GNU General Public License as published by //the Free Software Foundation, either version 3 of the License, or //(at your option) any later version. //Bandage is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //GNU General Public License for more details. //You should have received a copy of the GNU General Public License //along with Bandage. If not, see . #include "querypaths.h" #include "commoncommandlinefunctions.h" #include "../program/settings.h" #include "../graph/assemblygraph.h" #include "../blast/blastsearch.h" #include int bandageQueryPaths(QStringList arguments) { QTextStream out(stdout); QTextStream err(stderr); if (checkForHelp(arguments)) { printQueryPathsUsage(&out, false); return 0; } if (checkForHelpAll(arguments)) { printQueryPathsUsage(&out, true); return 0; } if (arguments.size() < 3) { printQueryPathsUsage(&err, false); return 1; } QString graphFilename = arguments.at(0); arguments.pop_front(); if (!checkIfFileExists(graphFilename)) { outputText("Bandage error: " + graphFilename, &err); return 1; } QString queriesFilename = arguments.at(0); arguments.pop_front(); if (!checkIfFileExists(queriesFilename)) { outputText("Bandage error: " + queriesFilename, &err); return 1; } g_settings->blastQueryFilename = queriesFilename; //Ensure that the --query option isn't used, as that would overwrite the //queries file that is a positional argument. if (isOptionPresent("--query", &arguments)) { err << "Bandage error: the --query option cannot be used with Bandage querypaths." << Qt::endl; return 1; } QString outputPrefix = arguments.at(0); QString tableFilename = outputPrefix + ".tsv"; QString pathFastaFilename = outputPrefix + "_paths.fasta"; QString hitsFastaFilename = outputPrefix + "_hits.fasta"; arguments.pop_front(); QString error = checkForInvalidQueryPathsOptions(arguments); if (error.length() > 0) { outputText("Bandage error: " + error, &err); return 1; } bool pathFasta = false; bool hitsFasta = false; parseQueryPathsOptions(arguments, &pathFasta, &hitsFasta); //Check to make sure the output files don't already exist. QFile tableFile(tableFilename); QFile pathsFile(pathFastaFilename); QFile hitsFile(hitsFastaFilename); if (tableFile.exists()) { outputText("Bandage error: " + tableFilename + " already exists.", &err); return 1; } if (pathFasta && pathsFile.exists()) { outputText("Bandage error: " + pathFastaFilename + " already exists.", &err); return 1; } if (hitsFasta && hitsFile.exists()) { outputText("Bandage error: " + hitsFastaFilename + " already exists.", &err); return 1; } QDateTime startTime = QDateTime::currentDateTime(); out << Qt::endl << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Loading graph... " << Qt::flush; bool loadSuccess = g_assemblyGraph->loadGraphFromFile(graphFilename); if (!loadSuccess) return 1; out << "done" << Qt::endl; if (!createBlastTempDirectory()) { err << "Error creating temporary directory for BLAST files" << Qt::endl; return 1; } out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Running BLAST search... " << Qt::flush; QString blastError = g_blastSearch->doAutoBlastSearch(); if (blastError != "") { err << Qt::endl << blastError << Qt::endl; return 1; } out << "done" << Qt::endl; out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Saving results... " << Qt::flush; //Create the table file. tableFile.open(QIODevice::WriteOnly | QIODevice::Text); QTextStream tableOut(&tableFile); //Write the TSV header line. tableOut << "Query\t" "Path\t" "Length\t" "Query covered by path\t" "Query covered by hits\t" "Mean hit identity\t" "Total hit mismatches\t" "Total hit gap opens\t" "Relative length\t" "Length discrepancy\t" "E-value product\t"; //If the user asked for a separate path sequence file, then the last column //will be a reference to that sequence ID. If not, the sequence will go in //the table. if (pathFasta) tableOut << "Sequence ID\n"; else tableOut << "Sequence\n"; //If a path sequence FASTA file is used, these will store the sequences //that will go there. QList pathSequenceIDs; QList pathSequences; //If a hits sequence FASTA file is used, these will store the sequences //that will go there. QList hitSequenceIDs; QList hitSequences; for (size_t i = 0; i < g_blastSearch->m_blastQueries.m_queries.size(); ++i) { BlastQuery * query = g_blastSearch->m_blastQueries.m_queries[i]; QList queryPaths = query->getPaths(); for (int j = 0; j < queryPaths.size(); ++j) { BlastQueryPath queryPath = queryPaths[j]; Path path = queryPath.getPath(); tableOut << query->getName() << "\t"; tableOut << path.getString(true) << "\t"; tableOut << QString::number(path.getLength()) << "\t"; tableOut << QString::number(100.0 * queryPath.getPathQueryCoverage()) << "%\t"; tableOut << QString::number(100.0 * queryPath.getHitsQueryCoverage()) << "%\t"; tableOut << QString::number(queryPath.getMeanHitPercIdentity()) << "%\t"; tableOut << QString::number(queryPath.getTotalHitMismatches()) << "\t"; tableOut << QString::number(queryPath.getTotalHitGapOpens()) << "\t"; tableOut << QString::number(100.0 * queryPath.getRelativePathLength()) << "%\t"; tableOut << queryPath.getAbsolutePathLengthDifferenceString(false) << "\t"; tableOut << queryPath.getEvalueProduct().asString(false) << "\t"; //If we are using a separate file for the path sequences, save the //sequence along with its ID to save later, and store the ID here. //Otherwise, just include the sequence in this table. QByteArray sequence = path.getPathSequence(); QString pathSequenceID = query->getName() + "_" + QString::number(j+1); if (pathFasta) { pathSequenceIDs.push_back(pathSequenceID); pathSequences.push_back(sequence); tableOut << pathSequenceID << "\n"; } else tableOut << sequence << "\n"; //If we are also saving the hit sequences, save the hit sequence //along with its ID to save later. if (hitsFasta) { QList hits = queryPath.getHits(); for (int k = 0; k < hits.size(); ++k) { BlastHit * hit = hits[k]; QString hitSequenceID = pathSequenceID + "_" + QString::number(k+1); QByteArray hitSequence = hit->getNodeSequence(); hitSequenceIDs.push_back(hitSequenceID); hitSequences.push_back(hitSequence); } } } } //Write the path sequence FASTA file, if appropriate. if (pathFasta) { pathsFile.open(QIODevice::WriteOnly | QIODevice::Text); QTextStream pathsOut(&pathsFile); for (int i = 0; i < pathSequenceIDs.size(); ++i) { pathsOut << ">" + pathSequenceIDs[i] + "\n"; pathsOut << AssemblyGraph::addNewlinesToSequence(pathSequences[i]); } } //Write the hits sequence FASTA file, if appropriate. if (hitsFasta) { hitsFile.open(QIODevice::WriteOnly | QIODevice::Text); QTextStream hitsOut(&hitsFile); for (int i = 0; i < hitSequenceIDs.size(); ++i) { hitsOut << ">" << hitSequenceIDs[i] << "\n"; hitsOut << AssemblyGraph::addNewlinesToSequence(hitSequences[i]); } } out << "done" << Qt::endl; out << Qt::endl << "Results: " + tableFilename << Qt::endl; if (pathFasta) out << " " + pathFastaFilename << Qt::endl; if (hitsFasta) out << " " + hitsFastaFilename << Qt::endl; out << Qt::endl << "Summary: Total BLAST queries: " << g_blastSearch->m_blastQueries.getQueryCount() << Qt::endl; out << " Queries with found paths: " << g_blastSearch->m_blastQueries.getQueryCountWithAtLeastOnePath() << Qt::endl; out << " Total query paths: " << g_blastSearch->m_blastQueries.getQueryPathCount() << Qt::endl; out << Qt::endl << "Elapsed time: " << getElapsedTime(startTime, QDateTime::currentDateTime()) << Qt::endl; deleteBlastTempDirectory(); return 0; } void printQueryPathsUsage(QTextStream * out, bool all) { QStringList text; text << "Bandage querypaths searches for queries in the graph using BLAST and outputs the results to a tab-delimited file."; text << ""; text << "Usage: Bandage querypaths [options]"; text << ""; text << "Positional parameters:"; text << " A graph file of any type supported by Bandage"; text << " A FASTA file of one or more BLAST queries"; text << " The output file prefix (used to create the '.tsv' output file, and possibly FASTA files as well, depending on options)"; text << ""; text << "Options: --pathfasta Put all query path sequences in a multi-FASTA file, not in the TSV file"; text << "--hitsfasta Produce a multi-FASTA file of all BLAST hits in the query paths"; text << ""; getCommonHelp(&text); if (all) getSettingsUsage(&text); getOnlineHelpMessage(&text); outputText(text, out); } QString checkForInvalidQueryPathsOptions(QStringList arguments) { checkOptionWithoutValue("--pathfasta", &arguments); checkOptionWithoutValue("--hitsfasta", &arguments); QString error = checkForInvalidOrExcessSettings(&arguments); if (error.length() > 0) return error; return checkForInvalidOrExcessSettings(&arguments); } void parseQueryPathsOptions(QStringList arguments, bool * pathFasta, bool * hitsFasta) { int pathFastaIndex = arguments.indexOf("--pathfasta"); *pathFasta = (pathFastaIndex > -1); int hitsFastaIndex = arguments.indexOf("--hitsfasta"); *hitsFasta = (hitsFastaIndex > -1); parseSettings(arguments); }