//Copyright 2017 Ryan Wick //This file is part of Bandage //Bandage is free software: you can redistribute it and/or modify //it under the terms of the GNU General Public License as published by //the Free Software Foundation, either version 3 of the License, or //(at your option) any later version. //Bandage is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //GNU General Public License for more details. //You should have received a copy of the GNU General Public License //along with Bandage. If not, see . #include "debruijnedge.h" #include #include "../program/settings.h" #include "ogdfnode.h" #include #include "../program/settings.h" #include "../program/globals.h" #include "assemblygraph.h" DeBruijnEdge::DeBruijnEdge(DeBruijnNode *startingNode, DeBruijnNode *endingNode) : m_startingNode(startingNode), m_endingNode(endingNode), m_graphicsItemEdge(0), m_drawn(false), m_overlapType(UNKNOWN_OVERLAP), m_overlap(0) { } //This function assumes that the parameter node pointer is one of the two nodes //in this edge, and it returns the other one. DeBruijnNode * DeBruijnEdge::getOtherNode(const DeBruijnNode * node) const { if (node == m_startingNode) return m_endingNode; else return m_startingNode; } //This function determines whether the edge should be drawn to the screen. bool DeBruijnEdge::edgeIsVisible() const { //If the program is in double mode, then draw any edge where both of its //nodes are drawn. if (g_settings->doubleMode) return m_startingNode->isDrawn() && m_endingNode->isDrawn(); //If the program is in single mode, then draw any edge where both of its //nodes or their reverse complements are drawn. else { bool drawEdge = (m_startingNode->isDrawn() || m_startingNode->getReverseComplement()->isDrawn()) && (m_endingNode->isDrawn() || m_endingNode->getReverseComplement()->isDrawn()); if (!drawEdge) return false; //But it is also necessary to avoid drawing both an edge and its //reverse complement edge. return isPositiveEdge(); } } //This function says whether an edge is 'positive'. This is used to distinguish //an edge from its reverse complement - i.e. half of the graph edges are //positive and their reverse complements are negative. //When one node in the edge is positive and the other is negative, then the //choice is somewhat arbitrary. bool DeBruijnEdge::isPositiveEdge() const { //If both nodes have a positive number, show this edge, and not //the reverse complement where both nodes are negative. if (m_startingNode->isPositiveNode() && m_endingNode->isPositiveNode()) return true; if (m_startingNode->isNegativeNode() && m_endingNode->isNegativeNode()) return false; //Edges that are their own reverse complement are considered positive (but //will not have a negative counterpart). if (isOwnReverseComplement()) return true; //If the code got here, then one node is positive and the other //negative. In this case, just choose the one with the first name //alphabetically - an arbitrary choice, but at least it is //consistent. return (m_startingNode->getName() > m_reverseComplement->m_startingNode->getName()); } void DeBruijnEdge::addToOgdfGraph(ogdf::Graph * ogdfGraph, ogdf::EdgeArray * edgeArray) const { ogdf::node firstEdgeOgdfNode; ogdf::node secondEdgeOgdfNode; if (m_startingNode->inOgdf()) firstEdgeOgdfNode = m_startingNode->getOgdfNode()->getLast(); else if (m_startingNode->getReverseComplement()->inOgdf()) firstEdgeOgdfNode = m_startingNode->getReverseComplement()->getOgdfNode()->getFirst(); else return; //Ending node or its reverse complement isn't in OGDF if (m_endingNode->inOgdf()) secondEdgeOgdfNode = m_endingNode->getOgdfNode()->getFirst(); else if (m_endingNode->getReverseComplement()->inOgdf()) secondEdgeOgdfNode = m_endingNode->getReverseComplement()->getOgdfNode()->getLast(); else return; //Ending node or its reverse complement isn't in OGDF //If this in an edge connected a single-segment node to itself, then we //don't want to put it in the OGDF graph, because it would be redundant //with the node segment (and created conflict with the node/edge length). if (m_startingNode == m_endingNode) { if (m_startingNode->getNumberOfOgdfGraphEdges(m_startingNode->getDrawnNodeLength()) == 1) return; } ogdf::edge newEdge = ogdfGraph->newEdge(firstEdgeOgdfNode, secondEdgeOgdfNode); (*edgeArray)[newEdge] = g_settings->edgeLength; } //This function traces all possible paths from this edge. //It proceeds a number of steps, as determined by a setting. //If forward is true, it looks in a forward direction (starting nodes to //ending nodes). If forward is false, it looks in a backward direction //(ending nodes to starting nodes). void DeBruijnEdge::tracePaths(bool forward, int stepsRemaining, std::vector< std::vector > * allPaths, DeBruijnNode * startingNode, std::vector pathSoFar) const { //This can go for a while, so keep the UI responsive. QApplication::processEvents(); //Find the node in the direction we are tracing. DeBruijnNode * nextNode; if (forward) nextNode = m_endingNode; else nextNode = m_startingNode; //Add that node to the path so far. pathSoFar.push_back(nextNode); //If there are no steps left, then the path so far is done. --stepsRemaining; if (stepsRemaining == 0) { allPaths->push_back(pathSoFar); return; } //If the code got here, then more steps remain. //Find the edges that are in the correct direction. std::vector nextEdges = findNextEdgesInPath(nextNode, forward); //If there are no next edges, then we are finished with the //path search, even though steps remain. if (nextEdges.size() == 0) { allPaths->push_back(pathSoFar); return; } //Call this function on all of the next edges. //However, we also need to check to see if we are tracing a loop //and stop if that is the case. for (size_t i = 0; i < nextEdges.size(); ++i) { DeBruijnEdge * nextEdge = nextEdges[i]; //Determine the node that this next edge leads to. DeBruijnNode * nextNextNode; if (forward) nextNextNode = nextEdge->m_endingNode; else nextNextNode = nextEdge->m_startingNode; //If that node is the starting node, then we've made //a full loop and the path should be considered complete. if (nextNextNode == startingNode) { allPaths->push_back(pathSoFar); continue; } //If that node is already in the path TWICE so far, that means //we're caught in a loop, and we should throw this path out. //If it appears 0 or 1 times, then continue the path search. if (timesNodeInPath(nextNextNode, &pathSoFar) < 2) nextEdge->tracePaths(forward, stepsRemaining, allPaths, startingNode, pathSoFar); } } //This function counts how many times a node appears in a path int DeBruijnEdge::timesNodeInPath(DeBruijnNode * node, std::vector * path) const { int count = 0; for (size_t i = 0; i < path->size(); ++i) { if ( (*path)[i] == node) ++count; } return count; } bool DeBruijnEdge::leadsOnlyToNode(bool forward, int stepsRemaining, DeBruijnNode * target, std::vector pathSoFar, bool includeReverseComplement) const { //This can go for a while, so keep the UI responsive. QApplication::processEvents(); //Find the node in the direction we are tracing. DeBruijnNode * nextNode; if (forward) nextNode = m_endingNode; else nextNode = m_startingNode; //Add that node to the path so far. pathSoFar.push_back(nextNode); //If this path has landed on the node from which the search began, //that means we've followed a loop around. The search has therefore //failed because this path could represent circular DNA that does //not contain the target. if (nextNode == pathSoFar[0]) return false; //If the next node is the target, the search succeeded! if (nextNode == target) return true; //If we are including reverse complements and the next node is //the reverse complement of the target, the search succeeded! if (includeReverseComplement && nextNode->getReverseComplement() == target) return true; //If there are no steps left, then the search failed. --stepsRemaining; if (stepsRemaining == 0) return false; //If the code got here, then more steps remain. //Find the edges that are in the correct direction. std::vector nextEdges = findNextEdgesInPath(nextNode, forward); //If there are no next edges, then the search failed, even //though steps remain. if (nextEdges.size() == 0) return false; //In order for the search to succeed, this function needs to return true //for all of the nextEdges. //However, we also need to check to see if we are tracing a loop //and stop if that is the case. for (size_t i = 0; i < nextEdges.size(); ++i) { DeBruijnEdge * nextEdge = nextEdges[i]; //Determine the node that this next edge leads to. DeBruijnNode * nextNextNode; if (forward) nextNextNode = nextEdge->m_endingNode; else nextNextNode = nextEdge->m_startingNode; //If that node is already in the path TWICE so far, that means //we're caught in a loop, and we should throw this path out. //If it appears 0 or 1 times, then continue the path search. if (timesNodeInPath(nextNextNode, &pathSoFar) < 2) { if ( !nextEdge->leadsOnlyToNode(forward, stepsRemaining, target, pathSoFar, includeReverseComplement) ) return false; } } //If the code got here, then the search succeeded! return true; } std::vector DeBruijnEdge::findNextEdgesInPath(DeBruijnNode * nextNode, bool forward) const { std::vector nextEdges; const std::vector * nextNodeEdges = nextNode->getEdgesPointer(); for (size_t i = 0; i < nextNodeEdges->size(); ++i) { DeBruijnEdge * edge = (*nextNodeEdges)[i]; //If forward, we're looking for edges that lead away from //nextNode. If backward, we're looking for edges that lead //into nextNode. if ((forward && edge->m_startingNode == nextNode) || (!forward && edge->m_endingNode == nextNode)) nextEdges.push_back(edge); } return nextEdges; } //This function tries to automatically determine the overlap size //between the two nodes. It tries each overlap size between the min //to the max (in settings), assigning the first one it finds. void DeBruijnEdge::autoDetermineExactOverlap() { m_overlap = 0; m_overlapType = AUTO_DETERMINED_EXACT_OVERLAP; //Find an appropriate search range int minPossibleOverlap = std::min(m_startingNode->getLength(), m_endingNode->getLength()); if (minPossibleOverlap < g_settings->minAutoFindEdgeOverlap) return; int min = std::min(minPossibleOverlap, g_settings->minAutoFindEdgeOverlap); int max = std::min(minPossibleOverlap, g_settings->maxAutoFindEdgeOverlap); //Try each overlap in the range and set the first one found. //However, we don't want the search to be biased towards larger //or smaller overlaps, so start with a pseudorandom value and loop. int testOverlap = min + (rand() % (max - min + 1)); for (int i = min; i <= max; ++i) { if (testExactOverlap(testOverlap)) { m_overlap = testOverlap; return; } ++testOverlap; if (testOverlap > max) testOverlap = min; } } //This function tries the given overlap between the two nodes. //If the overlap works perfectly, it returns true. bool DeBruijnEdge::testExactOverlap(int overlap) const { bool mismatchFound = false; int seq1Offset = m_startingNode->getLength() - overlap; //Look at each position in the overlap for (int j = 0; j < overlap && !mismatchFound; ++j) { char a = m_startingNode->getBaseAt(seq1Offset + j); char b = m_endingNode->getBaseAt(j); if (a != b) mismatchFound = true; } return !mismatchFound; } QByteArray DeBruijnEdge::getGfaLinkLine() const { DeBruijnNode * startingNode = getStartingNode(); DeBruijnNode * endingNode = getEndingNode(); QByteArray gfaLinkLine = "L\t"; gfaLinkLine += startingNode->getNameWithoutSign().toUtf8() + "\t"; gfaLinkLine += startingNode->getSign().toUtf8() + "\t"; gfaLinkLine += endingNode->getNameWithoutSign().toUtf8() + "\t"; gfaLinkLine += endingNode->getSign().toUtf8() + "\t"; //When Velvet graphs are saved to GFA, the sequences are extended to include //the overlap. So even though this edge might have no overlap, the GFA link //line should. if (g_assemblyGraph->m_graphFileType == LAST_GRAPH) gfaLinkLine += QString::number(g_assemblyGraph->m_kmer - 1).toUtf8() + "M"; else gfaLinkLine += QString::number(getOverlap()).toUtf8() + "M"; gfaLinkLine += "\n"; return gfaLinkLine; } bool DeBruijnEdge::compareEdgePointers(DeBruijnEdge * a, DeBruijnEdge * b) { QString aStart = a->getStartingNode()->getName(); QString bStart = b->getStartingNode()->getName(); QString aStartNoSign = aStart; aStartNoSign.chop(1); QString bStartNoSign = bStart; bStartNoSign.chop(1); bool ok1; long long aStartNumber = aStartNoSign.toLongLong(&ok1); bool ok2; long long bStartNumber = bStartNoSign.toLongLong(&ok2); QString aEnd = a->getEndingNode()->getName(); QString bEnd = b->getEndingNode()->getName(); QString aEndNoSign = aEnd; aEndNoSign.chop(1); QString bEndNoSign = bEnd; bEndNoSign.chop(1); bool ok3; long long aEndNumber = aEndNoSign.toLongLong(&ok3); bool ok4; long long bEndNumber = bEndNoSign.toLongLong(&ok4); //If the node names are essentially numbers, then sort them as numbers. if (ok1 && ok2 && ok3 && ok4) { if (aStartNumber != bStartNumber) return aStartNumber < bStartNumber; if (aStartNumber == bStartNumber) return aEndNumber < bEndNumber; } //If the node names are strings, then just sort them as strings. return aStart < bStart; }