//Copyright 2017 Ryan Wick //This file is part of Bandage //Bandage is free software: you can redistribute it and/or modify //it under the terms of the GNU General Public License as published by //the Free Software Foundation, either version 3 of the License, or //(at your option) any later version. //Bandage is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //GNU General Public License for more details. //You should have received a copy of the GNU General Public License //along with Bandage. If not, see . #ifndef ASSEMBLYGRAPH_H #define ASSEMBLYGRAPH_H #include #include #include "../ogdf/basic/Graph.h" #include "../ogdf/basic/GraphAttributes.h" #include #include #include "../program/globals.h" #include "../ui/mygraphicsscene.h" #include "path.h" #include class DeBruijnNode; class DeBruijnEdge; class MyProgressDialog; class AssemblyGraph : public QObject { Q_OBJECT public: AssemblyGraph(); ~AssemblyGraph(); //Nodes are stored in a map with a key of the node's name. QMap m_deBruijnGraphNodes; //Edges are stored in a map with a key of the starting and ending node //pointers. QMap, DeBruijnEdge*> m_deBruijnGraphEdges; ogdf::Graph * m_ogdfGraph; ogdf::EdgeArray * m_edgeArray; ogdf::GraphAttributes * m_graphAttributes; int m_kmer; int m_nodeCount; int m_edgeCount; long long m_totalLength; long long m_shortestContig; long long m_longestContig; double m_meanDepth; double m_firstQuartileDepth; double m_medianDepth; double m_thirdQuartileDepth; GraphFileType m_graphFileType; bool m_contiguitySearchDone; QString m_filename; QString m_depthTag; SequencesLoadedFromFasta m_sequencesLoadedFromFasta; void cleanUp(); void createDeBruijnEdge(QString node1Name, QString node2Name, int overlap = 0, EdgeOverlapType overlapType = UNKNOWN_OVERLAP); void clearOgdfGraphAndResetNodes(); static QByteArray getReverseComplement(QByteArray forwardSequence); void resetEdges(); double getMeanDepth(bool drawnNodesOnly = false); double getMeanDepth(std::vector nodes); double getMeanDepth(QList nodes); void resetNodeContiguityStatus(); void resetAllNodeColours(); void clearAllBlastHitPointers(); void determineGraphInfo(); void clearGraphInfo(); void buildDeBruijnGraphFromLastGraph(QString fullFileName); void buildDeBruijnGraphFromGfa(QString fullFileName, bool * unsupportedCigar, bool * customLabels, bool * customColours, QString *bandageOptionsError); void buildDeBruijnGraphFromFastg(QString fullFileName); void buildDeBruijnGraphFromTrinityFasta(QString fullFileName); int buildDeBruijnGraphFromAsqg(QString fullFileName); void buildDeBruijnGraphFromPlainFasta(QString fullFileName); void recalculateAllDepthsRelativeToDrawnMean(); void recalculateAllNodeWidths(); GraphFileType getGraphFileTypeFromFile(QString fullFileName); bool checkFileIsLastGraph(QString fullFileName); bool checkFileIsFastG(QString fullFileName); bool checkFileIsFasta(QString fullFileName); bool checkFileIsGfa(QString fullFileName); bool checkFileIsTrinityFasta(QString fullFileName); bool checkFileIsAsqg(QString fullFileName); bool checkFirstLineOfFile(QString fullFileName, QString regExp); bool loadGraphFromFile(QString filename); void buildOgdfGraphFromNodesAndEdges(std::vector startingNodes, int nodeDistance); void addGraphicsItemsToScene(MyGraphicsScene * scene); QStringList splitCsv(QString line, QString sep=","); bool loadCSV(QString filename, QStringList * columns, QString * errormsg, bool * coloursLoaded); std::vector getStartingNodes(QString * errorTitle, QString * errorMessage, bool doubleMode, QString nodesList, QString blastQueryName); bool checkIfStringHasNodes(QString nodesString); QString generateNodesNotFoundErrorMessage(std::vector nodesNotInGraph, bool exact); std::vector getNodesFromString(QString nodeNamesString, bool exactMatch, std::vector * nodesNotInGraph = 0); void layoutGraph(); void setAllEdgesExactOverlap(int overlap); void autoDetermineAllEdgesExactOverlap(); static void readFastaOrFastqFile(QString filename, std::vector * names, std::vector * sequences); static void readFastaFile(QString filename, std::vector * names, std::vector * sequences); static void readFastqFile(QString filename, std::vector * names, std::vector * sequences); int getDrawnNodeCount() const; void deleteNodes(std::vector * nodes); void deleteEdges(std::vector * edges); void duplicateNodePair(DeBruijnNode * node, MyGraphicsScene * scene); bool mergeNodes(QList nodes, MyGraphicsScene * scene, bool recalulateDepth); void removeGraphicsItemEdges(const std::vector * edges, bool reverseComplement, MyGraphicsScene * scene); void removeGraphicsItemNodes(const std::vector * nodes, bool reverseComplement, MyGraphicsScene * scene); int mergeAllPossible(MyGraphicsScene * scene = 0, MyProgressDialog * progressDialog = 0); void saveEntireGraphToFasta(QString filename); void saveEntireGraphToFastaOnlyPositiveNodes(QString filename); bool saveEntireGraphToGfa(QString filename); bool saveVisibleGraphToGfa(QString filename); void changeNodeName(QString oldName, QString newName); NodeNameStatus checkNodeNameValidity(QString nodeName); void changeNodeDepth(std::vector * nodes, double newDepth); static QByteArray addNewlinesToSequence(QByteArray sequence, int interval = 70); int getDeadEndCount() const; void getNodeStats(int * n50, int * shortestNode, int * firstQuartile, int * median, int * thirdQuartile, int * longestNode) const; void getGraphComponentCountAndLargestComponentSize(int * componentCount, int * largestComponentLength) const; double getMedianDepthByBase() const; long long getEstimatedSequenceLength() const; long long getEstimatedSequenceLength(double medianDepthByBase) const; long long getTotalLengthMinusEdgeOverlaps() const; QPair getOverlapRange() const; bool attemptToLoadSequencesFromFasta(); long long getTotalLengthOrphanedNodes() const; bool useLinearLayout() const; private: template double getValueUsingFractionalIndex(std::vector * v, double index) const; QString convertNormalNumberStringToBandageNodeName(QString number); void makeReverseComplementNodeIfNecessary(DeBruijnNode * node); void pointEachNodeToItsReverseComplement(); QStringList removeNullStringsFromList(QStringList in); std::vector getNodesFromListExact(QStringList nodesList, std::vector * nodesNotInGraph); std::vector getNodesFromListPartial(QStringList nodesList, std::vector * nodesNotInGraph); std::vector getNodesFromBlastHits(QString queryName); std::vector getNodesInDepthRange(double min, double max); std::vector makeOverlapCountVector(); bool cigarContainsOnlyM(QString cigar); int getLengthFromSimpleCigar(QString cigar); int getLengthFromCigar(QString cigar); int getCigarCount(QString cigarCode, QString cigar); QString getOppositeNodeName(QString nodeName); void clearAllCsvData(); QString getNodeNameFromString(QString string); QString getNewNodeName(QString oldNodeName); void duplicateGraphicsNode(DeBruijnNode * originalNode, DeBruijnNode * newNode, MyGraphicsScene * scene); bool canAddNodeToStartOfMergeList(QList * mergeList, DeBruijnNode * potentialNode); bool canAddNodeToEndOfMergeList(QList * mergeList, DeBruijnNode * potentialNode); QString getUniqueNodeName(QString baseName); void mergeGraphicsNodes(QList * originalNodes, QList * revCompOriginalNodes, DeBruijnNode * newNode, MyGraphicsScene * scene); bool mergeGraphicsNodes2(QList * originalNodes, DeBruijnNode * newNode, MyGraphicsScene * scene); void removeAllGraphicsEdgesFromNode(DeBruijnNode * node, bool reverseComplement, MyGraphicsScene * scene); QString cleanNodeName(QString name); double findDepthAtIndex(QList * nodeList, long long targetIndex) const; bool allNodesStartWith(QString start) const; QString simplifyCanuNodeName(QString oldName) const; signals: void setMergeTotalCount(int totalCount); void setMergeCompletedCount(int completedCount); }; #endif // ASSEMBLYGRAPH_H