//Copyright 2017 Ryan Wick
//This file is part of Bandage
//Bandage is free software: you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//Bandage is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//You should have received a copy of the GNU General Public License
//along with Bandage. If not, see <http://www.gnu.org/licenses/>.
#ifndef ASSEMBLYGRAPH_H
#define ASSEMBLYGRAPH_H
#include <QObject>
#include <vector>
#include "../ogdf/basic/Graph.h"
#include "../ogdf/basic/GraphAttributes.h"
#include <QString>
#include <QMap>
#include "../program/globals.h"
#include "../ui/mygraphicsscene.h"
#include "path.h"
#include <QPair>
#include <QList>
#include <QByteArray>
#include <QStringList>
//Forward declarations: this header only refers to these classes through
//pointers, so their full definitions are not needed here.
class DeBruijnNode;
class DeBruijnEdge;
class MyProgressDialog;
class AssemblyGraph : public QObject
{
Q_OBJECT
public:
AssemblyGraph();
~AssemblyGraph();
//Nodes are stored in a map with a key of the node's name.
QMap m_deBruijnGraphNodes;
//Edges are stored in a map with a key of the starting and ending node
//pointers.
QMap, DeBruijnEdge*> m_deBruijnGraphEdges;
ogdf::Graph * m_ogdfGraph;
ogdf::EdgeArray * m_edgeArray;
ogdf::GraphAttributes * m_graphAttributes;
int m_kmer;
int m_nodeCount;
int m_edgeCount;
long long m_totalLength;
long long m_shortestContig;
long long m_longestContig;
double m_meanDepth;
double m_firstQuartileDepth;
double m_medianDepth;
double m_thirdQuartileDepth;
GraphFileType m_graphFileType;
bool m_contiguitySearchDone;
QString m_filename;
QString m_depthTag;
SequencesLoadedFromFasta m_sequencesLoadedFromFasta;
void cleanUp();
void createDeBruijnEdge(QString node1Name, QString node2Name,
int overlap = 0,
EdgeOverlapType overlapType = UNKNOWN_OVERLAP);
void clearOgdfGraphAndResetNodes();
static QByteArray getReverseComplement(QByteArray forwardSequence);
void resetEdges();
double getMeanDepth(bool drawnNodesOnly = false);
double getMeanDepth(std::vector nodes);
double getMeanDepth(QList nodes);
void resetNodeContiguityStatus();
void resetAllNodeColours();
void clearAllBlastHitPointers();
void determineGraphInfo();
void clearGraphInfo();
void buildDeBruijnGraphFromLastGraph(QString fullFileName);
void buildDeBruijnGraphFromGfa(QString fullFileName, bool * unsupportedCigar, bool * customLabels,
bool * customColours, QString *bandageOptionsError);
void buildDeBruijnGraphFromFastg(QString fullFileName);
void buildDeBruijnGraphFromTrinityFasta(QString fullFileName);
int buildDeBruijnGraphFromAsqg(QString fullFileName);
void buildDeBruijnGraphFromPlainFasta(QString fullFileName);
void recalculateAllDepthsRelativeToDrawnMean();
void recalculateAllNodeWidths();
GraphFileType getGraphFileTypeFromFile(QString fullFileName);
bool checkFileIsLastGraph(QString fullFileName);
bool checkFileIsFastG(QString fullFileName);
bool checkFileIsFasta(QString fullFileName);
bool checkFileIsGfa(QString fullFileName);
bool checkFileIsTrinityFasta(QString fullFileName);
bool checkFileIsAsqg(QString fullFileName);
bool checkFirstLineOfFile(QString fullFileName, QString regExp);
bool loadGraphFromFile(QString filename);
void buildOgdfGraphFromNodesAndEdges(std::vector startingNodes,
int nodeDistance);
void addGraphicsItemsToScene(MyGraphicsScene * scene);
QStringList splitCsv(QString line, QString sep=",");
bool loadCSV(QString filename, QStringList * columns, QString * errormsg, bool * coloursLoaded);
std::vector getStartingNodes(QString * errorTitle,
QString * errorMessage,
bool doubleMode,
QString nodesList,
QString blastQueryName);
bool checkIfStringHasNodes(QString nodesString);
QString generateNodesNotFoundErrorMessage(std::vector nodesNotInGraph,
bool exact);
std::vector getNodesFromString(QString nodeNamesString,
bool exactMatch,
std::vector * nodesNotInGraph = 0);
void layoutGraph();
void setAllEdgesExactOverlap(int overlap);
void autoDetermineAllEdgesExactOverlap();
static void readFastaOrFastqFile(QString filename, std::vector * names,
std::vector * sequences);
static void readFastaFile(QString filename, std::vector * names,
std::vector * sequences);
static void readFastqFile(QString filename, std::vector * names,
std::vector * sequences);
int getDrawnNodeCount() const;
void deleteNodes(std::vector * nodes);
void deleteEdges(std::vector * edges);
void duplicateNodePair(DeBruijnNode * node, MyGraphicsScene * scene);
bool mergeNodes(QList nodes, MyGraphicsScene * scene,
bool recalulateDepth);
void removeGraphicsItemEdges(const std::vector * edges,
bool reverseComplement,
MyGraphicsScene * scene);
void removeGraphicsItemNodes(const std::vector * nodes,
bool reverseComplement,
MyGraphicsScene * scene);
int mergeAllPossible(MyGraphicsScene * scene = 0,
MyProgressDialog * progressDialog = 0);
void saveEntireGraphToFasta(QString filename);
void saveEntireGraphToFastaOnlyPositiveNodes(QString filename);
bool saveEntireGraphToGfa(QString filename);
bool saveVisibleGraphToGfa(QString filename);
void changeNodeName(QString oldName, QString newName);
NodeNameStatus checkNodeNameValidity(QString nodeName);
void changeNodeDepth(std::vector * nodes,
double newDepth);
static QByteArray addNewlinesToSequence(QByteArray sequence, int interval = 70);
int getDeadEndCount() const;
void getNodeStats(int * n50, int * shortestNode, int * firstQuartile, int * median, int * thirdQuartile, int * longestNode) const;
void getGraphComponentCountAndLargestComponentSize(int * componentCount, int * largestComponentLength) const;
double getMedianDepthByBase() const;
long long getEstimatedSequenceLength() const;
long long getEstimatedSequenceLength(double medianDepthByBase) const;
long long getTotalLengthMinusEdgeOverlaps() const;
QPair getOverlapRange() const;
bool attemptToLoadSequencesFromFasta();
long long getTotalLengthOrphanedNodes() const;
bool useLinearLayout() const;
private:
template double getValueUsingFractionalIndex(std::vector * v, double index) const;
QString convertNormalNumberStringToBandageNodeName(QString number);
void makeReverseComplementNodeIfNecessary(DeBruijnNode * node);
void pointEachNodeToItsReverseComplement();
QStringList removeNullStringsFromList(QStringList in);
std::vector getNodesFromListExact(QStringList nodesList, std::vector * nodesNotInGraph);
std::vector getNodesFromListPartial(QStringList nodesList, std::vector * nodesNotInGraph);
std::vector getNodesFromBlastHits(QString queryName);
std::vector getNodesInDepthRange(double min, double max);
std::vector makeOverlapCountVector();
bool cigarContainsOnlyM(QString cigar);
int getLengthFromSimpleCigar(QString cigar);
int getLengthFromCigar(QString cigar);
int getCigarCount(QString cigarCode, QString cigar);
QString getOppositeNodeName(QString nodeName);
void clearAllCsvData();
QString getNodeNameFromString(QString string);
QString getNewNodeName(QString oldNodeName);
void duplicateGraphicsNode(DeBruijnNode * originalNode, DeBruijnNode * newNode, MyGraphicsScene * scene);
bool canAddNodeToStartOfMergeList(QList * mergeList,
DeBruijnNode * potentialNode);
bool canAddNodeToEndOfMergeList(QList * mergeList,
DeBruijnNode * potentialNode);
QString getUniqueNodeName(QString baseName);
void mergeGraphicsNodes(QList * originalNodes,
QList * revCompOriginalNodes,
DeBruijnNode * newNode, MyGraphicsScene * scene);
bool mergeGraphicsNodes2(QList * originalNodes,
DeBruijnNode * newNode, MyGraphicsScene * scene);
void removeAllGraphicsEdgesFromNode(DeBruijnNode * node,
bool reverseComplement,
MyGraphicsScene * scene);
QString cleanNodeName(QString name);
double findDepthAtIndex(QList * nodeList, long long targetIndex) const;
bool allNodesStartWith(QString start) const;
QString simplifyCanuNodeName(QString oldName) const;
signals:
void setMergeTotalCount(int totalCount);
void setMergeCompletedCount(int completedCount);
};
#endif // ASSEMBLYGRAPH_H