/* ================================================================= * * e-mem.h : Header file for main program * * * * E-MEM: An efficient (MUMmer-like) tool to retrieve Maximum Exact * * Matches using hashing based algorithm * * * * Copyright (c) 2014, Nilesh Khiste * * All rights reserved * * * * This program is free software: you can redistribute it and/or * * modify it under the terms of the GNU General Public License as * * published by the Free Software Foundation, either version 3 of * * the License, or (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public * * License along with this program. * * * * This file is subject to the terms and conditions defined in the * * file 'LICENSE', which is part of this source code package. * * ================================================================= */ /* * Command line argument control */ #define LENGTH 0x00000001 #define REF_FILE 0x00000002 #define QUERY_FILE 0x00000004 #define SPLIT_SIZE 0x00000008 #define KMER_SIZE 0x00000010 #define NUM_THREADS 0x00000020 #define IGNORE_N 0x00000040 #define MATCH_REV 0x00000080 #define MATCH_BOTH 0x00000100 #define REL_REV_QUEPOS 0x00000200 #define FOUR_COL_OUTPUT 0x00000400 #define LEN_IN_HEADER 0x00000800 #define IS_LENGTH_DEF(x) (x & LENGTH) #define IS_REF_FILE_DEF(x) (x & REF_FILE) #define IS_QUERY_FILE_DEF(x) (x & QUERY_FILE) #define IS_SPLIT_SIZE_DEF(x) (x & SPLIT_SIZE) #define IS_KMER_SIZE_DEF(x) (x & KMER_SIZE) #define IS_NUM_THREADS_DEF(x) (x & NUM_THREADS) #define IS_IGNORE_N_DEF(x) (x & IGNORE_N) #define IS_MATCH_REV_DEF(x) (x & MATCH_REV) #define IS_MATCH_BOTH_DEF(x) (x & MATCH_BOTH) #define IS_RELREV_QUEPOS_DEF(x) (x & REL_REV_QUEPOS) #define IS_FCOL_OUTPUT_DEF(x) (x & FOUR_COL_OUTPUT) #define IS_LEN_IN_HEADER_DEF(x) (x & LEN_IN_HEADER) #define SET_LENGTH(x) (x |= LENGTH) #define SET_REF_FILE(x) (x |= REF_FILE) #define SET_QUERY_FILE(x) (x |= QUERY_FILE) #define SET_SPLIT_SIZE(x) (x |= SPLIT_SIZE) #define SET_KMER_SIZE(x) (x |= KMER_SIZE) #define SET_NUM_THREADS(x) (x |= NUM_THREADS) #define SET_IGNORE_N(x) (x |= IGNORE_N) #define SET_MATCH_REV(x) (x |= MATCH_REV) #define SET_MATCH_BOTH(x) (x |= MATCH_BOTH) #define SET_RELREV_QUEPOS(x) (x |= REL_REV_QUEPOS) #define SET_FCOL_OUTPUT(x) (x |= FOUR_COL_OUTPUT) #define SET_LEN_IN_HEADER(x) (x |= LEN_IN_HEADER) class Knode { uint64_t currKmer; uint64_t *pos; uint64_t getHashKey(uint64_t currKmer, uint64_t &match_found) { uint64_t key=currKmer%currHashTabSize; uint64_t step = 1 + (currKmer%prevHashTabSize); uint32_t count=1; while (this[key].pos != NULL) { if (this[key].currKmer==currKmer){ match_found=1; return key; } else { key = (key + (count*step)) % currHashTabSize; count++; } } return key; //no collision } public: static uint64_t currHashTabSize; static uint64_t prevHashTabSize; bool findKmer(uint64_t currKmer, uint64_t* &pos_ptr) { uint64_t match=0; uint64_t key=this->getHashKey(currKmer, match); if (match) { pos_ptr = this[key].pos; return true; }else return false; } void addKmerNode(uint64_t currKmer, uint64_t currKmerPos) { uint64_t i=0, match=0; uint64_t key=this->getHashKey(currKmer, match); if (this[key].pos==NULL) { this[key].currKmer=currKmer; this[key].pos= new uint64_t[2]; this[key].pos[0]= 1; //position of the last element this[key].pos[1]= currKmerPos; // Position }else { if(this[key].pos[0] & (this[key].pos[0] + 1)){ this[key].pos[0] += 1; // Increment counter this[key].pos[this[key].pos[0]]= currKmerPos; // Fill Next Position }else { // Increase array size uint64_t *r = new uint64_t[2 * (this[key].pos[0]+1)]; r[0] = this[key].pos[0]+1; for (i=1; i