/* This file is part of Jellyfish. Jellyfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Jellyfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Jellyfish. If not, see . */ #ifndef __JELLYFISH_LARGE_HASH_ARRAY_HPP__ #define __JELLYFISH_LARGE_HASH_ARRAY_HPP__ #include #include #include #include #include #include #include #include #include #include namespace jellyfish { namespace large_hash { /* Contains an integer, the reprobe limit. It is capped based on the * reprobe strategy to not be bigger than the size of the hash * array. Also, the length to encode reprobe limit must not be larger * than the length to encode _size. */ class reprobe_limit_t { uint_t limit; public: reprobe_limit_t(uint_t _limit, const size_t *_reprobes, size_t _size) : limit(_limit) { while(_reprobes[limit] >= _size && limit >= 1) limit--; } inline uint_t val() const { return limit; } }; // Key is any type with the following two methods: get_bits(unsigned // int start, unsigned int len); and set_bits(unsigned int start, // unsigned int len, uint64_t bits). These methods get and set the // bits [start, start + len). Start and len may not be aligned to word // boundaries. On the other hand, len is guaranteed to be < // sizeof(uint64_t). I.e. never more than 1 word is fetched or set. template class array_base { static const int wsize = std::numeric_limits::digits; // Word size in bits // Can't be done. Resort to an evil macro! // static const word fmask = std::numeric_limits::max(); // Mask full of ones #define fmask (std::numeric_limits::max()) public: define_error_class(ErrorAllocation); typedef word data_word; typedef typename Offsets::offset_t offset_t; typedef struct offset_t::key key_offsets; typedef struct offset_t::val val_offsets; typedef Key key_type; typedef uint64_t mapped_type; typedef std::pair value_type; typedef stl_iterator_base iterator; typedef stl_iterator_base const_iterator; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* pointer; typedef const value_type* const_pointer; typedef eager_iterator_base eager_iterator; typedef lazy_iterator_base lazy_iterator; typedef region_iterator_base region_iterator; /// Status of a (key,value) pair. LBSET means that the large bit is /// set. Hence, it contains a pointer back to the original key and a /// large value. enum key_status { FILLED, EMPTY, LBSET}; protected: uint16_t lsize_; // log of size size_t size_, size_mask_; reprobe_limit_t reprobe_limit_; uint16_t key_len_; // Length of key in bits uint16_t raw_key_len_; // Length of key stored raw (i.e. complement of implied length) Offsets offsets_; // key len reduced by size of hash array size_t size_bytes_; word * const data_; atomic_t atomic_; const size_t *reprobes_; RectangularBinaryMatrix hash_matrix_; RectangularBinaryMatrix hash_inverse_matrix_; public: /// Give information about memory usage and array size. struct usage_info { uint16_t key_len_, val_len_, reprobe_limit_; const size_t* reprobes_; usage_info(uint16_t key_len, uint16_t val_len, uint16_t reprobe_limit, const size_t* reprobes = jellyfish::quadratic_reprobes) : key_len_(key_len), val_len_(val_len), reprobe_limit_(reprobe_limit), reprobes_(reprobes) { } /// Memory usage for a given size. size_t mem(size_t size) { uint16_t lsize(ceilLog2(size)); size_t asize((size_t)1 << lsize); reprobe_limit_t areprobe_limit(reprobe_limit_, reprobes_, asize); uint16_t raw_key_len(key_len_ > lsize ? key_len_ - lsize : 0); Offsets offsets(raw_key_len + bitsize(areprobe_limit.val() + 1), val_len_, areprobe_limit.val() + 1); return div_ceil(asize, (size_t)offsets.block_len()) * offsets.block_word_len() * sizeof(word) + sizeof(array_base) + sizeof(Offsets); } /// Actual size for a given size. size_t asize(size_t size) { return (size_t)1 << ceilLog2(size); } struct fit_in { usage_info* i_; size_t mem_; fit_in(usage_info* i, size_t mem) : i_(i), mem_(mem) { } bool operator()(uint16_t size_bits) const { return i_->mem((size_t)1 << size_bits) < mem_; } }; /// Maximum size for a given maximum memory. size_t size(size_t mem) { return (size_t)1 << size_bits(mem); } /// Log of maximum size for a given maximum memory uint16_t size_bits(size_t mem) { uint16_t res = *binary_search_first_false(pointer_integer(0), pointer_integer(64), fit_in(this, mem)); return res > 0 ? res - 1 : 0; } size_t size_bits_linear(size_t mem) { fit_in predicate(this, mem); uint16_t i = 0; for( ; i < 64; ++i) if(!predicate(i)) break; return i > 0 ? i - 1 : 0; } }; array_base(size_t size, // Size of hash. To be rounded up to a power of 2 uint16_t key_len, // Size of key in bits uint16_t val_len, // Size of val in bits uint16_t reprobe_limit, // Maximum reprobe RectangularBinaryMatrix m, const size_t* reprobes = quadratic_reprobes) : // Reprobing policy lsize_(ceilLog2(size)), size_((size_t)1 << lsize_), size_mask_(size_ - 1), reprobe_limit_(reprobe_limit, reprobes, size_), key_len_(key_len), raw_key_len_(key_len_ > lsize_ ? key_len_ - lsize_ : 0), offsets_(raw_key_len_ + bitsize(reprobe_limit_.val() + 1), val_len, reprobe_limit_.val() + 1), size_bytes_(div_ceil(size_, (size_t)offsets_.block_len()) * offsets_.block_word_len() * sizeof(word)), data_(static_cast(this)->alloc_data(size_bytes_)), reprobes_(reprobes), hash_matrix_(m), hash_inverse_matrix_(hash_matrix_.pseudo_inverse()) { if(!data_) throw ErrorAllocation(err::msg() << "Failed to allocate " << (div_ceil(size, (size_t)offsets_.block_len()) * offsets_.block_word_len() * sizeof(word)) << " bytes of memory"); } array_base(array_base&& ary) : lsize_(ary.lsize_), size_(ary.size_), size_mask_(size_ - 1), reprobe_limit_(ary.reprobe_limit_), key_len_(ary.key_len_), raw_key_len_(ary.raw_key_len_), offsets_(std::move(ary.offsets_)), size_bytes_(ary.size_bytes_), data_(ary.data_), reprobes_(ary.reprobes_), hash_matrix_(std::move(ary.hash_matrix_)), hash_inverse_matrix_(std::move(ary.hash_inverse_matrix_)) { } array_base& operator=(const array_base& rhs) = delete; array_base& operator=(array_base&& rhs) = delete; size_t size() const { return size_; } size_t lsize() const { return lsize_; } size_t size_mask() const { return size_mask_; } uint_t key_len() const { return key_len_; } uint_t val_len() const { return offsets_.val_len(); } const size_t* reprobes() const { return reprobes_; } uint_t max_reprobe() const { return reprobe_limit_.val(); } size_t max_reprobe_offset() const { return reprobes_[reprobe_limit_.val()]; } const RectangularBinaryMatrix& matrix() const { return hash_matrix_; } const RectangularBinaryMatrix& inverse_matrix() const { return hash_inverse_matrix_; } void matrix(const RectangularBinaryMatrix& m) { hash_inverse_matrix_ = m.pseudo_inverse(); hash_matrix_ = m; } /** * Clear hash table. Not thread safe. */ void clear() { memset(data_, '\0', size_bytes_); } /** * Write the hash table raw to a stream. Not thread safe. */ void write(std::ostream& os) const { os.write((const char*)data_, size_bytes_); } size_t size_bytes() const { return size_bytes_; } /* The storage of the hash is organized in "blocks". A (key,value) * pair always start at bit 0 of the block. The following methods * work with the blocks of the hash. */ /** * Number of blocks needed to fit at least a given number of * records. Given a number of records, it returns the number of * blocks necessary and the actual number of records these blocks * contain. */ std::pair blocks_for_records(size_t nb_records) const { return offsets_.blocks_for_records(nb_records); } /** * Convert coordinate from (start, blen) given in blocks to * coordinate in char* and length in bytes. It also makes sure that * the pointer and length returned do not go beyond allocated * memory. */ void block_to_ptr(const size_t start, const size_t blen, char **start_ptr, size_t *memlen) const { *start_ptr = (char *)(data_ + start * offsets_.block_word_len()); char *end_ptr = (char *)data_ + size_bytes_; if(*start_ptr >= end_ptr) { *memlen = 0; return; } *memlen = blen * offsets_.block_word_len() * sizeof(word); if(*start_ptr + *memlen > end_ptr) *memlen = end_ptr - *start_ptr; } /** * Zero out blocks in [start, start+length), where start and * length are given in number of blocks. **/ void zero_blocks(const size_t start, const size_t length) { char *start_ptr; size_t memlen; block_to_ptr(start, length, &start_ptr, &memlen); memset(start_ptr, '\0', memlen); } /** * Use hash values as counters. * * The matrix multiplication gets only a uint64_t. The lsb of the * matrix product, the hsb are assume to be equal to the key itself * (the matrix has a partial identity on the first rows). * * In case of failure (false is returned), carry_shift contains the * number of bits of the value that were successfully stored in the * hash (low significant bits). If carry_shift == 0, then nothing * was stored and the key is not in the hash at all. In that case, * the value of *is_new and *id are not valid. If carry_shift > 0, * then the key is present but the value stored is not correct * (missing the high significant bits of value), but *is_new and *id * contain the proper information. */ inline bool add(const key_type& key, mapped_type val, unsigned int* carry_shift, bool* is_new, size_t* id) { uint64_t hash = hash_matrix_.times(key); *carry_shift = 0; return add_rec(hash & size_mask_, key, val, false, is_new, id, carry_shift); } inline bool add(const key_type& key, mapped_type val, unsigned int* carry_shift) { bool is_new = false; size_t id = 0; return add(key, val, carry_shift, &is_new, &id); } inline bool add(const key_type& key, mapped_type val) { unsigned int carry_shift = 0; return add(key, val, &carry_shift); } inline bool set(const key_type& key) { bool is_new; size_t id; return set(key, &is_new, &id); } bool set(const key_type& key, bool* is_new, size_t* id) { word* w; const offset_t* o; *id = hash_matrix_.times(key) & size_mask_; return claim_key(key, is_new, id, &o, &w); } /** * Use hash values as counters, if already exists * * Add val to the value associated with key if key is already in the * hash. Returns true if the update was done, false otherwise. */ inline bool update_add(const key_type& key, mapped_type val) { key_type tmp_key; unsigned int carry_shift; return update_add(key, val, &carry_shift, tmp_key); } // Optimization. Use tmp_key as buffer. Avoids allocation if update_add is called repeatedly. bool update_add(const key_type& key, mapped_type val, unsigned int* carry_shift, key_type& tmp_key) { size_t id; word* w; const offset_t* o; *carry_shift = 0; if(get_key_id(key, &id, tmp_key, (const word**)&w, &o)) return add_rec_at(id, key, val, o, w, carry_shift); return false; } // Get the value, stored in *val, associated with key. If the key is // not found, false is returned, otherwise true is returned and *val // is updated. If carry_bit is true, then the first bit of the key // field indicates whether we should reprobe to get the complete // value. inline bool get_val_for_key(const key_type& key, mapped_type* val, bool carry_bit = false) const { key_type tmp_key; size_t id; return get_val_for_key(key, val, tmp_key, &id, carry_bit); } // Optimization version. A tmp_key buffer is passed and the id where // the key was found is return in *id. If get_val_for_key is called // many times consecutively, it may be faster to pass the same // tmp_key buffer instead of allocating it every time. bool get_val_for_key(const key_type& key, mapped_type* val, key_type& tmp_key, size_t* id, bool carry_bit = false) const { const word* w; const offset_t* o; if(!get_key_id(key, id, tmp_key, &w, &o)) return false; *val = get_val_at_id(*id, w, o, true, carry_bit); return true; } // Return true if the key is present in the hash inline bool has_key(const key_type& key) const { size_t id; return get_key_id(key, &id); } // Get the id of the key in the hash. Returns true if the key is // found in the hash, false otherwise. inline bool get_key_id(const key_type& key, size_t* id) const { key_type tmp_key; const word* w; const offset_t* o; return get_key_id(key, id, tmp_key, &w, &o); } // Optimization version where a tmp_key buffer is provided instead // of being allocated. May be faster if many calls to get_key_id are // made consecutively by passing the same tmp_key each time. inline bool get_key_id(const key_type& key, size_t* id, key_type& tmp_key) const { const word* w; const offset_t* o; return get_key_id(key, id, tmp_key, &w, &o); } protected: // Information and methods to manage the prefetched data. struct prefetch_info { size_t id; const word* w; const offset_t *o, *lo; }; typedef simple_circular_buffer::pre_alloc prefetch_buffer; void warm_up_cache(prefetch_buffer& buffer, size_t oid) const { buffer.clear(); for(int i = 0; i < buffer.capacity(); ++i) { buffer.push_back(); prefetch_info& info = buffer.back(); info.id = (oid + (i > 0 ? reprobes_[i] : 0)) & size_mask_; info.w = offsets_.word_offset(info.id, &info.o, &info.lo, data_); __builtin_prefetch(info.w + info.o->key.woff, 0, 1); __builtin_prefetch(info.o, 0, 3); } } void prefetch_next(prefetch_buffer& buffer, size_t oid, uint_t reprobe) const { buffer.pop_front(); // if(reprobe + buffer.capacity() <= reprobe_limit_.val()) { buffer.push_back(); prefetch_info& info = buffer.back(); info.id = (oid + reprobes_[reprobe + buffer.capacity() - 1]) & size_mask_; info.w = offsets_.word_offset(info.id, &info.o, &info.lo, data_); __builtin_prefetch(info.w + info.o->key.woff, 0, 1); __builtin_prefetch(info.o, 0, 3); // } } public: // Optimization version again. Also return the word and the offset // information where the key was found. These can be used later one // to fetch the value associated with the key. inline bool get_key_id(const key_type& key, size_t* id, key_type& tmp_key, const word** w, const offset_t** o) const { return get_key_id(key, id, tmp_key, w, o, hash_matrix_.times(key) & size_mask_); } // Find the actual id of the key in the hash, starting at oid. bool get_key_id(const key_type& key, size_t* id, key_type& tmp_key, const word** w, const offset_t** o, const size_t oid) const { // This static_assert makes clang++ happy static_assert(std::is_pod::value, "prefetch_info must be a POD"); prefetch_info info_ary[prefetch_buffer::capacityConstant]; prefetch_buffer buffer(info_ary); warm_up_cache(buffer, oid); for(uint_t reprobe = 0; reprobe <= reprobe_limit_.val(); ++reprobe) { prefetch_info& info = buffer.front(); key_status st = get_key_at_id(info.id, tmp_key, info.w, info.o); switch(st) { case EMPTY: return false; case FILLED: if(oid != tmp_key.get_bits(0, lsize_)) break; tmp_key.template set_bits(0, lsize_, key.get_bits(0, lsize_)); if(tmp_key != key) break; *id = info.id; *w = info.w; *o = info.o; return true; default: break; } prefetch_next(buffer, oid, reprobe + 1); } // for return false; } ////////////////////////////// // Iterator ////////////////////////////// const_iterator begin() { return const_iterator(this); } const_iterator begin() const { return const_iterator(this); } const_iterator end() { return const_iterator(); } const_iterator end() const { return const_iterator(); } /// Get a slice of an array as an iterator template Iterator iterator_slice(size_t index, size_t nb_slices) const { std::pair res = slice(index, nb_slices, size()); return Iterator(this, res.first, res.second); } template Iterator iterator_all() const { return iterator_slice(0, 1); } // See hash_counter.hpp for why we added this method. It should not // be needed, but I can't get the thing to compile without :(. eager_iterator eager_slice(size_t index, size_t nb_slices) const { return iterator_slice(index, nb_slices); } region_iterator region_slice(size_t index, size_t nb_slices) const { return iterator_slice(index, nb_slices); } // Claim a key with the large bit not set. I.e. first entry for a key. // // id is input/output. Equal to hash & size_maks on input. Equal to // actual id where key was set on output. key is already hash // shifted and masked to get higher bits. (>> lsize & key_mask) // is_new is set on output to true if key did not exists in hash // before. *ao points to the actual offsets object and w to the word // holding the value. bool claim_key(const key_type& key, bool* is_new, size_t* id, const offset_t** _ao, word** _w) { uint_t reprobe = 0; const offset_t *o, *lo; word *w, *kw, nkey; bool key_claimed = false; size_t cid = *id; // Akey contains first word of what to store in the key // field. I.e. part of the original key (the rest is encoded in // the original position) and the reprobe value to substract from // the actual position to get to the original position. // // MSB LSB // +--------------+-------------+ // | MSB of key | reprobe | // + -------------+-------------+ // raw_key_len reprobe_len // // Akey is updated at every operation to reflect the current // reprobe value. nkey is the temporary word containing the part // to be stored in the current word kw (+ some offset). word akey = 1; // start reprobe value == 0. Store reprobe value + 1 const int to_copy = std::min((uint16_t)(wsize - offsets_.reprobe_len()), raw_key_len_); const int implied_copy = std::min(key_len_, lsize_); akey |= key.get_bits(implied_copy, to_copy) << offsets_.reprobe_len(); const int abits_copied = implied_copy + to_copy; // Bits from original key already copied, explicitly or implicitly do { int bits_copied = abits_copied; w = offsets_.word_offset(cid, &o, &lo, data_); kw = w + o->key.woff; if(o->key.sb_mask1) { // key split on multiple words nkey = akey << o->key.boff; nkey |= o->key.sb_mask1; nkey &= o->key.mask1; key_claimed = set_key(kw, nkey, o->key.mask1, o->key.mask1, is_new); if(key_claimed) { nkey = akey >> o->key.shift; if(o->key.full_words) { // Copy full words. First one is special nkey |= key.get_bits(bits_copied, o->key.shift - 1) << (wsize - o->key.shift); bits_copied += o->key.shift - 1; nkey |= o->key.sb_mask1; // Set bit is MSB int copied_full_words = 1; key_claimed = set_key(kw + copied_full_words, nkey, fmask, fmask, is_new); // Copy more full words if needed while(bits_copied + wsize - 1 <= key_len_ && key_claimed) { nkey = key.get_bits(bits_copied, wsize - 1); bits_copied += wsize - 1; nkey |= o->key.sb_mask1; copied_full_words += 1; key_claimed = set_key(kw + copied_full_words, nkey, fmask, fmask, is_new); } assert(!key_claimed || (bits_copied < key_len_) == (o->key.sb_mask2 != 0)); if(o->key.sb_mask2 && key_claimed) { // Copy last word nkey = key.get_bits(bits_copied, key_len_ - bits_copied); nkey |= o->key.sb_mask2; copied_full_words += 1; key_claimed = set_key(kw + copied_full_words, nkey, o->key.mask2, o->key.mask2, is_new); } } else if(o->key.sb_mask2) { // if bits_copied + wsize - 1 < key_len // Copy last word, no full words copied nkey |= key.get_bits(bits_copied, key_len_ - bits_copied) << (wsize - o->key.shift); nkey |= o->key.sb_mask2; nkey &= o->key.mask2; key_claimed = set_key(kw + 1, nkey, o->key.mask2, o->key.mask2, is_new); } } // if(key_claimed) } else { // key on one word nkey = akey << o->key.boff; nkey &= o->key.mask1; key_claimed = set_key(kw, nkey, o->key.mask1, o->key.mask1, is_new); } if(!key_claimed) { // reprobe if(++reprobe > reprobe_limit_.val()) return false; cid = (*id + reprobes_[reprobe]) & size_mask_; akey = (akey & ~offsets_.reprobe_mask()) | (reprobe + 1); } } while(!key_claimed); *id = cid; *_w = w; *_ao = o; return true; } // Claim large key. Enter an entry for a key when it is not the // first entry. Only encode the number of reprobe hops back to the // first entry of the key in the hash table. It is simpler as can // takes less than one word in length. bool claim_large_key(size_t* id, const offset_t** _ao, word** _w) { uint_t reprobe = 0; size_t cid = *id; const offset_t *o, *lo; word *w, *kw, nkey; bool key_claimed = false; do { w = offsets_.word_offset(cid, &o, &lo, data_); kw = w + lo->key.woff; if(lo->key.sb_mask1) { // key split on multiple words nkey = (reprobe << lo->key.boff) | lo->key.sb_mask1 | lo->key.lb_mask; nkey &= lo->key.mask1; // Use o->key.mask1 and not lo->key.mask1 as the first one is // guaranteed to be bigger. The key needs to be free on its // longer mask to claim it! key_claimed = set_key(kw, nkey, o->key.mask1, lo->key.mask1); if(key_claimed) { nkey = (reprobe >> lo->key.shift) | lo->key.sb_mask2; nkey &= lo->key.mask2; key_claimed = set_key(kw + 1, nkey, o->key.full_words ? fmask : o->key.mask2, lo->key.mask2); } } else { // key on 1 word nkey = (reprobe << lo->key.boff) | lo->key.lb_mask; nkey &= lo->key.mask1; key_claimed = set_key(kw, nkey, o->key.mask1, lo->key.mask1); } if(!key_claimed) { //reprobe if(++reprobe > reprobe_limit_.val()) return false; cid = (*id + reprobes_[reprobe]) & size_mask_; } } while(!key_claimed); *id = cid; *_w = w; *_ao = lo; return true; } // Add val to key. id is the starting place (result of hash // computation). eid is set to the effective place in the // array. large is set to true is setting a large key (upon // recurrence if there is a carry). bool add_rec(size_t id, const key_type& key, word val, bool large, bool* is_new, size_t* eid, unsigned int* carry_shift) { const offset_t *ao = 0; word *w = 0; bool claimed = false; if(large) claimed = claim_large_key(&id, &ao, &w); else claimed = claim_key(key, is_new, &id, &ao, &w); if(!claimed) return false; *eid = id; return add_rec_at(id, key, val, ao, w, carry_shift); } bool add_rec_at(size_t id, const key_type& key, word val, const offset_t* ao, word* w, unsigned int* carry_shift) { // Increment value word *vw = w + ao->val.woff; word cary = add_val(vw, val, ao->val.boff, ao->val.mask1); cary >>= ao->val.shift; *carry_shift += ao->val.shift; if(cary && ao->val.mask2) { // value split on two words cary = add_val(vw + 1, cary, 0, ao->val.mask2); cary >>= ao->val.cshift; *carry_shift += ao->val.cshift; } if(!cary) return true; id = (id + reprobes_[0]) & size_mask_; size_t ignore_eid; bool ignore_is_new; return add_rec(id, key, cary, true, &ignore_is_new, &ignore_eid, carry_shift); // // Adding failed, table is full. Need to back-track and // // substract val. // std::cerr << "Failed to add large part of value -> return false\n"; // cary = add_val(vw, ((word)1 << offsets_.val_len()) - val, // ao->val.boff, ao->val.mask1); // cary >>= ao->val.shift; // if(cary && ao->val.mask2) { // // Can I ignore the cary here? Table is known to be full, so // // not much of a choice. But does it leave the table in a // // consistent state? // add_val(vw + 1, cary, 0, ao->val.mask2); // } // return false; } // Atomic methods to set the key. Attempt to set nkey in word w. All // bits matching free_mask must be unset and the bits matching // equal_mask must be equal for a success in setting the key. Set // is_new to true if the spot was previously empty. Otherwise, if // is_new is false but true is returned, the key was already present // at that spot. inline bool set_key(word *w, word nkey, word free_mask, word equal_mask, bool *is_new) { word ow = *w, nw, okey; okey = ow & free_mask; while(okey == 0) { // large bit not set && key is free nw = atomic_.cas(w, ow, ow | nkey); if(nw == ow) { *is_new = true; return true; } ow = nw; okey = ow & free_mask; } *is_new = false; return (ow & equal_mask) == nkey; } inline bool set_key(word *w, word nkey, word free_mask, word equal_mask) { bool is_new; return set_key(w, nkey, free_mask, equal_mask, &is_new); } // Add val the value in word w, with shift and mask giving the // particular part of the word in which the value is stored. The // return value is the carry. inline word add_val(word *w, word val, uint_t shift, word mask) { word now = *w, ow, nw, nval; do { ow = now; nval = ((ow & mask) >> shift) + val; nw = (ow & ~mask) | ((nval << shift) & mask); now = atomic_.cas(w, ow, nw); } while(now != ow); return nval & (~(mask >> shift)); } // Return the key and value at position id. If the slot at id is // empty or has the large bit set, returns false. Otherwise, returns // the key and the value is the sum of all the entries in the hash // table for that key. I.e., the table is search forward for entries // with large bit set pointing back to the key at id, and all those // values are summed up. key_status get_key_val_at_id(size_t id, key_type& key, word& val, const bool carry_bit = false) const { const word* w; const offset_t* o; key_status st = get_key_at_id(id, key, &w, &o); if(st != FILLED) return st; val = get_val_at_id(id, w, o, true, carry_bit); return FILLED; } // Get a the key at the given id. It also returns the word and // offset information in w and o. The return value is EMPTY (no key // at id), FILLED (there is a key at id), LBSET (the large bit is // set, hence the key is only a pointer back to the real key). // // The key returned contains the original id in the hash as its // lsize_ lsb bits. To obtain the full key, one needs to compute the // product with the inverse matrix to get the lsb bits. inline key_status get_key_at_id(size_t id, key_type& key, const word** w, const offset_t** o) const { const offset_t *lo; *w = offsets_.word_offset(id, o, &lo, data_); return get_key_at_id(id, key, *w, *o); } // Sam as above, but it assume that the word w and o for id have // already be computed (like already prefetched). key_status get_key_at_id(size_t id, key_type&key, const word* w, const offset_t* o) const { const word* kvw = w + o->key.woff; word key_word = *kvw; word kreprobe = 0; const key_offsets& key_o = o->key; if(key_word & key_o.lb_mask) return LBSET; const int implied_copy = std::min(lsize_, key_len_); int bits_copied = implied_copy; if(key_o.sb_mask1) { if((key_word & key_o.sb_mask1) == 0) return EMPTY; kreprobe = (key_word & key_o.mask1 & ~key_o.sb_mask1) >> key_o.boff; if(key_o.full_words) { // Copy full words. First one is special key_word = *(kvw + 1); if(offsets_.reprobe_len() < key_o.shift) { key.set_bits(bits_copied, key_o.shift - offsets_.reprobe_len(), kreprobe >> offsets_.reprobe_len()); bits_copied += key_o.shift - offsets_.reprobe_len(); kreprobe &= offsets_.reprobe_mask(); key.set_bits(bits_copied, wsize - 1, key_word & ~key_o.sb_mask1); bits_copied += wsize - 1; } else { int reprobe_left = offsets_.reprobe_len() - key_o.shift; kreprobe |= (key_word & (((word)1 << reprobe_left) - 1)) << key_o.shift; key.set_bits(bits_copied, wsize - 1 - reprobe_left, (key_word & ~key_o.sb_mask1) >> reprobe_left); bits_copied += wsize - 1 - reprobe_left; } int word_copied = 2; while(bits_copied + wsize - 1 <= key_len_) { key.set_bits(bits_copied, wsize - 1, *(kvw + word_copied++) & (fmask >> 1)); bits_copied += wsize - 1; } if(key_o.sb_mask2) key.set_bits(bits_copied, key_len_ - bits_copied, *(kvw + word_copied) & key_o.mask2 & ~key_o.sb_mask2); } else if(key_o.sb_mask2) { // if(bits_copied + wsize - 1 < key_len // Two words but no full words key_word = *(kvw + 1) & key_o.mask2 & ~key_o.sb_mask2; if(offsets_.reprobe_len() < key_o.shift) { key.set_bits(bits_copied, key_o.shift - offsets_.reprobe_len(), kreprobe >> offsets_.reprobe_len()); bits_copied += key_o.shift - offsets_.reprobe_len(); kreprobe &= offsets_.reprobe_mask(); key.set_bits(bits_copied, key_len_ - bits_copied, key_word); } else { int reprobe_left = offsets_.reprobe_len() - key_o.shift; kreprobe |= (key_word & (((word)1 << reprobe_left) - 1)) << key_o.shift; key.set_bits(bits_copied, key_len_ - bits_copied, key_word >> reprobe_left); } } } else { // if(key_o.sb_mask1 // Everything in 1 word key_word = (key_word & key_o.mask1) >> key_o.boff; if(key_word == 0) return EMPTY; kreprobe = key_word & offsets_.reprobe_mask(); key.set_bits(bits_copied, raw_key_len_, key_word >> offsets_.reprobe_len()); } // Compute missing oid so that the original key can be computed // back through the inverse matrix. Although the key may have a // length of key_len_, which may be less than lsize_, assume that // it still fit here as lsize_ is less than a word length. Need all lsize_. size_t oid = id; // Original id if(kreprobe > 1) oid -= reprobes_[kreprobe - 1]; oid &= size_mask_; // Can use more bits than mer size. That's OK, will fix it later // when computing the actual mers by computing the product with // the inverse matrix. key.template set_bits<0>(0, lsize_, oid); return FILLED; } word get_val_at_id(const size_t id, const word* w, const offset_t* o, const bool reprobe = true, const bool carry_bit = false) const { word val = 0; if(val_len() == 0) return val; // First part of value const word* kvw = w + o->val.woff; val = ((*kvw) & o->val.mask1) >> o->val.boff; if(o->val.mask2) val |= ((*(kvw+1)) & o->val.mask2) << o->val.shift; // Do we want to get the large value bool do_reprobe = reprobe; if(carry_bit && do_reprobe) { do_reprobe = do_reprobe && (val & 0x1); val >>= 1; } if(!do_reprobe) return val; return resolve_val_rec((id + reprobes_[0]) & size_mask_, val, carry_bit); } word resolve_val_rec(const size_t id, word val, const bool carry_bit, const uint_t overflows = 0) const { uint_t reprobe = 0; size_t cid = id; while(reprobe <= reprobe_limit_.val()) { const offset_t *o, *lo; const word* w = offsets_.word_offset(cid, &o, &lo, data_); const word* kw = w + o->key.woff; word nkey = *kw; const key_offsets& lkey = lo->key; if(nkey & lkey.lb_mask) { // If the large bit is set, the size of the key (reprobe_len) // is guaranteed to have a length of at most 1 word. if(lkey.sb_mask1) { nkey = (nkey & lkey.mask1 & ~lkey.sb_mask1) >> lkey.boff; nkey |= ((*(kw+1)) & lkey.mask2 & ~lkey.sb_mask2) << lkey.shift; } else { nkey = (nkey & lkey.mask1) >> lkey.boff; } if(nkey == reprobe) { const val_offsets& lval = lo->val; const word* vw = w + lval.woff; word nval = ((*vw) & lval.mask1) >> lval.boff; if(lval.mask2) nval |= ((*(vw+1)) & lval.mask2) << lval.shift; bool do_reprobe = true; if(carry_bit) { do_reprobe = nval & 0x1; nval >>= 1; } nval <<= offsets_.val_len(); nval <<= offsets_.lval_len() * overflows; val += nval; if(!do_reprobe) return val; return resolve_val_rec((cid + reprobes_[0]) & size_mask_, val, carry_bit, overflows + 1); } } else if((nkey & o->key.mask1) == 0) { break; } cid = (id + reprobes_[++reprobe]) & size_mask_; } return val; } }; template class array : protected mem_block_t, public array_base > { typedef array_base > super; friend class array_base >; public: array(size_t size, // Size of hash. To be rounded up to a power of 2 uint16_t key_len, // Size of key in bits uint16_t val_len, // Size of val in bits uint16_t reprobe_limit, // Maximum reprobe const size_t* reprobes = quadratic_reprobes) : // Reprobing policy mem_block_t(), super(size, key_len, val_len, reprobe_limit, RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(), reprobes) { } protected: word* alloc_data(size_t s) { mem_block_t::realloc(s); return (word*)mem_block_t::get_ptr(); } }; struct ptr_info { void* ptr_; size_t bytes_; ptr_info(void* ptr, size_t bytes) : ptr_(ptr), bytes_(bytes) { } }; template class array_raw : protected ptr_info, public array_base > { typedef array_base > super; friend class array_base >; public: array_raw(void* ptr, size_t bytes, // Memory available at ptr size_t size, // Size of hash in number of entries. To be rounded up to a power of 2 uint16_t key_len, // Size of key in bits uint16_t val_len, // Size of val in bits uint16_t reprobe_limit, // Maximum reprobe RectangularBinaryMatrix m, const size_t* reprobes = quadratic_reprobes) : // Reprobing policy ptr_info(ptr, bytes), super(size, key_len, val_len, reprobe_limit, m, reprobes) { } protected: word* alloc_data(size_t s) { assert(bytes_ == s); return (word*)ptr_; } }; } } // namespace jellyfish { namespace large_hash_array #endif /* __JELLYFISH_LARGE_HASH_ARRAY_HPP__ */