/* This file is part of Jellyfish.
Jellyfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Jellyfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Jellyfish. If not, see .
*/
#ifndef __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__
#define __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef HAVE_CONFIG_H
#include
#endif
// Column major representation
//
// Rectangular matrices on Z/2Z of size _r x _c where 1<=_r<=64 (_c is
// not limited) and _r <= _c. I.e., the matrix can be stored as an
// array of 64 bit word, each representing a column (the highest 64-_r
// bits of each word are set to 0).
//
// Multiplication between a matrix and vector of size _c x 1 gives a
// vector of size _r x 1 stored as one 64 bit word.
namespace jellyfish {
class RectangularBinaryMatrix {
public:
RectangularBinaryMatrix(unsigned int r, unsigned c)
: _columns(alloc(r, c)), _r(r), _c(c) { }
RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs)
: _columns(alloc(rhs._r, rhs._c)), _r(rhs._r), _c(rhs._c) {
memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
}
RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) :
_columns(rhs._columns), _r(rhs._r), _c(rhs._c) {
rhs._columns = 0;
}
// Initialize from raw data. raw must contain at least c words.
template
RectangularBinaryMatrix(const T &raw, unsigned int r, unsigned c)
: _columns(alloc(r, c)), _r(r), _c(c) {
for(unsigned int i = 0; i < _c; ++i)
_columns[i] = raw[i] & cmask();
}
~RectangularBinaryMatrix() {
free(_columns);
}
RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) {
if(_r != rhs._r || _c != rhs._c)
throw std::invalid_argument("RHS matrix dimensions do not match");
memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
return *this;
}
RectangularBinaryMatrix& operator=(RectangularBinaryMatrix&& rhs) {
if(_r != rhs._r || _c != rhs._c)
throw std::invalid_argument("RHS matrix dimensions do not match");
std::swap(_columns, rhs._columns);
return *this;
}
bool operator==(const RectangularBinaryMatrix &rhs) const {
if(_r != rhs._r || _c != rhs._c)
return false;
return !memcmp(_columns, rhs._columns, sizeof(uint64_t) * _c);
}
bool operator!=(const RectangularBinaryMatrix &rhs) const {
return !(*this == rhs);
}
// Get i-th column. No check on range
const uint64_t & operator[](unsigned int i) const { return _columns[i]; }
unsigned int r() const { return _r; }
unsigned int c() const { return _c; }
// True if every column is zero
bool is_zero() const {
uint64_t *p = _columns;
while(*p == 0 && p < _columns + _c)
++p;
return (p - _columns) == _c;
}
// Randomize the content of the matrix
void randomize(uint64_t (*rng)()) {
for(unsigned int i = 0; i < _c; ++i)
_columns[i] = rng() & cmask();
}
//void randomize() { randomize(rng); }
// Make and check that the matrix the lower right corner of the
// identity.
void init_low_identity();
bool is_low_identity();
// Left matrix vector multiplication. Type T supports the operator
// v[i] to return the i-th 64 bit word of v.
template
uint64_t times_loop(const T &v) const;
#ifdef HAVE_SSE
// This SSE implementation only works if the number of columns is
// even.
template
uint64_t times_sse(const T &v) const;
#endif
#ifdef HAVE_INT128
// Implementation using __int128
template
uint64_t times_128(const T& v) const;
#endif
template
inline uint64_t times(const T& v) const {
#ifdef HAVE_SSE
return times_sse(v);
#elif HAVE_INT128
return times_128(v);
#else
return times_loop(v);
#endif
}
// Return a matrix which is the "pseudo inverse" of this matrix. It
// is assumed that there is above this square matrix an identity
// block and a zero so as to make the matrix squared. Raise an
// exception std::domain_error if the matrix is singular.
RectangularBinaryMatrix pseudo_inverse() const;
// Return the multiplication of this and rhs. As in pseudo_inverse,
// the two matrices are viewed as being squared, padded above by the
// identity.
RectangularBinaryMatrix pseudo_multiplication(const RectangularBinaryMatrix &rhs) const;
// Initialize the object with a pseudo-invertible matrix and return its pseudo-inverse
RectangularBinaryMatrix randomize_pseudo_inverse(uint64_t (*rng)());
RectangularBinaryMatrix randomize_pseudo_inverse() { return randomize_pseudo_inverse(random_bits); }
// Return the rank of the matrix. The matrix is assumed to be
// squared, padded above by the identity.
unsigned int pseudo_rank() const;
// Display matrix
void print(std::ostream &os) const;
template
void print_vector(std::ostream &os, const T &v) const;
// Nb words in vector for multiplication
uint64_t nb_words() const { return (_c >> 6) + ((_c & 0x3f) != 0); }
// Mask of most significant bit in most significant word of a vector
// with _c rows.
uint64_t msb() const {
int shift = _c & 0x3f;
if(shift == 0)
shift = sizeof(uint64_t) * 8;
return (uint64_t)1 << (shift - 1);
}
private:
// Store column by column. A column may use one word. By
// convention, the "unused" bits (most significant bits) of each
// column are set to 0.
uint64_t * _columns;
const unsigned int _r, _c;
static uint64_t *alloc(unsigned int r, unsigned int c) __attribute__((malloc));
// Mask for column word (zero msb)
uint64_t cmask() const { return std::numeric_limits::max() >> (std::numeric_limits::digits - _r); }
// Mask of highest word of a vector with _c rows (Most Significant
// Word)
uint64_t msw() const { return (msb() << 1) - 1; }
// Nb of bits used in highest word of vector with _c rows.
uint64_t nb_msb() const {
uint64_t nb = _c & 0x3f;
return nb ? nb : sizeof(uint64_t) * 8;
}
// Allow to change the matrix vectors. No check on i.
uint64_t & get(unsigned int i) { return _columns[i]; }
};
template
uint64_t RectangularBinaryMatrix::times_loop(const T &v) const {
uint64_t *p = _columns + _c - 1;
uint64_t res = 0, x = 0, j = 0;
const uint64_t one = (uint64_t)1;
for(unsigned int i = 0; i < nb_words(); ++i) {
j = sizeof(uint64_t) * 8;
x = v[i];
if(i == nb_words() - 1) {
x &= msw();
j = nb_msb();
}
for( ; j > 7; j -= 8, p -= 8) {
res ^= (-(x & one)) & p[0]; x >>= 1;
res ^= (-(x & one)) & p[-1]; x >>= 1;
res ^= (-(x & one)) & p[-2]; x >>= 1;
res ^= (-(x & one)) & p[-3]; x >>= 1;
res ^= (-(x & one)) & p[-4]; x >>= 1;
res ^= (-(x & one)) & p[-5]; x >>= 1;
res ^= (-(x & one)) & p[-6]; x >>= 1;
res ^= (-(x & one)) & p[-7]; x >>= 1;
}
}
// Finish the loop
switch(j) {
case 7: res ^= (-(x & one)) & *p--; x >>= 1;
case 6: res ^= (-(x & one)) & *p--; x >>= 1;
case 5: res ^= (-(x & one)) & *p--; x >>= 1;
case 4: res ^= (-(x & one)) & *p--; x >>= 1;
case 3: res ^= (-(x & one)) & *p--; x >>= 1;
case 2: res ^= (-(x & one)) & *p--; x >>= 1;
case 1: res ^= (-(x & one)) & *p;
}
return res;
}
#ifdef HAVE_SSE
template
uint64_t RectangularBinaryMatrix::times_sse(const T &v) const {
#define FFs ((uint64_t)-1)
static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) =
{0, 0, 0, FFs, FFs, 0, FFs, FFs};
typedef uint64_t xmm_t __attribute__((vector_size(16)));
uint64_t *p = _columns + _c - 8;
// //#ifdef __ICC
// register xmm_t acc;
// register xmm_t load;
// memset(&acc, '\0', 16);
// memset(&load, '\0', 16);
// #else
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wuninitialized"
#endif
xmm_t acc = acc ^ acc; // Set acc to 0
xmm_t load = load ^ load;
#ifdef __clang__
#pragma clang diagnostic pop
#endif
// #endif
// // Zero out acc
// #pragma GCC diagnostic push
// #pragma GCC diagnostic ignored "-Wuninitialized"
// asm("pxor %0,%0\n\t" : "=x"(acc) : "0"(acc));
// asm("pxor %0,%0\n\t" : "=x"(load) : "0"(load));
// #pragma GCC diagnostic pop
// i is the lower 2 bits of x, and an index into the smear array. Compute res ^= smear[i] & p[j].
#define AND_XOR(off) \
asm("movdqa (%[s],%[i]), %[load]\n\t" \
"pand " off "(%[p]),%[load]\n\t" \
"pxor %[load],%[acc]\n\t" \
: [acc]"=&x"(acc) \
: "[acc]"(acc), [i]"r"(i), [p]"r"(p), [s]"r"(smear), [load]"x"(load))
uint64_t i, j = 0, x = 0;
for(unsigned int w = 0; w < nb_words(); ++w) {
x = v[w];
j = sizeof(uint64_t) * 8;
if(w == nb_words() - 1) {
x &= msw();
j = nb_msb();
}
for( ; j > 7; j -= 8, p -= 8) {
i = (x & (uint64_t)0x3) << 4;
AND_XOR("0x30");
x >>= 2;
i = (x & (uint64_t)0x3) << 4;
AND_XOR("0x20");
x >>= 2;
i = (x & (uint64_t)0x3) << 4;
AND_XOR("0x10");
x >>= 2;
i = (x & (uint64_t)0x3) << 4;
AND_XOR("");
x >>= 2;
}
}
// Finish loop
p = _columns;
switch(j) {
case 6:
i = (x & (uint64_t)0x3) << 4;
AND_XOR("0x20");
x >>= 2;
case 4:
i = (x & (uint64_t)0x3) << 4;
AND_XOR("0x10");
x >>= 2;
case 2:
i = (x & (uint64_t)0x3) << 4;
AND_XOR("");
}
// Get result out
uint64_t res1, res2;
asm("movd %[acc], %[res1]\n\t"
"psrldq $8, %[acc]\n\t"
"movd %[acc], %[res2]\n\t"
: [res1]"=r"(res1), [res2]"=r"(res2)
: [acc]"x"(acc));
return res1 ^ res2;
}
#endif // HAVE_SSE
#ifdef HAVE_INT128
template
uint64_t RectangularBinaryMatrix::times_128(const T &v) const {
typedef unsigned __int128 u128;
static const u128 smear[4] =
{ (u128)0,
(((u128)1 << 64) - 1) << 64,
((u128)1 << 64) - 1,
(u128)-1
};\
u128 res = res ^ res;
u128* p = (u128*)(_columns + _c - 2);
uint64_t j = 0, x = 0;
for(unsigned int w = 0; w < nb_words(); ++w) {
x = v[w];
j = sizeof(uint64_t) * 8;
if(w == nb_words() - 1) {
x &= msw();
j = nb_msb();
}
for( ; j > 7; j -= 8, p -= 4) {
res ^= smear[x & (uint64_t)0x3] & p[ 0]; x >>= 2;
res ^= smear[x & (uint64_t)0x3] & p[-1]; x >>= 2;
res ^= smear[x & (uint64_t)0x3] & p[-2]; x >>= 2;
res ^= smear[x & (uint64_t)0x3] & p[-3]; x >>= 2;
}
}
switch(j) {
case 6: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2;
case 4: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2;
case 2: res ^= smear[x & (uint64_t)0x3] & *p;
}
return (res ^ (res >> 64)) & smear[2];
}
#endif // HAVE_INT128
}
#endif