// Boost.Units - A C++ library for zero-overhead dimensional analysis and // unit/quantity manipulation and conversion // // Copyright (C) 2003-2008 Matthias Christian Schabel // Copyright (C) 2008 Steven Watanabe // // Distributed under the Boost Software License, Version 1.0. (See // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) /** \file \brief performance.cpp \detailed Test runtime performance. Output: @verbatim multiplying ublas::matrix(1000, 1000) : 25.03 seconds multiplying ublas::matrix(1000, 1000) : 24.49 seconds tiled_matrix_multiply(1000, 1000) : 1.12 seconds tiled_matrix_multiply(1000, 1000) : 1.16 seconds solving y' = 1 - x + 4 * y with double: 1.97 seconds solving y' = 1 - x + 4 * y with quantity: 1.84 seconds @endverbatim **/ #define _SCL_SECURE_NO_WARNINGS #include #include #include #include #include #include #include #include #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable:4267; disable:4127; disable:4244; disable:4100) #endif #include #ifdef BOOST_MSVC #pragma warning(pop) #endif #include #include #include enum { tile_block_size = 24 }; template void tiled_multiply_carray_inner(T0* first, T1* second, Out* out, int totalwidth, int width2, int height1, int common) { for(int j = 0; j < height1; ++j) { for(int i = 0; i < width2; ++i) { Out value = out[j * totalwidth + i]; for(int k = 0; k < common; ++k) { value += first[k + totalwidth * j] * second[k * totalwidth + i]; } out[j * totalwidth + i] = value; } } } template void tiled_multiply_carray_outer(T0* first, T1* second, Out* out, int width2, int height1, int common) { std::fill_n(out, width2 * height1, Out()); int j = 0; for(; j < height1 - tile_block_size; j += tile_block_size) { int i = 0; for(; i < width2 - tile_block_size; i += tile_block_size) { int k = 0; for(; k < common - tile_block_size; k += tile_block_size) { tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, tile_block_size, tile_block_size, tile_block_size); } tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, tile_block_size, tile_block_size, common - k); } int k = 0; for(; k < common - tile_block_size; k += tile_block_size) { tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, width2 - i, tile_block_size, tile_block_size); } tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, width2 - i, tile_block_size, common - k); } int i = 0; for(; i < width2 - tile_block_size; i += tile_block_size) { int k = 0; for(; k < common - tile_block_size; k += tile_block_size) { tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, tile_block_size, height1 - j, tile_block_size); } tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, tile_block_size, height1 - j, common - k); } int k = 0; for(; k < common - tile_block_size; k += tile_block_size) { tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, width2 - i, height1 - j, tile_block_size); } tiled_multiply_carray_inner( &first[k + width2 * j], &second[k * width2 + i], &out[j * width2 + i], width2, width2 - i, height1 - j, common - k); } enum { max_value = 1000}; template R solve_differential_equation(F f, T lower, T upper, N steps, R start) { typedef typename F::template result::type f_result; T h = (upper - lower) / (1.0*steps); for(N i = N(); i < steps; ++i) { R y = start; T x = lower + h * (1.0*i); f_result k1 = f(x, y); f_result k2 = f(x + h / 2.0, y + h * k1 / 2.0); f_result k3 = f(x + h / 2.0, y + h * k2 / 2.0); f_result k4 = f(x + h, y + h * k3); start = y + h * (k1 + 2.0 * k2 + 2.0 * k3 + k4) / 6.0; } return(start); } using namespace boost::units; //y' = 1 - x + 4 * y struct f { template struct result; double operator()(const double& x, const double& y) const { return(1.0 - x + 4.0 * y); } boost::units::quantity operator()(const quantity& x, const quantity& y) const { using namespace boost::units; using namespace si; return(1.0 * meters / second - x * meters / pow<2>(seconds) + 4.0 * y / seconds ); } }; template<> struct f::result { typedef double type; }; template<> struct f::result, quantity > { typedef quantity type; }; //y' = 1 - x + 4 * y //y' - 4 * y = 1 - x //e^(-4 * x) * (dy - 4 * y * dx) = e^(-4 * x) * (1 - x) * dx //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) (1 - x) * dx //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) * dx - x * e ^ (-4 * x) * dx //d/dx(y * e ^ (-4 * x)) = d/dx((-3/16 + 1/4 * x) * e ^ (-4 * x)) //y * e ^ (-4 * x) = (-3/16 + 1/4 * x) * e ^ (-4 * x) + C //y = (-3/16 + 1/4 * x) + C/e ^ (-4 * x) //y = 1/4 * x - 3/16 + C * e ^ (4 * x) //y(0) = 1 //1 = - 3/16 + C //C = 19/16 //y(x) = 1/4 * x - 3/16 + 19/16 * e ^ (4 * x) int main() { boost::numeric::ublas::matrix ublas_result; { boost::numeric::ublas::matrix m1(max_value, max_value); boost::numeric::ublas::matrix m2(max_value, max_value); std::srand(1492); for(int i = 0; i < max_value; ++i) { for(int j = 0; j < max_value; ++j) { m1(i,j) = std::rand(); m2(i,j) = std::rand(); } } std::cout << "multiplying ublas::matrix(" << max_value << ", " << max_value << ") : "; boost::timer timer; ublas_result = (prod(m1, m2)); std::cout << timer.elapsed() << " seconds" << std::endl; } typedef boost::numeric::ublas::matrix< boost::units::quantity > matrix_type; matrix_type ublas_resultq; { matrix_type m1(max_value, max_value); matrix_type m2(max_value, max_value); std::srand(1492); for(int i = 0; i < max_value; ++i) { for(int j = 0; j < max_value; ++j) { m1(i,j) = std::rand(); m2(i,j) = std::rand(); } } std::cout << "multiplying ublas::matrix(" << max_value << ", " << max_value << ") : "; boost::timer timer; ublas_resultq = (prod(m1, m2)); std::cout << timer.elapsed() << " seconds" << std::endl; } std::vector cresult(max_value * max_value); { std::vector m1(max_value * max_value); std::vector m2(max_value * max_value); std::srand(1492); for(int i = 0; i < max_value * max_value; ++i) { m1[i] = std::rand(); m2[i] = std::rand(); } std::cout << "tiled_matrix_multiply(" << max_value << ", " << max_value << ") : "; boost::timer timer; tiled_multiply_carray_outer( &m1[0], &m2[0], &cresult[0], max_value, max_value, max_value); std::cout << timer.elapsed() << " seconds" << std::endl; } std::vector< boost::units::quantity > cresultq(max_value * max_value); { std::vector< boost::units::quantity > m1(max_value * max_value); std::vector< boost::units::quantity > m2(max_value * max_value); std::srand(1492); for(int i = 0; i < max_value * max_value; ++i) { m1[i] = std::rand() * boost::units::si::newtons; m2[i] = std::rand() * boost::units::si::meters; } std::cout << "tiled_matrix_multiply(" << max_value << ", " << max_value << ") : "; boost::timer timer; tiled_multiply_carray_outer( &m1[0], &m2[0], &cresultq[0], max_value, max_value, max_value); std::cout << timer.elapsed() << " seconds" << std::endl; } for(int i = 0; i < max_value; ++i) { for(int j = 0; j < max_value; ++j) { double diff = std::abs(ublas_result(i,j) - cresult[i * max_value + j]); if(diff > ublas_result(i,j) /1e14) { std::cout << std::setprecision(15) << "Uh Oh. ublas_result(" << i << "," << j << ") = " << ublas_result(i,j) << std::endl << "cresult[" << i << " * " << max_value << " + " << j << "] = " << cresult[i * max_value + j] << std::endl; return(EXIT_FAILURE); } } } { std::vector values(1000); std::cout << "solving y' = 1 - x + 4 * y with double: "; boost::timer timer; for(int i = 0; i < 1000; ++i) { double x = .1 * i; values[i] = solve_differential_equation(f(), 0.0, x, i * 100, 1.0); } std::cout << timer.elapsed() << " seconds" << std::endl; for(int i = 0; i < 1000; ++i) { double x = .1 * i; double value = 1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x); if(std::abs(values[i] - value) > value / 1e9) { std::cout << std::setprecision(15) << "i = : " << i << ", value = " << value << " approx = " << values[i] << std::endl; return(EXIT_FAILURE); } } } { using namespace boost::units; using namespace si; std::vector > values(1000); std::cout << "solving y' = 1 - x + 4 * y with quantity: "; boost::timer timer; for(int i = 0; i < 1000; ++i) { quantity x = .1 * i * seconds; values[i] = solve_differential_equation( f(), 0.0 * seconds, x, i * 100, 1.0 * meters); } std::cout << timer.elapsed() << " seconds" << std::endl; for(int i = 0; i < 1000; ++i) { double x = .1 * i; quantity value = (1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x)) * meters; if(abs(values[i] - value) > value / 1e9) { std::cout << std::setprecision(15) << "i = : " << i << ", value = " << value << " approx = " << values[i] << std::endl; return(EXIT_FAILURE); } } } }