// Review notes (comment lines only; every original line below is unchanged).
// This file holds tests for tbb::parallel_reduce and tbb::parallel_deterministic_reduce: summation correctness,
// range/body splitting, cancellation through a tbb::task_group_context, "unsupported partitioner" overloads, and
// (only under __TBB_CPP20_CONCEPTS_PRESENT) compile-time concept-constraint checks.
// NOTE(review): the text looks damaged by extraction. Many statements share one physical line, so each `//` comment
// swallows the code that follows it on that line, and spans that look like template argument lists are absent
// (e.g. `#include` with no header name, `template T operator()`, `std::forward(args)`). Presumably everything between
// angle brackets was stripped; restore the file from version control rather than by hand - TODO confirm.
// Next line: license, includes, MSVC warning 4127 suppression, the ValueType alias, Sum (returns v1 + v2),
// Accumulator (adds every element of a blocked_range to a running value), and the start of ParallelSumTester, which
// fills m_array with 1..count and requires each reduction result to equal `expected`.
/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include #include "common/parallel_reduce_common.h" #include "common/cpu_usertime.h" #include "common/exception_handling.h" #include "common/concepts_common.h" //! \file test_parallel_reduce.cpp //! \brief Test for [algorithms.parallel_reduce algorithms.parallel_deterministic_reduce] specification #if _MSC_VER #pragma warning (push) // Suppress conditional expression is constant #pragma warning (disable: 4127) #endif //#if _MSC_VER using ValueType = uint64_t; struct Sum { template T operator() ( const T& v1, const T& v2 ) const { return v1 + v2; } }; struct Accumulator { ValueType operator() ( const tbb::blocked_range& r, ValueType value ) const { for ( ValueType* pv = r.begin(); pv != r.end(); ++pv ) value += *pv; return value; } }; class ParallelSumTester { public: ParallelSumTester( const ParallelSumTester& ) = default; void operator=( const ParallelSumTester& ) = delete; ParallelSumTester() : m_range(nullptr, nullptr) { m_array = new ValueType[unsigned(count)]; for ( ValueType i = 0; i < count; ++i ) m_array[i] = i + 1; m_range = tbb::blocked_range( m_array, m_array + count ); } ~ParallelSumTester() { delete[] m_array; } template void CheckParallelReduce() { Partitioner partitioner; ValueType result1 = reduce_invoker( m_range, Accumulator(), Sum(), partitioner ); REQUIRE_MESSAGE( result1 == expected, "Wrong parallel summation result" ); ValueType result2 = reduce_invoker( m_range, [](const 
// Next line: the lambda-based second check of ParallelSumTester::CheckParallelReduce, the class's data members, and
// the definitions of `count` (1000000) and `expected` (count * (count + 1) / 2). Then namespace test_cancellation:
// ReduceToCancel and JoinToCancel each increment g_CurExecuted, call Cancellator::WaitUntilReady() and return 1;
// ReduceFunctorToCancel is a body with a splitting constructor whose operator() and join() forward to those two
// functors. The line ends with the head of ParallelReduceRunner (Mode is checked against 0..9 by static_assert).
tbb::blocked_range& r, ValueType value) -> ValueType { for ( const ValueType* pv = r.begin(); pv != r.end(); ++pv ) value += *pv; return value; }, Sum(), partitioner ); REQUIRE_MESSAGE( result2 == expected, "Wrong parallel summation result" ); } private: ValueType* m_array; tbb::blocked_range m_range; static const ValueType count, expected; }; const ValueType ParallelSumTester::count = 1000000; const ValueType ParallelSumTester::expected = count * (count + 1) / 2; namespace test_cancellation { struct ReduceToCancel { std::size_t operator()( const tbb::blocked_range&, std::size_t ) const { ++g_CurExecuted; Cancellator::WaitUntilReady(); return 1; } }; // struct ReduceToCancel struct JoinToCancel { std::size_t operator()( std::size_t, std::size_t ) const { ++g_CurExecuted; Cancellator::WaitUntilReady(); return 1; } }; // struct Join struct ReduceFunctorToCancel { std::size_t result; ReduceFunctorToCancel() : result(0) {} ReduceFunctorToCancel( ReduceFunctorToCancel&, tbb::split ) : result(0) {} void operator()( const tbb::blocked_range& br ) { result = ReduceToCancel{}(br, result); } void join( ReduceFunctorToCancel& rhs ) { result = JoinToCancel{}(result, rhs.result); } }; // struct ReduceFunctorToCancel static constexpr std::size_t buffer_test_size = 1024; static constexpr std::size_t maxParallelReduceRunnerMode = 9; template class ParallelReduceRunner { tbb::task_group_context& my_ctx; static_assert(Mode >= 0 && Mode <= maxParallelReduceRunnerMode, "Incorrect mode for ParallelReduceTask"); template void run_parallel_reduce( Args&&... 
// Next line: ParallelReduceRunner::run_parallel_reduce selects the call form from Mode % 5 (0: no partitioner,
// 1: simple_partitioner, 2: auto_partitioner, 3: static_partitioner, 4: a local affinity_partitioner) and always
// passes my_ctx as the last argument. operator() builds a range over [0, buffer_test_size) and uses the
// ReduceFunctorToCancel body when Mode < 5, otherwise the identity + ReduceToCancel + JoinToCancel form.
// The line ends with the head of ParallelDeterministicReduceRunner (Mode is checked against 0..5 by static_assert).
args ) const { switch(Mode % 5) { case 0 : { tbb::parallel_reduce(std::forward(args)..., my_ctx); break; } case 1 : { tbb::parallel_reduce(std::forward(args)..., tbb::simple_partitioner{}, my_ctx); break; } case 2 : { tbb::parallel_reduce(std::forward(args)..., tbb::auto_partitioner{}, my_ctx); break; } case 3 : { tbb::parallel_reduce(std::forward(args)..., tbb::static_partitioner{}, my_ctx); break; } case 4 : { tbb::affinity_partitioner aff; tbb::parallel_reduce(std::forward(args)..., aff, my_ctx); break; } } } public: ParallelReduceRunner( tbb::task_group_context& ctx ) : my_ctx(ctx) {} void operator()() const { tbb::blocked_range br(0, buffer_test_size); if (Mode < 5) { ReduceFunctorToCancel functor; run_parallel_reduce(br, functor); } else { run_parallel_reduce(br, std::size_t(0), ReduceToCancel{}, JoinToCancel{}); } } }; // class ParallelReduceRunner static constexpr std::size_t maxParallelDeterministicReduceRunnerMode = 5; // TODO: unify with ParallelReduceRunner template class ParallelDeterministicReduceRunner { tbb::task_group_context& my_ctx; static_assert(Mode >= 0 && Mode <= maxParallelDeterministicReduceRunnerMode, "Incorrect Mode for deterministic_reduce task"); template void run_parallel_deterministic_reduce( Args&&... 
// Next line: ParallelDeterministicReduceRunner dispatches on Mode % 3 (0: no partitioner, 1: simple_partitioner,
// 2: static_partitioner) and uses the functor body when Mode < 3. The two run_*_cancellation_test helpers iterate
// over utils::concurrency_range(), skip levels below 2, cap parallelism with tbb::global_control, call
// ResetEhGlobals() and then RunCancellationTest. The *TestRunner templates run one cancellation test and then call
// run() on another instantiation; the explicit specializations run a test without recursing further.
args ) const { switch(Mode % 3) { case 0 : { tbb::parallel_deterministic_reduce(std::forward(args)..., my_ctx); break; } case 1 : { tbb::parallel_deterministic_reduce(std::forward(args)..., tbb::simple_partitioner{}, my_ctx); break; } case 2 : { tbb::parallel_deterministic_reduce(std::forward(args)..., tbb::static_partitioner{}, my_ctx); break; } } } public: ParallelDeterministicReduceRunner( tbb::task_group_context& ctx ) : my_ctx(ctx) {} void operator()() const { tbb::blocked_range br(0, buffer_test_size); if (Mode < 3) { ReduceFunctorToCancel functor; run_parallel_deterministic_reduce(br, functor); } else { run_parallel_deterministic_reduce(br, std::size_t(0), ReduceToCancel{}, JoinToCancel{}); } } }; // class ParallelDeterministicReduceRunner template void run_parallel_reduce_cancellation_test() { for ( auto concurrency_level : utils::concurrency_range() ) { if (concurrency_level < 2) continue; tbb::global_control gc(tbb::global_control::max_allowed_parallelism, concurrency_level); ResetEhGlobals(); RunCancellationTest, Cancellator>(); } } template void run_parallel_deterministic_reduce_cancellation_test() { for ( auto concurrency_level : utils::concurrency_range() ) { if (concurrency_level < 2) continue; tbb::global_control gc(tbb::global_control::max_allowed_parallelism, concurrency_level); ResetEhGlobals(); RunCancellationTest, Cancellator>(); } } template struct ParallelReduceTestRunner { static void run() { run_parallel_reduce_cancellation_test(); ParallelReduceTestRunner::run(); } }; // struct ParallelReduceTestRunner template <> struct ParallelReduceTestRunner { static void run() { run_parallel_reduce_cancellation_test(); } }; // struct ParallelReduceTestRunner template struct ParallelDeterministicReduceTestRunner { static void run() { run_parallel_deterministic_reduce_cancellation_test(); ParallelDeterministicReduceTestRunner::run(); } }; // struct ParallelDeterministicReduceTestRunner template <> struct ParallelDeterministicReduceTestRunner { static 
// Next line: end of namespace test_cancellation. Then, only when __TBB_CPP20_CONCEPTS_PRESENT is set: concepts whose
// requires-expressions test whether tbb::parallel_reduce / tbb::parallel_deterministic_reduce can be called with the
// forwarded arguments, helper concepts composed from them with &&, aliases for the "Correct" helper types from
// namespace test_concepts, and the start of test_preduce_range_constraints.
void run() { run_parallel_deterministic_reduce_cancellation_test(); } }; // struct ParallelDeterministicReduceTestRunner } // namespace test_cancellation #if __TBB_CPP20_CONCEPTS_PRESENT template concept can_call_parallel_reduce_basic = requires( Args&&... args ) { tbb::parallel_reduce(std::forward(args)...); }; template concept can_call_parallel_deterministic_reduce_basic = requires ( Args&&... args ) { tbb::parallel_deterministic_reduce(std::forward(args)...); }; template concept can_call_preduce_helper = can_call_parallel_reduce_basic && can_call_parallel_reduce_basic; template concept can_call_pdet_reduce_helper = can_call_parallel_deterministic_reduce_basic && can_call_parallel_deterministic_reduce_basic; template concept can_call_preduce_with_partitioner = can_call_preduce_helper && can_call_preduce_helper && can_call_preduce_helper && can_call_preduce_helper && can_call_preduce_helper; template concept can_call_pdet_reduce_with_partitioner = can_call_pdet_reduce_helper && can_call_pdet_reduce_helper && can_call_pdet_reduce_helper; template concept can_call_imperative_preduce = can_call_preduce_with_partitioner; template concept can_call_imperative_pdet_reduce = can_call_pdet_reduce_with_partitioner; template concept can_call_functional_preduce = can_call_preduce_with_partitioner; template concept can_call_functional_pdet_reduce = can_call_pdet_reduce_with_partitioner; template using CorrectBody = test_concepts::parallel_reduce_body::Correct; template using CorrectFunc = test_concepts::parallel_reduce_function::Correct; using CorrectReduction = test_concepts::parallel_reduce_combine::Correct; using CorrectRange = test_concepts::range::Correct; void test_preduce_range_constraints() { using namespace test_concepts::range; static_assert(can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); 
// Next line: the remaining static_asserts of test_preduce_range_constraints, then test_preduce_body_constraints,
// test_preduce_func_constraints and the start of test_preduce_combine_constraints. Each group of checks consists of
// one positive static_assert followed by negated ones.
static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); } void test_preduce_body_constraints() { using namespace test_concepts::parallel_reduce_body; static_assert(can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); static_assert(!can_call_imperative_preduce>); } void test_preduce_func_constraints() { using namespace test_concepts::parallel_reduce_function; static_assert(can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); static_assert(!can_call_functional_preduce, CorrectReduction>); } void test_preduce_combine_constraints() { using namespace test_concepts::parallel_reduce_combine; static_assert(can_call_functional_preduce, Correct>); static_assert(!can_call_functional_preduce, NoOperatorRoundBrackets>); 
// Next line: the rest of test_preduce_combine_constraints, then the parallel_deterministic_reduce counterparts:
// test_pdet_reduce_range_constraints and all but the last static_assert of test_pdet_reduce_body_constraints.
static_assert(!can_call_functional_preduce, OperatorRoundBracketsNonConst>); static_assert(!can_call_functional_preduce, WrongFirstInputOperatorRoundBrackets>); static_assert(!can_call_functional_preduce, WrongSecondInputOperatorRoundBrackets>); static_assert(!can_call_functional_preduce, WrongReturnOperatorRoundBrackets>); } void test_pdet_reduce_range_constraints() { using namespace test_concepts::range; static_assert(can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); } void test_pdet_reduce_body_constraints() { using namespace test_concepts::parallel_reduce_body; static_assert(can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); static_assert(!can_call_imperative_pdet_reduce>); 
// Next two lines: test_pdet_reduce_func_constraints and test_pdet_reduce_combine_constraints, the end of the concepts
// section, the "Test parallel summation correctness" case (five CheckParallelReduce calls on one ParallelSumTester),
// and the ForkCount / FooBodyCount atomics. MinimalRange's splitting constructor takes the upper half [mid, end) for
// the new range and shrinks the source to [begin, mid), with mid = (begin + end) / 2. FooBody's constructors and
// destructor keep FooBodyCount in step; its splitting constructor also increments ForkCount and records the parent;
// join() requires that the joined body was forked from this one and that its range starts where this one's ends.
// NOTE(review): FooBody::operator() breaks off after `k` and runs straight into `void TestSplitting(` - text appears
// to be missing there; recover it from version control. TestSplitting reduces over MinimalRange(i) for i in 0..1000
// and requires FooBodyCount == 1 before and after each reduction, f.sum == i, and ForkCount == 0 when nthread == 1.
static_assert(!can_call_imperative_pdet_reduce>); } void test_pdet_reduce_func_constraints() { using namespace test_concepts::parallel_reduce_function; static_assert(can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); static_assert(!can_call_functional_pdet_reduce, CorrectReduction>); } void test_pdet_reduce_combine_constraints() { using namespace test_concepts::parallel_reduce_combine; static_assert(can_call_functional_pdet_reduce, Correct>); static_assert(!can_call_functional_pdet_reduce, NoOperatorRoundBrackets>); static_assert(!can_call_functional_pdet_reduce, OperatorRoundBracketsNonConst>); static_assert(!can_call_functional_pdet_reduce, WrongFirstInputOperatorRoundBrackets>); static_assert(!can_call_functional_pdet_reduce, WrongSecondInputOperatorRoundBrackets>); static_assert(!can_call_functional_pdet_reduce, WrongReturnOperatorRoundBrackets>); } #endif // __TBB_CPP20_CONCEPTS_PRESENT //! Test parallel summation correctness //! \brief \ref stress TEST_CASE("Test parallel summation correctness") { ParallelSumTester pst; pst.CheckParallelReduce(); pst.CheckParallelReduce(); pst.CheckParallelReduce(); pst.CheckParallelReduce(); pst.CheckParallelReduce(); } static std::atomic ForkCount; static std::atomic FooBodyCount; //! Class with public interface that is exactly minimal requirements for Range concept class MinimalRange { size_t begin, end; friend class FooBody; explicit MinimalRange( size_t i ) : begin(0), end(i) {} template friend void TestSplitting( std::size_t nthread ); public: MinimalRange( MinimalRange& r, tbb::split ) : end(r.end) { begin = r.end = (r.begin+r.end)/2; } bool is_divisible() const {return end-begin>=2;} bool empty() const {return begin==end;} }; //! 
Class with public interface that is exactly minimal requirements for Body of a parallel_reduce class FooBody { private: FooBody( const FooBody& ); // Deny access void operator=( const FooBody& ); // Deny access template friend void TestSplitting( std::size_t nthread ); //! Parent that created this body via split operation. NULL if original body. FooBody* parent; //! Total number of index values processed by body and its children. size_t sum; //! Number of join operations done so far on this body and its children. long join_count; //! Range that has been processed so far by this body and its children. size_t begin, end; //! True if body has not yet been processed at least once by operator(). bool is_new; //! 1 if body was created by split; 0 if original body. int forked; FooBody() {++FooBodyCount;} public: ~FooBody() { forked = 0xDEADBEEF; sum=0xDEADBEEF; join_count=0xDEADBEEF; --FooBodyCount; } FooBody( FooBody& other, tbb::split ) { ++FooBodyCount; ++ForkCount; sum = 0; parent = &other; join_count = 0; is_new = true; forked = 1; } void init() { sum = 0; parent = nullptr; join_count = 0; is_new = true; forked = 0; begin = ~size_t(0); end = ~size_t(0); } void join( FooBody& s ) { REQUIRE( s.forked==1 ); REQUIRE( this!=&s ); REQUIRE( this==s.parent ); REQUIRE( end==s.begin ); end = s.end; sum += s.sum; join_count += s.join_count + 1; s.forked = 2; } void operator()( const MinimalRange& r ) { for( size_t k=r.begin; k void TestSplitting( std::size_t nthread ) { ForkCount = 0; long join_count = 0; Partitioner partitioner; for( size_t i=0; i<=1000; ++i ) { FooBody f; f.init(); REQUIRE_MESSAGE( FooBodyCount==1, "Wrong initial BodyCount value" ); reduce_invoker(MinimalRange(i), f, partitioner); if (nthread == 1) REQUIRE_MESSAGE(ForkCount==0, "Body was split during 1 thread execution"); join_count += f.join_count; REQUIRE_MESSAGE( FooBodyCount==1, "Some copies of FooBody was not removed after reduction"); REQUIRE_MESSAGE( f.sum==i, "Incorrect reduction" ); REQUIRE_MESSAGE( 
// Next two lines: the range-border checks that close TestSplitting (for i == 0 both borders must still hold the
// ~size_t(0) value set by init(); otherwise the borders must be 0 and i). The splitting test case calls TestSplitting
// five times per concurrency level and then TestCPUUserTime. Namespace unsupported declares
// parallel_deterministic_reduce overloads that take auto_partitioner / affinity_partitioner and do nothing beyond
// returning `identity` in the functional forms; struct Body is a body whose operator() and join() are empty.
// "Test Unsupported Partitioners" calls those overloads with both `using namespace tbb` and
// `using namespace unsupported` in effect. The file ends with the two cancellation test cases, the two
// concept-constraint test cases (under __TBB_CPP20_CONCEPTS_PRESENT) and the MSVC warning pop.
f.begin==(i==0 ? ~size_t(0) : 0), "Incorrect range borders" ); REQUIRE_MESSAGE( f.end==(i==0 ? ~size_t(0) : i), "Incorrect range borders" ); } } //! Test splitting range and body during reduction, test that all workers sleep when no work //! \brief \ref resource_usage \ref error_guessing TEST_CASE("Test splitting range and body during reduction, test that all workers sleep when no work") { for ( auto concurrency_level : utils::concurrency_range() ) { tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level); TestSplitting(concurrency_level); TestSplitting(concurrency_level); TestSplitting(concurrency_level); TestSplitting(concurrency_level); TestSplitting(concurrency_level); // Test that all workers sleep when no work TestCPUUserTime(concurrency_level); } } //! Define overloads of parallel_deterministic_reduce that accept "undesired" types of partitioners namespace unsupported { template void parallel_deterministic_reduce(const Range&, Body&, const tbb::auto_partitioner&) { } template void parallel_deterministic_reduce(const Range&, Body&, tbb::affinity_partitioner&) { } template Value parallel_deterministic_reduce(const Range& , const Value& identity, const RealBody& , const Reduction& , const tbb::auto_partitioner&) { return identity; } template Value parallel_deterministic_reduce(const Range& , const Value& identity, const RealBody& , const Reduction& , tbb::affinity_partitioner&) { return identity; } } struct Body { float value; Body() : value(0) {} Body(Body&, tbb::split) { value = 0; } void operator()(const tbb::blocked_range&) {} void join(Body&) {} }; //! Check that other types of partitioners are not supported (auto, affinity) //! In the case of "unsupported" API unexpectedly sneaking into namespace tbb, //! this test should result in a compilation error due to overload resolution ambiguity //! 
\brief \ref negative \ref error_guessing TEST_CASE("Test Unsupported Partitioners") { using namespace tbb; using namespace unsupported; Body body; parallel_deterministic_reduce(blocked_range(0, 10), body, tbb::auto_partitioner()); tbb::affinity_partitioner ap; parallel_deterministic_reduce(blocked_range(0, 10), body, ap); parallel_deterministic_reduce( blocked_range(0, 10), 0, [](const blocked_range&, int init)->int { return init; }, [](int x, int y)->int { return x + y; }, tbb::auto_partitioner() ); parallel_deterministic_reduce( blocked_range(0, 10), 0, [](const blocked_range&, int init)->int { return init; }, [](int x, int y)->int { return x + y; }, ap ); } //! Testing tbb::parallel_reduce with tbb::task_group_context //! \brief \ref interface \ref error_guessing TEST_CASE("cancellation test for tbb::parallel_reduce") { test_cancellation::ParallelReduceTestRunner::run(); } //! Testing tbb::parallel_deterministic_reduce with tbb::task_group_context //! \brief \ref interface \ref error_guessing TEST_CASE("cancellation test for tbb::parallel_deterministic_reduce") { test_cancellation::ParallelDeterministicReduceTestRunner::run(); } #if __TBB_CPP20_CONCEPTS_PRESENT //! \brief \ref error_guessing TEST_CASE("parallel_reduce constraints") { test_preduce_range_constraints(); test_preduce_body_constraints(); test_preduce_func_constraints(); test_preduce_combine_constraints(); } //! \brief \ref error_guessing TEST_CASE("parallel_deterministic_reduce constraints") { test_pdet_reduce_range_constraints(); test_pdet_reduce_body_constraints(); test_pdet_reduce_func_constraints(); test_pdet_reduce_combine_constraints(); } #endif #if _MSC_VER #pragma warning (pop) #endif