/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "common/test.h"

#include "tbb/parallel_for.h"

#include "common/config.h"
#include "common/utils.h"
#include "common/utils_concurrency_limit.h"
#include "common/utils_report.h"
#include "common/vector_types.h"
#include "common/cpu_usertime.h"
#include "common/spin_barrier.h"
#include "common/exception_handling.h"
#include "common/concepts_common.h"
#include "test_partitioner.h"

// Standard headers for the std:: names used directly in this file
// (std::atomic, std::size_t, std::forward, std::vector).
#include <atomic>
#include <cstddef>
#include <utility>
#include <vector>

//! \file test_parallel_for.cpp
//! \brief Test for [algorithms.parallel_for] specification

#if _MSC_VER
#pragma warning (push)
// Suppress conditional expression is constant
#pragma warning (disable: 4127)
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
    // Suppress pointless "unreachable code" warning.
    #pragma warning (disable: 4702)
#endif
#if defined(_Wp64)
    // Workaround for overzealous compiler warnings in /Wp64 mode
    #pragma warning (disable: 4267)
#endif
#define _SCL_SECURE_NO_WARNINGS
#endif //#if _MSC_VER

#if (HAVE_m128 || HAVE_m256)
//! Body that copies Src[i] to Dst[i] for every index i of the range it is given.
template<typename ClassWithVectorType>
struct SSE_Functor {
    ClassWithVectorType* Src, * Dst;
    SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {}

    void operator()( tbb::blocked_range<int>& r ) const {
        for( int i=r.begin(); i!=r.end(); ++i )
            Dst[i] = Src[i];
    }
};

//! Test that parallel_for works with stack-allocated __m128
// NOTE(review): the fill and verification loop bodies below were reconstructed
// (fill Array1 with values built from the index, copy in parallel, compare Array2);
// confirm against the reference version of this test.
template<typename ClassWithVectorType>
void TestVectorTypes() {
    const int aSize = 300;
    ClassWithVectorType Array1[aSize], Array2[aSize];
    for( int i=0; i<aSize; ++i ) {
        // Use an explicit named variable rather than a temporary when filling the array
        ClassWithVectorType foo(i);
        Array1[i] = foo;
    }
    tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) );
    for( int i=0; i<aSize; ++i ) {
        ClassWithVectorType foo(i);
        CHECK( Array2[i]==foo );
    }
}
#endif /* HAVE_m128 || HAVE_m256 */

//! Body that records where each subrange starts: sets ranges[r.begin()] to 1.
/** Uses std::vector::at, so an out-of-bounds begin index throws instead of corrupting memory. */
struct TestSimplePartitionerStabilityFunctor {
    std::vector<int> & ranges;

    TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges) : ranges(theRanges) {}

    void operator()(tbb::blocked_range<std::size_t>& r) const {
        ranges.at(r.begin()) = 1;
    }
};

//! Checks that tbb::simple_partitioner splits the same range identically on repeated runs.
/** For repeat_count different grain sizes the same blocked_range is processed twice;
    the sets of subrange start positions recorded by both runs must be equal. */
void TestSimplePartitionerStability(){
    const std::size_t repeat_count = 10;
    const std::size_t rangeToSplitSize = 1000000;
    const std::size_t grainsizeStep = rangeToSplitSize/repeat_count;
    typedef TestSimplePartitionerStabilityFunctor FunctorType;

    for (std::size_t i=0, grainsize=grainsizeStep; i<repeat_count; i++, grainsize+=grainsizeStep) {
        std::vector<int> firstSeries(rangeToSplitSize,0);
        std::vector<int> secondSeries(rangeToSplitSize,0);

        tbb::parallel_for(tbb::blocked_range<std::size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner());
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner());

        CHECK_MESSAGE(
            firstSeries == secondSeries,
            "Splitting range with tbb::simple_partitioner must be reproducible; i = " << i
        );
    }
}

namespace various_range_implementations {

using namespace test_partitioner_utils;
using namespace test_partitioner_utils::TestRanges;

// Body ensures that initial work distribution is done uniformly through affinity mechanism and not
// through work stealing
class Body {
    utils::SpinBarrier &m_sb;
public:
    Body(utils::SpinBarrier& sb) : m_sb(sb) { }
    // Splitting constructor: the new body shares the barrier of the body it was split from
    Body(Body& b, tbb::split) : m_sb(b.m_sb) { }

    template <typename Range>
    void operator()(Range& r) const {
        INFO("Executing range [" << r.begin() << ", " << r.end() << "]");
        m_sb.wait(); // waiting for all threads
    }
};

namespace correctness {

/* Testing only correctness (that is parallel_for does not hang) */
template <typename RangeType, bool /*feedback*/, bool ensure_non_emptiness>
void test() {
    RangeType range( 0, utils::get_platform_max_threads(), NULL, false, ensure_non_emptiness );
    tbb::affinity_partitioner ap;
    tbb::parallel_for( range, SimpleBody(), ap );
}

} // namespace correctness

namespace uniform_distribution {

/* Body of parallel_for algorithm would hang if non-uniform work distribution happened  */
template <typename RangeType, bool feedback, bool ensure_non_emptiness>
void test() {
    static const std::size_t thread_num = utils::get_platform_max_threads();
    utils::SpinBarrier sb( thread_num );
    RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness);
    const Body sync_body( sb );
    tbb::affinity_partitioner ap;
    tbb::parallel_for( range, sync_body, ap );
    tbb::parallel_for( range, sync_body, tbb::static_partitioner() );
}

} // namespace uniform_distribution

//! Runs the correctness and uniform-distribution checks over several range implementations.
// NOTE(review): the range type names passed as template arguments below were
// reconstructed; verify them against test_partitioner_utils::TestRanges.
void test() {
    const bool provide_feedback = false;
    const bool ensure_non_empty_range = true;

    // BlockedRange does not take into account feedback and non-emptiness settings but uses the
    // tbb::blocked_range implementation
    uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>();
    using correctness::test;

    {
        test<RoundedDownRange, provide_feedback, ensure_non_empty_range>();
        test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>();
    }

    {
        test<RoundedUpRange, provide_feedback, ensure_non_empty_range>();
        test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>();
    }

    // Testing that parallel_for algorithm works with such weird ranges
    correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>();
    correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>();
    correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>();

    // The following ranges do not comply with the proportion suggested by partitioner. Therefore
    // they have to provide the proportion in which they were actually split back to partitioner and
    // ensure their non-emptiness
    test<Range1_2, provide_feedback, ensure_non_empty_range>();
    test<Range1_999, provide_feedback, ensure_non_empty_range>();
    test<Range999_1, provide_feedback, ensure_non_empty_range>();
}

} // namespace various_range_implementations

namespace test_cancellation {

//! Body used by the cancellation tests; every invocation increments g_CurExecuted.
struct FunctorToCancel {
    static std::atomic<bool> need_to_wait;

    // Index-based overload: while need_to_wait is set, calls Cancellator::WaitUntilReady()
    // and stores its result back into need_to_wait.
    void operator()( std::size_t ) const {
        ++g_CurExecuted;
        if (need_to_wait) {
            need_to_wait = Cancellator::WaitUntilReady();
        }
    }

    // Range-based overload: always calls Cancellator::WaitUntilReady().
    void operator()( const tbb::blocked_range<std::size_t>& ) const {
        ++g_CurExecuted;
        Cancellator::WaitUntilReady();
    }

    static void reset() { need_to_wait = true; }
}; // struct FunctorToCancel

std::atomic<bool> FunctorToCancel::need_to_wait(true);

static constexpr std::size_t buffer_test_size = 1024;
static constexpr std::size_t maxParallelForRunnerMode = 14;

//! Invokes one particular tbb::parallel_for overload with an explicit task_group_context.
/** Mode / 5 selects the argument form (0: blocked_range, 1: two indexes, 2: two indexes and step);
    Mode % 5 selects between the overload without a partitioner and the four partitioner overloads.
    The destructor resets FunctorToCancel::need_to_wait to true. */
template <std::size_t Mode>
class ParallelForRunner {
    tbb::task_group_context& my_ctx;
    const std::size_t worker_task_step = 1;

    static_assert(Mode >= 0 && Mode <= maxParallelForRunnerMode, "Incorrect mode for ParallelForRunner");

    // Calls parallel_for with a default-constructed Partitioner and the stored context
    template <typename Partitioner, typename... Args>
    void run_parallel_for( Args&&... args ) const {
        Partitioner part;
        tbb::parallel_for(std::forward<Args>(args)..., part, my_ctx);
    }

    // NOTE(review): which partitioner belongs to which case was reconstructed
    // (simple, auto, static, affinity); confirm the order.
    template <typename... Args>
    void run_overload( Args&&... args ) const {
        switch(Mode % 5) {
            case 0 : {
                tbb::parallel_for(std::forward<Args>(args)..., my_ctx);
                break;
            }
            case 1 : {
                run_parallel_for<tbb::simple_partitioner>(std::forward<Args>(args)...);
                break;
            }
            case 2 : {
                run_parallel_for<tbb::auto_partitioner>(std::forward<Args>(args)...);
                break;
            }
            case 3 : {
                run_parallel_for<tbb::static_partitioner>(std::forward<Args>(args)...);
                break;
            }
            case 4 : {
                run_parallel_for<tbb::affinity_partitioner>(std::forward<Args>(args)...);
                break;
            }
        }
    }

public:
    ParallelForRunner( tbb::task_group_context& ctx )
        : my_ctx(ctx) {}

    ~ParallelForRunner() { FunctorToCancel::reset(); }

    void operator()() const {
        if (Mode < 5) {
            // Overload with blocked range
            tbb::blocked_range<std::size_t> br(0, buffer_test_size);
            run_overload(br, FunctorToCancel{});
        } else if (Mode < 10) {
            // Overload with two indexes
            run_overload(std::size_t(0), buffer_test_size, FunctorToCancel{});
        } else {
            // Overload with two indexes and step
            run_overload(std::size_t(0), buffer_test_size, worker_task_step, FunctorToCancel{});
        }
    }
}; // class ParallelForRunner

//! Runs the cancellation test for the parallel_for overload selected by Mode.
/** Does nothing when fewer than two threads are available. */
template <std::size_t Mode>
void run_parallel_for_cancellation_test() {
    // TODO: enable concurrency_range
    if (utils::get_platform_max_threads() < 2) {
        // The test requires at least one worker thread to request cancellation
        return;
    }
    ResetEhGlobals();
    RunCancellationTest<ParallelForRunner<Mode>, Cancellator>();
}

//! Compile-time loop: runs the cancellation test for Mode, then recurses to Mode + 1.
template <std::size_t Mode>
struct ParallelForTestRunner {
    static void run() {
        run_parallel_for_cancellation_test<Mode>();
        ParallelForTestRunner<Mode + 1>::run();
    }
}; // struct ParallelForTestRunner

//! Terminal case of the compile-time loop: the last supported mode.
template <>
struct ParallelForTestRunner<maxParallelForRunnerMode> {
    static void run() {
        run_parallel_for_cancellation_test<maxParallelForRunnerMode>();
    }
}; // struct ParallelForTestRunner<maxParallelForRunnerMode>

} // namespace test_cancellation

#if __TBB_CPP20_CONCEPTS_PRESENT
// NOTE(review): the template parameter lists and the argument types used in the
// concepts and static_asserts of this section were reconstructed; verify the
// type names against test_concepts in common/concepts_common.h.

// Satisfied when tbb::parallel_for is callable with arguments of types Args...
template <typename... Args>
concept can_call_parallel_for_basic = requires( Args&&... args ) {
    tbb::parallel_for(std::forward<Args>(args)...);
};

// Callable both without and with a trailing task_group_context
template <typename... Args>
concept can_call_parallel_for_helper = can_call_parallel_for_basic<Args...> &&
                                       can_call_parallel_for_basic<Args..., tbb::task_group_context&>;

// Callable without a partitioner and with each of the partitioner types
template <typename... Args>
concept can_call_parallel_for_with_partitioner = can_call_parallel_for_helper<Args...> &&
                                                 can_call_parallel_for_helper<Args..., const tbb::simple_partitioner&> &&
                                                 can_call_parallel_for_helper<Args..., const tbb::auto_partitioner&> &&
                                                 can_call_parallel_for_helper<Args..., const tbb::static_partitioner&> &&
                                                 can_call_parallel_for_helper<Args..., tbb::affinity_partitioner&>;

// Range-based overloads
template <typename Range, typename Body>
concept can_call_range_pfor = can_call_parallel_for_with_partitioner<const Range&, const Body&>;

// Index-based overloads, without and with a step
template <typename Index, typename Function>
concept can_call_index_pfor = can_call_parallel_for_with_partitioner<Index, Index, const Function&> &&
                              can_call_parallel_for_with_partitioner<Index, Index, Index, const Function&>;

template <typename Range>
using CorrectBody = test_concepts::parallel_for_body::Correct<Range>;
template <typename Index>
using CorrectFunc = test_concepts::parallel_for_function::Correct<Index>;

//! parallel_for must accept a correct range type and reject each malformed one.
void test_pfor_range_constraints() {
    using namespace test_concepts::range;
    static_assert(can_call_range_pfor<Correct, CorrectBody<Correct>>);
    static_assert(!can_call_range_pfor<NonCopyable, CorrectBody<NonCopyable>>);
    static_assert(!can_call_range_pfor<NonDestructible, CorrectBody<NonDestructible>>);
    static_assert(!can_call_range_pfor<NonSplittable, CorrectBody<NonSplittable>>);
    static_assert(!can_call_range_pfor<NoEmpty, CorrectBody<NoEmpty>>);
    static_assert(!can_call_range_pfor<EmptyNonConst, CorrectBody<EmptyNonConst>>);
    static_assert(!can_call_range_pfor<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
    static_assert(!can_call_range_pfor<NoIsDivisible, CorrectBody<NoIsDivisible>>);
    static_assert(!can_call_range_pfor<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>);
    static_assert(!can_call_range_pfor<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
}

//! parallel_for must accept a correct body type and reject each malformed one.
void test_pfor_body_constraints() {
    using namespace test_concepts::parallel_for_body;
    using CorrectRange = test_concepts::range::Correct;

    static_assert(can_call_range_pfor<CorrectRange, Correct<CorrectRange>>);
    static_assert(!can_call_range_pfor<CorrectRange, NonCopyable<CorrectRange>>);
    static_assert(!can_call_range_pfor<CorrectRange, NonDestructible<CorrectRange>>);
    static_assert(!can_call_range_pfor<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>);
    static_assert(!can_call_range_pfor<CorrectRange, OperatorRoundBracketsNonConst<CorrectRange>>);
    static_assert(!can_call_range_pfor<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>);
}

//! parallel_for must accept a correct function type and reject each malformed one.
void test_pfor_func_constraints() {
    using namespace test_concepts::parallel_for_function;
    using CorrectIndex = test_concepts::parallel_for_index::Correct;

    static_assert(can_call_index_pfor<CorrectIndex, Correct<CorrectIndex>>);
    static_assert(!can_call_index_pfor<CorrectIndex, NoOperatorRoundBrackets<CorrectIndex>>);
    static_assert(!can_call_index_pfor<CorrectIndex, OperatorRoundBracketsNonConst<CorrectIndex>>);
    static_assert(!can_call_index_pfor<CorrectIndex, WrongInputOperatorRoundBrackets<CorrectIndex>>);
}

//! parallel_for must accept a correct index type and reject each malformed one.
void test_pfor_index_constraints() {
    using namespace test_concepts::parallel_for_index;
    static_assert(can_call_index_pfor<Correct, CorrectFunc<Correct>>);
    static_assert(!can_call_index_pfor<NoIntCtor, CorrectFunc<NoIntCtor>>);
    static_assert(!can_call_index_pfor<NonCopyable, CorrectFunc<NonCopyable>>);
    static_assert(!can_call_index_pfor<NonCopyAssignable, CorrectFunc<NonCopyAssignable>>);
    static_assert(!can_call_index_pfor<NonDestructible, CorrectFunc<NonDestructible>>);
    static_assert(!can_call_index_pfor<NoOperatorLess, CorrectFunc<NoOperatorLess>>);
    static_assert(!can_call_index_pfor<OperatorLessNonConst, CorrectFunc<OperatorLessNonConst>>);
    static_assert(!can_call_index_pfor<WrongInputOperatorLess, CorrectFunc<WrongInputOperatorLess>>);
    static_assert(!can_call_index_pfor<WrongReturnOperatorLess, CorrectFunc<WrongReturnOperatorLess>>);
    static_assert(!can_call_index_pfor<NoOperatorMinus, CorrectFunc<NoOperatorMinus>>);
    static_assert(!can_call_index_pfor<OperatorMinusNonConst, CorrectFunc<OperatorMinusNonConst>>);
    static_assert(!can_call_index_pfor<WrongInputOperatorMinus, CorrectFunc<WrongInputOperatorMinus>>);
    static_assert(!can_call_index_pfor<WrongReturnOperatorMinus, CorrectFunc<WrongReturnOperatorMinus>>);
    static_assert(!can_call_index_pfor<NoOperatorPlus, CorrectFunc<NoOperatorPlus>>);
    static_assert(!can_call_index_pfor<OperatorPlusNonConst, CorrectFunc<OperatorPlusNonConst>>);
    static_assert(!can_call_index_pfor<WrongInputOperatorPlus, CorrectFunc<WrongInputOperatorPlus>>);
    static_assert(!can_call_index_pfor<WrongReturnOperatorPlus, CorrectFunc<WrongReturnOperatorPlus>>);
}
#endif // __TBB_CPP20_CONCEPTS_PRESENT

#if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO
#include "tbb/global_control.h"
//! Testing exceptions
//! \brief \ref requirement
TEST_CASE("Exceptions support") {
    for ( int p = MinThread; p <= MaxThread; ++p ) {
        if ( p > 0 ) {
            tbb::global_control control(tbb::global_control::max_allowed_parallelism, p);
            TestExceptionsSupport();
        }
    }
}
#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO */

//! Testing parallel_for with vector (SSE/AVX) types
//! \brief \ref error_guessing
// NOTE(review): the wrapper class names passed to TestVectorTypes were reconstructed;
// verify them against common/vector_types.h.
TEST_CASE("Vector types") {
#if HAVE_m128
    TestVectorTypes<ClassWithSSE>();
#endif
#if HAVE_m256
    if (have_AVX()) TestVectorTypes<ClassWithAVX>();
#endif
}

//! Testing workers going to sleep
//! \brief \ref resource_usage
TEST_CASE("That all workers sleep when no work") {
    const std::size_t N = 100000;
    std::atomic<std::size_t> counter{};

    // Give every thread some work first, then check CPU usage once the loop has finished
    tbb::parallel_for(std::size_t(0), N, [&](std::size_t) {
        for (int i = 0; i < 1000; ++i) {
            ++counter;
        }
    }, tbb::simple_partitioner());
    TestCPUUserTime(utils::get_platform_max_threads());
}

//! Testing simple partitioner stability
//! \brief \ref error_guessing
TEST_CASE("Simple partitioner stability") {
    TestSimplePartitionerStability();
}

//! Testing various range implementations
//! \brief \ref requirement
TEST_CASE("Various range implementations") {
    various_range_implementations::test();
}

//! Testing parallel_for with explicit task_group_context
//! \brief \ref interface \ref error_guessing
// The test name starts with a Latin 'C' so that it can be selected by a name filter typed in ASCII.
TEST_CASE("Cancellation test for tbb::parallel_for") {
    test_cancellation::ParallelForTestRunner</*FirstMode = */0>::run();
}

#if __TBB_CPP20_CONCEPTS_PRESENT
//! Testing parallel_for constraints
//! \brief \ref error_guessing
TEST_CASE("parallel_for constraints") {
    test_pfor_range_constraints();
    test_pfor_body_constraints();
    test_pfor_func_constraints();
    test_pfor_index_constraints();
}
#endif // __TBB_CPP20_CONCEPTS_PRESENT

#if _MSC_VER
#pragma warning (pop)
#endif