/* Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifndef __TBB__flow_graph_node_impl_H #define __TBB__flow_graph_node_impl_H #ifndef __TBB_flow_graph_H #error Do not #include this internal file directly; use public TBB headers instead. #endif #include "_flow_graph_item_buffer_impl.h" template< typename T, typename A > class function_input_queue : public item_buffer { public: bool empty() const { return this->buffer_empty(); } const T& front() const { return this->item_buffer::front(); } void pop() { this->destroy_front(); } bool push( T& t ) { return this->push_back( t ); } }; //! Input and scheduling for a function node that takes a type Input as input // The only up-ref is apply_body_impl, which should implement the function // call and any handling of the result. template< typename Input, typename Policy, typename A, typename ImplType > class function_input_base : public receiver, no_assign { enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency }; typedef function_input_base class_type; public: //! The input type of this receiver typedef Input input_type; typedef typename receiver::predecessor_type predecessor_type; typedef predecessor_cache predecessor_cache_type; typedef function_input_queue input_queue_type; typedef typename allocator_traits::template rebind_alloc allocator_type; static_assert(!has_policy::value || !has_policy::value, ""); //! Constructor for function_input_base function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) : my_graph_ref(g), my_max_concurrency(max_concurrency) , my_concurrency(0), my_priority(a_priority) , my_queue(!has_policy::value ? new input_queue_type() : NULL) , my_predecessors(this) , forwarder_busy(false) { my_aggregator.initialize_handler(handler_type(this)); } //! Copy constructor function_input_base( const function_input_base& src ) : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} //! Destructor // The queue is allocated by the constructor for {multi}function_node. // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. // This would be an interface-breaking change. virtual ~function_input_base() { if ( my_queue ) delete my_queue; } graph_task* try_put_task( const input_type& t) override { return try_put_task_impl(t, has_policy()); } //! Adds src to the list of cached predecessors. bool register_predecessor( predecessor_type &src ) override { operation_type op_data(reg_pred); op_data.r = &src; my_aggregator.execute(&op_data); return true; } //! Removes src from the list of cached predecessors. bool remove_predecessor( predecessor_type &src ) override { operation_type op_data(rem_pred); op_data.r = &src; my_aggregator.execute(&op_data); return true; } protected: void reset_function_input_base( reset_flags f) { my_concurrency = 0; if(my_queue) { my_queue->reset(); } reset_receiver(f); forwarder_busy = false; } graph& my_graph_ref; const size_t my_max_concurrency; size_t my_concurrency; node_priority_t my_priority; input_queue_type *my_queue; predecessor_cache my_predecessors; void reset_receiver( reset_flags f) { if( f & rf_clear_edges) my_predecessors.clear(); else my_predecessors.reset(); __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); } graph& graph_reference() const override { return my_graph_ref; } graph_task* try_get_postponed_task(const input_type& i) { operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item my_aggregator.execute(&op_data); return op_data.bypass_t; } private: friend class apply_body_task_bypass< class_type, input_type >; friend class forward_task_bypass< class_type >; class operation_type : public aggregated_operation< operation_type > { public: char type; union { input_type *elem; predecessor_type *r; }; graph_task* bypass_t; operation_type(const input_type& e, op_type t) : type(char(t)), elem(const_cast(&e)) {} operation_type(op_type t) : type(char(t)), r(NULL) {} }; bool forwarder_busy; typedef aggregating_functor handler_type; friend class aggregating_functor; aggregator< handler_type, operation_type > my_aggregator; graph_task* perform_queued_requests() { graph_task* new_task = NULL; if(my_queue) { if(!my_queue->empty()) { ++my_concurrency; new_task = create_body_task(my_queue->front()); my_queue->pop(); } } else { input_type i; if(my_predecessors.get_item(i)) { ++my_concurrency; new_task = create_body_task(i); } } return new_task; } void handle_operations(operation_type *op_list) { operation_type* tmp; while (op_list) { tmp = op_list; op_list = op_list->next; switch (tmp->type) { case reg_pred: my_predecessors.add(*(tmp->r)); tmp->status.store(SUCCEEDED, std::memory_order_release); if (!forwarder_busy) { forwarder_busy = true; spawn_forward_task(); } break; case rem_pred: my_predecessors.remove(*(tmp->r)); tmp->status.store(SUCCEEDED, std::memory_order_release); break; case app_body_bypass: { tmp->bypass_t = NULL; __TBB_ASSERT(my_max_concurrency != 0, NULL); --my_concurrency; if(my_concurrencybypass_t = perform_queued_requests(); tmp->status.store(SUCCEEDED, std::memory_order_release); } break; case tryput_bypass: internal_try_put_task(tmp); break; case try_fwd: internal_forward(tmp); break; case occupy_concurrency: if (my_concurrency < my_max_concurrency) { ++my_concurrency; tmp->status.store(SUCCEEDED, std::memory_order_release); } else { tmp->status.store(FAILED, std::memory_order_release); } break; } } } //! Put to the node, but return the task instead of enqueueing it void internal_try_put_task(operation_type *op) { __TBB_ASSERT(my_max_concurrency != 0, NULL); if (my_concurrency < my_max_concurrency) { ++my_concurrency; graph_task * new_task = create_body_task(*(op->elem)); op->bypass_t = new_task; op->status.store(SUCCEEDED, std::memory_order_release); } else if ( my_queue && my_queue->push(*(op->elem)) ) { op->bypass_t = SUCCESSFULLY_ENQUEUED; op->status.store(SUCCEEDED, std::memory_order_release); } else { op->bypass_t = NULL; op->status.store(FAILED, std::memory_order_release); } } //! Creates tasks for postponed messages if available and if concurrency allows void internal_forward(operation_type *op) { op->bypass_t = NULL; if (my_concurrency < my_max_concurrency) op->bypass_t = perform_queued_requests(); if(op->bypass_t) op->status.store(SUCCEEDED, std::memory_order_release); else { forwarder_busy = false; op->status.store(FAILED, std::memory_order_release); } } graph_task* internal_try_put_bypass( const input_type& t ) { operation_type op_data(t, tryput_bypass); my_aggregator.execute(&op_data); if( op_data.status == SUCCEEDED ) { return op_data.bypass_t; } return NULL; } graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { if( my_max_concurrency == 0 ) { return apply_body_bypass(t); } else { operation_type check_op(t, occupy_concurrency); my_aggregator.execute(&check_op); if( check_op.status == SUCCEEDED ) { return apply_body_bypass(t); } return internal_try_put_bypass(t); } } graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { if( my_max_concurrency == 0 ) { return create_body_task(t); } else { return internal_try_put_bypass(t); } } //! Applies the body to the provided input // then decides if more work is available graph_task* apply_body_bypass( const input_type &i ) { return static_cast(this)->apply_body_impl_bypass(i); } //! allocates a task to apply a body graph_task* create_body_task( const input_type &input ) { if (!is_graph_active(my_graph_ref)) { return nullptr; } // TODO revamp: extract helper for common graph task allocation part small_object_allocator allocator{}; typedef apply_body_task_bypass task_type; graph_task* t = allocator.new_object( my_graph_ref, allocator, *this, input, my_priority ); graph_reference().reserve_wait(); return t; } //! This is executed by an enqueued task, the "forwarder" graph_task* forward_task() { operation_type op_data(try_fwd); graph_task* rval = NULL; do { op_data.status = WAIT; my_aggregator.execute(&op_data); if(op_data.status == SUCCEEDED) { graph_task* ttask = op_data.bypass_t; __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); rval = combine_tasks(my_graph_ref, rval, ttask); } } while (op_data.status == SUCCEEDED); return rval; } inline graph_task* create_forward_task() { if (!is_graph_active(my_graph_ref)) { return nullptr; } small_object_allocator allocator{}; typedef forward_task_bypass task_type; graph_task* t = allocator.new_object( graph_reference(), allocator, *this, my_priority ); graph_reference().reserve_wait(); return t; } //! Spawns a task that calls forward() inline void spawn_forward_task() { graph_task* tp = create_forward_task(); if(tp) { spawn_in_graph_arena(graph_reference(), *tp); } } node_priority_t priority() const override { return my_priority; } }; // function_input_base //! Implements methods for a function node that takes a type Input as input and sends // a type Output to its successors. template< typename Input, typename Output, typename Policy, typename A> class function_input : public function_input_base > { public: typedef Input input_type; typedef Output output_type; typedef function_body function_body_type; typedef function_input my_class; typedef function_input_base base_type; typedef function_input_queue input_queue_type; // constructor template function_input( graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) : base_type(g, max_concurrency, a_priority) , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { } //! Copy constructor function_input( const function_input& src ) : base_type(src), my_body( src.my_init_body->clone() ), my_init_body(src.my_init_body->clone() ) { } #if __INTEL_COMPILER <= 2021 // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited // class while the parent class has the virtual keyword for the destrocutor. virtual #endif ~function_input() { delete my_body; delete my_init_body; } template< typename Body > Body copy_function_object() { function_body_type &body_ref = *this->my_body; return dynamic_cast< function_body_leaf & >(body_ref).get_body(); } output_type apply_body_impl( const input_type& i) { // There is an extra copied needed to capture the // body execution without the try_put fgt_begin_body( my_body ); output_type v = (*my_body)(i); fgt_end_body( my_body ); return v; } //TODO: consider moving into the base class graph_task* apply_body_impl_bypass( const input_type &i) { output_type v = apply_body_impl(i); graph_task* postponed_task = NULL; if( base_type::my_max_concurrency != 0 ) { postponed_task = base_type::try_get_postponed_task(i); __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); } if( postponed_task ) { // make the task available for other workers since we do not know successors' // execution policy spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); } graph_task* successor_task = successors().try_put_task(v); #if _MSC_VER && !__INTEL_COMPILER #pragma warning (push) #pragma warning (disable: 4127) /* suppress conditional expression is constant */ #endif if(has_policy::value) { #if _MSC_VER && !__INTEL_COMPILER #pragma warning (pop) #endif if(!successor_task) { // Return confirmative status since current // node's body has been executed anyway successor_task = SUCCESSFULLY_ENQUEUED; } } return successor_task; } protected: void reset_function_input(reset_flags f) { base_type::reset_function_input_base(f); if(f & rf_reset_bodies) { function_body_type *tmp = my_init_body->clone(); delete my_body; my_body = tmp; } } function_body_type *my_body; function_body_type *my_init_body; virtual broadcast_cache &successors() = 0; }; // function_input // helper templates to clear the successor edges of the output ports of an multifunction_node template struct clear_element { template static void clear_this(P &p) { (void)std::get(p).successors().clear(); clear_element::clear_this(p); } #if TBB_USE_ASSERT template static bool this_empty(P &p) { if(std::get(p).successors().empty()) return clear_element::this_empty(p); return false; } #endif }; template<> struct clear_element<1> { template static void clear_this(P &p) { (void)std::get<0>(p).successors().clear(); } #if TBB_USE_ASSERT template static bool this_empty(P &p) { return std::get<0>(p).successors().empty(); } #endif }; template struct init_output_ports { template static OutputTuple call(graph& g, const std::tuple&) { return OutputTuple(Args(g)...); } }; // struct init_output_ports //! Implements methods for a function node that takes a type Input as input // and has a tuple of output ports specified. template< typename Input, typename OutputPortSet, typename Policy, typename A> class multifunction_input : public function_input_base > { public: static const int N = std::tuple_size::value; typedef Input input_type; typedef OutputPortSet output_ports_type; typedef multifunction_body multifunction_body_type; typedef multifunction_input my_class; typedef function_input_base base_type; typedef function_input_queue input_queue_type; // constructor template multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) : base_type(g, max_concurrency, a_priority) , my_body( new multifunction_body_leaf(body) ) , my_init_body( new multifunction_body_leaf(body) ) , my_output_ports(init_output_ports::call(g, my_output_ports)){ } //! Copy constructor multifunction_input( const multifunction_input& src ) : base_type(src), my_body( src.my_init_body->clone() ), my_init_body(src.my_init_body->clone() ), my_output_ports( init_output_ports::call(src.my_graph_ref, my_output_ports) ) { } ~multifunction_input() { delete my_body; delete my_init_body; } template< typename Body > Body copy_function_object() { multifunction_body_type &body_ref = *this->my_body; return *static_cast(dynamic_cast< multifunction_body_leaf & >(body_ref).get_body_ptr()); } // for multifunction nodes we do not have a single successor as such. So we just tell // the task we were successful. //TODO: consider moving common parts with implementation in function_input into separate function graph_task* apply_body_impl_bypass( const input_type &i ) { fgt_begin_body( my_body ); (*my_body)(i, my_output_ports); fgt_end_body( my_body ); graph_task* ttask = NULL; if(base_type::my_max_concurrency != 0) { ttask = base_type::try_get_postponed_task(i); } return ttask ? ttask : SUCCESSFULLY_ENQUEUED; } output_ports_type &output_ports(){ return my_output_ports; } protected: void reset(reset_flags f) { base_type::reset_function_input_base(f); if(f & rf_clear_edges)clear_element::clear_this(my_output_ports); if(f & rf_reset_bodies) { multifunction_body_type* tmp = my_init_body->clone(); delete my_body; my_body = tmp; } __TBB_ASSERT(!(f & rf_clear_edges) || clear_element::this_empty(my_output_ports), "multifunction_node reset failed"); } multifunction_body_type *my_body; multifunction_body_type *my_init_body; output_ports_type my_output_ports; }; // multifunction_input // template to refer to an output port of a multifunction_node template typename std::tuple_element::type &output_port(MOP &op) { return std::get(op.output_ports()); } inline void check_task_and_spawn(graph& g, graph_task* t) { if (t && t != SUCCESSFULLY_ENQUEUED) { spawn_in_graph_arena(g, *t); } } // helper structs for split_node template struct emit_element { template static graph_task* emit_this(graph& g, const T &t, P &p) { // TODO: consider to collect all the tasks in task_list and spawn them all at once graph_task* last_task = std::get(p).try_put_task(std::get(t)); check_task_and_spawn(g, last_task); return emit_element::emit_this(g,t,p); } }; template<> struct emit_element<1> { template static graph_task* emit_this(graph& g, const T &t, P &p) { graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); check_task_and_spawn(g, last_task); return SUCCESSFULLY_ENQUEUED; } }; //! Implements methods for an executable node that takes continue_msg as input template< typename Output, typename Policy> class continue_input : public continue_receiver { public: //! The input type of this receiver typedef continue_msg input_type; //! The output type of this receiver typedef Output output_type; typedef function_body function_body_type; typedef continue_input class_type; template< typename Body > continue_input( graph &g, Body& body, node_priority_t a_priority ) : continue_receiver(/*number_of_predecessors=*/0, a_priority) , my_graph_ref(g) , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { } template< typename Body > continue_input( graph &g, int number_of_predecessors, Body& body, node_priority_t a_priority ) : continue_receiver( number_of_predecessors, a_priority ) , my_graph_ref(g) , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { } continue_input( const continue_input& src ) : continue_receiver(src), my_graph_ref(src.my_graph_ref), my_body( src.my_init_body->clone() ), my_init_body( src.my_init_body->clone() ) {} ~continue_input() { delete my_body; delete my_init_body; } template< typename Body > Body copy_function_object() { function_body_type &body_ref = *my_body; return dynamic_cast< function_body_leaf & >(body_ref).get_body(); } void reset_receiver( reset_flags f) override { continue_receiver::reset_receiver(f); if(f & rf_reset_bodies) { function_body_type *tmp = my_init_body->clone(); delete my_body; my_body = tmp; } } protected: graph& my_graph_ref; function_body_type *my_body; function_body_type *my_init_body; virtual broadcast_cache &successors() = 0; friend class apply_body_task_bypass< class_type, continue_msg >; //! Applies the body to the provided input graph_task* apply_body_bypass( input_type ) { // There is an extra copied needed to capture the // body execution without the try_put fgt_begin_body( my_body ); output_type v = (*my_body)( continue_msg() ); fgt_end_body( my_body ); return successors().try_put_task( v ); } graph_task* execute() override { if(!is_graph_active(my_graph_ref)) { return NULL; } #if _MSC_VER && !__INTEL_COMPILER #pragma warning (push) #pragma warning (disable: 4127) /* suppress conditional expression is constant */ #endif if(has_policy::value) { #if _MSC_VER && !__INTEL_COMPILER #pragma warning (pop) #endif return apply_body_bypass( continue_msg() ); } else { small_object_allocator allocator{}; typedef apply_body_task_bypass task_type; graph_task* t = allocator.new_object( graph_reference(), allocator, *this, continue_msg(), my_priority ); graph_reference().reserve_wait(); return t; } } graph& graph_reference() const override { return my_graph_ref; } }; // continue_input //! Implements methods for both executable and function nodes that puts Output to its successors template< typename Output > class function_output : public sender { public: template friend struct clear_element; typedef Output output_type; typedef typename sender::successor_type successor_type; typedef broadcast_cache broadcast_cache_type; function_output(graph& g) : my_successors(this), my_graph_ref(g) {} function_output(const function_output& other) = delete; //! Adds a new successor to this node bool register_successor( successor_type &r ) override { successors().register_successor( r ); return true; } //! Removes a successor from this node bool remove_successor( successor_type &r ) override { successors().remove_successor( r ); return true; } broadcast_cache_type &successors() { return my_successors; } graph& graph_reference() const { return my_graph_ref; } protected: broadcast_cache_type my_successors; graph& my_graph_ref; }; // function_output template< typename Output > class multifunction_output : public function_output { public: typedef Output output_type; typedef function_output base_type; using base_type::my_successors; multifunction_output(graph& g) : base_type(g) {} multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} bool try_put(const output_type &i) { graph_task *res = try_put_task(i); if( !res ) return false; if( res != SUCCESSFULLY_ENQUEUED ) { // wrapping in task_arena::execute() is not needed since the method is called from // inside task::execute() spawn_in_graph_arena(graph_reference(), *res); } return true; } using base_type::graph_reference; protected: graph_task* try_put_task(const output_type &i) { return my_successors.try_put_task(i); } template friend struct emit_element; }; // multifunction_output //composite_node template void add_nodes_impl(CompositeType*, bool) {} template< typename CompositeType, typename NodeType1, typename... NodeTypes > void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { void *addr = const_cast(&n1); fgt_alias_port(c_node, addr, visible); add_nodes_impl(c_node, visible, n...); } #endif // __TBB__flow_graph_node_impl_H