//---------------------------------------------------------------------------// // Copyright (c) 2016 Jakub Szuppe // // Distributed under the Boost Software License, Version 1.0 // See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt // // See http://boostorg.github.com/compute for more information. //---------------------------------------------------------------------------// #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_ON_CPU_HPP #define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_ON_CPU_HPP #include #include #include #include #include #include namespace boost { namespace compute { namespace detail { template inline InputIterator find_extrema_on_cpu(InputIterator first, InputIterator last, Compare compare, const bool find_minimum, command_queue &queue) { typedef typename std::iterator_traits::value_type input_type; typedef typename std::iterator_traits::difference_type difference_type; size_t count = iterator_range_size(first, last); const device &device = queue.get_device(); const uint_ compute_units = queue.get_device().compute_units(); boost::shared_ptr parameters = detail::parameter_cache::get_global_cache(device); std::string cache_key = "__boost_find_extrema_cpu_" + boost::lexical_cast(sizeof(input_type)); // for inputs smaller than serial_find_extrema_threshold // serial_find_extrema algorithm is used uint_ serial_find_extrema_threshold = parameters->get( cache_key, "serial_find_extrema_threshold", 16384 * sizeof(input_type) ); serial_find_extrema_threshold = (std::max)(serial_find_extrema_threshold, uint_(2 * compute_units)); const context &context = queue.get_context(); if(count < serial_find_extrema_threshold) { return serial_find_extrema(first, last, compare, find_minimum, queue); } meta_kernel k("find_extrema_on_cpu"); buffer output(context, sizeof(input_type) * compute_units); buffer output_idx( context, sizeof(uint_) * compute_units, buffer::read_write | buffer::alloc_host_ptr ); size_t count_arg = k.add_arg("count"); size_t output_arg = k.add_arg(memory_object::global_memory, "output"); size_t output_idx_arg = k.add_arg(memory_object::global_memory, "output_idx"); k << "uint block = " << "(uint)ceil(((float)count)/get_global_size(0));\n" << "uint index = get_global_id(0) * block;\n" << "uint end = min(count, index + block);\n" << "uint value_index = index;\n" << k.decl("value") << " = " << first[k.var("index")] << ";\n" << "index++;\n" << "while(index < end){\n" << k.decl("candidate") << " = " << first[k.var("index")] << ";\n" << "#ifndef BOOST_COMPUTE_FIND_MAXIMUM\n" << "bool compare = " << compare(k.var("candidate"), k.var("value")) << ";\n" << "#else\n" << "bool compare = " << compare(k.var("value"), k.var("candidate")) << ";\n" << "#endif\n" << "value = compare ? candidate : value;\n" << "value_index = compare ? index : value_index;\n" << "index++;\n" << "}\n" << "output[get_global_id(0)] = value;\n" << "output_idx[get_global_id(0)] = value_index;\n"; size_t global_work_size = compute_units; std::string options; if(!find_minimum){ options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; } kernel kernel = k.compile(context, options); kernel.set_arg(count_arg, static_cast(count)); kernel.set_arg(output_arg, output); kernel.set_arg(output_idx_arg, output_idx); queue.enqueue_1d_range_kernel(kernel, 0, global_work_size, 0); buffer_iterator result = serial_find_extrema( make_buffer_iterator(output), make_buffer_iterator(output, global_work_size), compare, find_minimum, queue ); uint_* output_idx_host_ptr = static_cast( queue.enqueue_map_buffer( output_idx, command_queue::map_read, 0, global_work_size * sizeof(uint_) ) ); difference_type extremum_idx = static_cast(*(output_idx_host_ptr + result.get_index())); return first + extremum_idx; } } // end detail namespace } // end compute namespace } // end boost namespace #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_ON_CPU_HPP