21 #ifndef __TBB_flow_graph_opencl_node_H 22 #define __TBB_flow_graph_opencl_node_H 25 #if __TBB_PREVIEW_OPENCL_NODE 38 #include <OpenCL/opencl.h> 46 namespace interface10 {
48 template <
typename DeviceFilter>
51 namespace opencl_info {
55 template <
typename Factory>
59 if (err != CL_SUCCESS) {
60 std::cerr << msg <<
"; error code: " << err << std::endl;
68 enforce_cl_retcode(clGetEventInfo(e, i,
sizeof(res), &res, NULL),
"Failed to get OpenCL event information");
75 enforce_cl_retcode(clGetDeviceInfo(d, i,
sizeof(res), &res, NULL),
"Failed to get OpenCL device information");
80 inline std::string device_info<std::string>(cl_device_id
d, cl_device_info i) {
82 enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required),
"Failed to get OpenCL device information");
84 char *buff = (
char*)alloca(required);
85 enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL),
"Failed to get OpenCL device information");
93 enforce_cl_retcode(clGetPlatformInfo(p, i,
sizeof(res), &res, NULL),
"Failed to get OpenCL platform information");
98 inline std::string platform_info<std::string>(cl_platform_id
p, cl_platform_info i) {
100 enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required),
"Failed to get OpenCL platform information");
102 char *buff = (
char*)alloca(required);
103 enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL),
"Failed to get OpenCL platform information");
112 enum : device_id_type {
113 unknown = device_id_type( -2 ),
114 host = device_id_type( -1 )
117 opencl_device() : my_device_id( unknown ), my_cl_device_id( NULL ), my_cl_command_queue( NULL ) {}
119 opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
121 opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ), my_cl_command_queue( NULL ) {}
124 return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
127 return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
130 return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
133 return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
136 return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
139 template <
typename T>
140 void info( cl_device_info i, T &t )
const {
141 t = device_info<T>( my_cl_device_id, i );
145 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
149 std::sscanf( version().c_str(),
"OpenCL %d", &major );
154 std::sscanf( version().c_str(),
"OpenCL %d.%d", &major, &minor );
159 if ( major_version() >= 2 )
160 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
163 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
167 if ( major_version() >= 2 )
168 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
174 return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
177 return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
180 const std::string semi =
";";
182 return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
185 return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
188 return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
191 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
194 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
197 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
200 const std::string space =
" ";
202 return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
205 return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
209 return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
213 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
217 return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
221 return my_cl_device_id;
225 return my_cl_command_queue;
229 my_cl_command_queue = cmd_queue;
233 return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
244 template <
typename DeviceFilter>
246 template <
typename Factory>
248 template <
typename Factory>
252 template <
typename T,
typename Factory>
268 size_type
size()
const {
return my_container.size(); }
269 bool empty()
const {
return my_container.empty(); }
270 iterator
begin() {
return my_container.begin(); }
271 iterator
end() {
return my_container.end(); }
272 const_iterator
begin()
const {
return my_container.begin(); }
273 const_iterator
end()
const {
return my_container.end(); }
274 const_iterator
cbegin()
const {
return my_container.cbegin(); }
275 const_iterator
cend()
const {
return my_container.cend(); }
287 cl_uint num_platforms;
288 enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms),
"clGetPlatformIDs failed");
290 std::vector<cl_platform_id> platforms(num_platforms);
291 enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL),
"clGetPlatformIDs failed");
294 std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
295 cl_uint num_all_devices = 0;
296 while (platforms_it != platforms.end()) {
297 cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
298 if (err == CL_DEVICE_NOT_FOUND) {
299 platforms_it = platforms.erase(platforms_it);
303 num_all_devices += num_devices;
308 std::vector<cl_device_id> devices(num_all_devices);
309 std::vector<cl_device_id>::iterator devices_it = devices.begin();
310 for (
auto p = platforms.begin(); p != platforms.end(); ++
p) {
311 enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices),
"clGetDeviceIDs failed");
312 devices_it += num_devices;
315 for (
auto d = devices.begin(); d != devices.end(); ++
d) {
319 return opencl_devices;
325 namespace opencl_info {
338 virtual void call() = 0;
342 template <
typename Callback,
typename T>
347 callback( Callback c,
const T& t ) : my_callback( c ), my_data( t ) {}
350 my_callback( my_data );
354 template <
typename T,
typename Factory = opencl_info::default_opencl_factory>
373 if ( my_is_event && wait ) {
381 const T&
data(
bool wait =
true )
const {
382 if ( my_is_event && wait ) {
391 my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
392 my_callback_flag_ptr(dmsg.my_callback_flag_ptr)
399 my_data(
std::
move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
400 my_callback_flag_ptr(
std::
move(dmsg.my_callback_flag_ptr) )
402 dmsg.my_is_event =
false;
429 cl_event
const *
get_event()
const {
return my_is_event ? &my_event : NULL; }
432 cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
433 if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
439 clRetainEvent( my_event );
444 enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ),
"Failed to flush an OpenCL command queue" );
450 template <
typename Callback>
452 __TBB_ASSERT( my_is_event,
"The OpenCL event is not set" );
456 operator T&() {
return data(); }
457 operator const T&()
const {
return data(); }
464 if (! my_callback_flag_ptr->fetch_and_store(
true)) {
467 register_callback([a](
const T& t)
mutable {
481 __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
490 mutable bool my_is_event =
false;
495 template <
typename K,
typename T,
typename Factory>
498 const T &t = dmsg.
data(
false );
500 return key_from_message<K, T>( t );
503 template <
typename Factory>
507 opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
508 my_curr_device_id = my_factory->devices().begin()->my_device_id;
512 if ( my_sending_event_present )
enforce_cl_retcode( clReleaseEvent( my_sending_event ),
"Failed to release an event for the OpenCL buffer" );
513 enforce_cl_retcode( clReleaseMemObject( my_cl_mem ),
"Failed to release an memory object" );
521 if ( !my_host_ptr ) {
529 Factory *
factory()
const {
return my_factory; }
541 map_memory(*my_factory->devices().begin(),
d);
543 my_host_ptr = d.
data(
false);
546 if (my_sending_event_present) {
548 my_sending_event_present =
false;
555 if (!my_factory->is_same_context(my_curr_device_id.load<
tbb::acquire>(), device_id)) {
558 if (!my_factory->is_same_context(my_curr_device_id.load<
tbb::relaxed>(), device_id)) {
559 __TBB_ASSERT(my_host_ptr,
"The buffer has not been mapped");
561 my_factory->enqueue_unmap_buffer(device, *
this, d);
563 my_sending_event_present =
true;
573 if (!e && my_sending_event_present) e = &my_sending_event;
575 __TBB_ASSERT(!my_host_ptr,
"The buffer has not been unmapped");
591 template <
typename Factory>
597 this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR,
size, NULL, &err );
604 cl_buffer_region region = { index, size };
605 this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err );
614 this->my_factory->enqueue_map_buffer( device, *
this, dmsg );
618 template <
typename,
typename>
629 template <
typename T,
typename Factory = opencl_info::default_opencl_factory>
632 template <
typename T,
typename Factory = opencl_info::default_opencl_factory>
641 template <access_type a>
643 T* ptr = (T*)my_impl->get_host_ptr();
648 T*
data()
const {
return &access<read_write>()[0]; }
650 template <access_type a = read_write>
653 template <access_type a = read_write>
656 size_t size()
const {
return my_impl->size()/
sizeof(T); }
658 T& operator[] ( ptrdiff_t k ) {
return begin()[k]; }
665 return my_impl->get_cl_mem();
690 opencl_buffer( Factory &f, cl_mem m,
size_t index,
size_t size ) : my_impl(
std::make_shared<
impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
702 template <
typename,
typename>
706 template <
typename T,
typename Factory>
712 opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
715 template <
typename T,
typename Factory>
721 #define is_typedef(type) \ 722 template <typename T> \ 724 template <typename C> \ 725 static std::true_type check( typename C::type* ); \ 726 template <typename C> \ 727 static std::false_type check( ... ); \ 729 static const bool value = decltype(check<T>(0))::value; \ 735 template <
typename T>
737 return t.native_object();
740 template <
typename T>
746 template <
typename T,
typename Factory>
748 const T &t = dmsg.
data(
false );
749 typedef typename T::memory_object_type mem_obj_t;
750 mem_obj_t mem_obj = t.memory_object();
753 mem_obj.send( device, d );
757 template <
typename T>
759 typedef typename T::memory_object_type mem_obj_t;
760 mem_obj_t mem_obj = t.memory_object();
762 mem_obj.send( device, dmsg );
765 template <
typename T>
769 template <
typename T,
typename Factory>
771 const T &t = dmsg.
data(
false );
772 typedef typename T::memory_object_type mem_obj_t;
773 mem_obj_t mem_obj = t.memory_object();
776 mem_obj.receive( d );
780 template <
typename T>
788 template <
typename G = std::initializer_list<
int>,
typename L = std::initializer_list<
int>,
789 typename =
typename std::enable_if<!std::is_same<
typename std::decay<G>::type, opencl_range>::value>::type>
790 opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
791 auto g_it = global_work.begin();
792 auto l_it = local_work.begin();
793 my_global_work_size = { size_t(-1), size_t(-1), size_t(-1) };
795 for (
int s = 0;
s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++
s) {
796 __TBB_ASSERT(l_it != local_work.end(),
"global_work & local_work must have same size");
797 my_global_work_size[
s] = *g_it;
798 my_local_work_size[
s] = *l_it;
802 const nd_range_type&
global_range()
const {
return my_global_work_size; }
803 const nd_range_type&
local_range()
const {
return my_local_work_size; }
810 template <
typename DeviceFilter>
822 std::vector<char> kernel_name;
823 for (
size_t curr_size = 32;; curr_size <<= 1 ) {
824 kernel_name.resize( curr_size <<= 1 );
825 enforce_cl_retcode( clGetKernelInfo( k.
my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ),
"Failed to get kernel info" );
826 if ( ret_size < curr_size )
break;
843 kernel(
const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
849 template <
typename DeviceFilter_>
852 template <
typename Factory>
864 if ( my_devices.size() ) {
865 for (
auto d = my_devices.begin(); d != my_devices.end(); ++
d ) {
866 enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ),
"Failed to release a command queue" );
868 enforce_cl_retcode( clReleaseContext( my_cl_context ),
"Failed to release a context" );
874 if ( !my_devices.size() ) {
875 my_devices = device_list;
883 template <
typename Factory>
889 e1 == NULL ? 0 : 1, e1, &e2, &err );
891 dmsg.
data(
false ) = ptr;
897 template <
typename Factory>
903 "Failed to unmap a buffer" );
909 template <
size_t NUM_ARGS,
typename T>
910 void process_one_arg(
const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&,
int&,
int& place,
const T& t ) {
912 enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++,
sizeof(p), &p),
"Failed to set a kernel argument" );
915 template <
size_t NUM_ARGS,
typename T,
typename F>
917 __TBB_ASSERT((
static_cast<typename std::array<cl_event, NUM_ARGS>::size_type
>(num_events) < events.size()), NULL);
919 const cl_event *
const e = msg.
get_event();
921 events[num_events++] = *e;
924 process_one_arg( kernel, events, num_events, place, msg.
data(
false) );
927 template <
size_t NUM_ARGS,
typename T,
typename ...Rest>
928 void process_arg_list(
const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events,
int& num_events,
int& place,
const T& t,
const Rest&... args ) {
929 process_one_arg( kernel, events, num_events, place, t );
930 process_arg_list( kernel, events, num_events, place, args... );
933 template <
size_t NUM_ARGS>
934 void process_arg_list(
const kernel_type&, std::array<cl_event, NUM_ARGS>&,
int&,
int& ) {}
936 template <
typename T>
939 template <
typename T,
typename F>
944 template <
typename T,
typename ...Rest>
946 update_one_arg( e, t );
947 update_arg_list( e, args... );
953 template <
typename ...Args>
955 std::array<cl_event,
sizeof...(Args)> events;
958 process_arg_list( kernel, events, num_events, place, args... );
960 const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
962 update_arg_list(e, args...);
969 template <
typename T,
typename ...Rest>
972 send_data( device, args... );
980 const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
985 for ( s = 1; s < 3 && g_size[
s] != size_t(-1); ++
s) {}
989 g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &
event ),
990 "Failed to enqueue a kernel" );
995 template <
typename T>
1000 template <
typename T,
typename F>
1002 cl_event
const *e_ptr = msg.
get_event();
1004 if ( e_ptr != NULL ) {
1012 template <
typename T,
typename ...Rest>
1014 if ( get_event_from_one_arg( e, t ) ) {
1018 return get_event_from_args( e, args... );
1028 virtual void operator() () {}
1031 template<
typename Fn>
1040 __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1042 finalize_fn *
const fn_ptr =
static_cast<finalize_fn*
>(
data);
1043 __TBB_ASSERT(fn_ptr != NULL,
"Invalid finalize function pointer");
1050 template <
typename FinalizeFn,
typename ...Args>
1054 if ( get_event_from_args( e, args... ) ) {
1056 new finalize_fn_leaf<FinalizeFn>(fn) ),
"Failed to set a callback" );
1081 return my_cl_context;
1087 if (!my_devices.size())
1091 enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE,
"No devices in the device list");
1092 cl_platform_id platform_id = my_devices.begin()->platform_id();
1094 enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM,
"All devices should be in the same platform");
1096 std::vector<cl_device_id> cl_device_ids;
1097 for (
auto d = my_devices.begin(); d != my_devices.end(); ++
d) {
1098 cl_device_ids.push_back((*d).my_cl_device_id);
1101 cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1103 cl_context ctx = clCreateContext(context_properties,
1104 (cl_uint)cl_device_ids.size(),
1105 cl_device_ids.data(),
1108 my_cl_context = ctx;
1110 size_t device_counter = 0;
1111 for (
auto d = my_devices.begin(); d != my_devices.end(); d++) {
1112 (*d).my_device_id = device_counter++;
1114 cl_command_queue cq;
1116 if ((*d).major_version() >= 2) {
1117 if ((*d).out_of_order_exec_mode_on_host_present()) {
1118 cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1119 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1121 cl_queue_properties props[] = { 0 };
1122 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1127 cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1129 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1130 #pragma GCC diagnostic push 1131 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 1133 #if _MSC_VER || __INTEL_COMPILER 1134 #pragma warning( push ) 1135 #if __INTEL_COMPILER 1136 #pragma warning (disable: 1478) 1138 #pragma warning (disable: 4996) 1141 cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1142 #if _MSC_VER || __INTEL_COMPILER 1143 #pragma warning( pop ) 1145 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1146 #pragma GCC diagnostic pop 1150 (*d).my_cl_command_queue = cq;
1160 template <
typename Factory>
1162 template <
typename Factory>
1164 template <
typename Factory>
1169 namespace opencl_info {
1173 template <
typename Factory>
1176 __TBB_ASSERT(!f.devices().empty(),
"No available devices");
1177 return *(f.devices().begin());
1184 cl_platform_id platform_id = devices.
begin()->platform_id();
1186 if (it->platform_id() == platform_id) {
1211 template <
typename T,
typename Factory>
1221 template <
typename Factory = opencl_info::default_opencl_factory>
1237 std::call_once( my_do_once_flag, [](){} );
1241 return kernel_type( get_cl_kernel(k), my_factory );
1245 opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1247 std::call_once( my_do_once_flag, [](){} );
1251 std::call_once( my_do_once_flag, [
this, &k](){ this->init( k ); } );
1253 cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1261 std::ifstream file_descriptor( filepath, std::ifstream::binary );
1262 if ( !file_descriptor.is_open() ) {
1263 std::string str = std::string(
"Could not open file: " ) + filepath;
1264 std::cerr << str << std::endl;
1267 file_descriptor.seekg( 0, file_descriptor.end );
1268 size_t length = size_t( file_descriptor.tellg() );
1269 file_descriptor.seekg( 0, file_descriptor.beg );
1270 my_content.resize( length );
1271 char*
begin = &*my_content.begin();
1272 file_descriptor.read( begin, length );
1273 file_descriptor.close();
1275 const char*
content() {
return &*my_content.cbegin(); }
1276 size_t length() {
return my_content.length(); }
1283 typedef void (CL_CALLBACK *cl_callback_type)(cl_program,
void*);
1285 cl_uint num_devices, cl_device_id* device_list,
1286 const char* options, cl_callback_type
callback,
1288 cl_int err = clBuildProgram( program, num_devices, device_list, options,
1289 callback, user_data );
1290 if( err == CL_SUCCESS )
1292 std::string str = std::string(
"Failed to build program: " ) +
name;
1293 if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1295 for (
auto d = devices.
begin(); d != devices.
end(); ++
d ) {
1296 std::cerr <<
"Build log for device: " << (*d).name() << std::endl;
1298 cl_int query_err = clGetProgramBuildInfo(
1299 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1303 std::vector<char> output;
1304 output.resize( log_size );
1305 query_err = clGetProgramBuildInfo(
1306 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1307 output.size(), output.data(), NULL );
1309 std::cerr << output.data() << std::endl;
1311 std::cerr <<
"No build log available" << std::endl;
1321 template<
typename Filter>
1323 Filter
filter,
const char* message ) {
1324 for ( cl_uint i = 0; i < num_devices; ++i )
1325 if ( filter(device_list[i]) ) {
1326 device_list[i--] = device_list[--num_devices];
1333 void init(
const std::string& )
const {
1334 cl_uint num_devices;
1335 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES,
sizeof( num_devices ), &num_devices, NULL ),
1336 "Failed to get OpenCL context info" );
1339 cl_device_id *device_list = (cl_device_id *)alloca( num_devices*
sizeof( cl_device_id ) );
1340 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*
sizeof( cl_device_id ), device_list, NULL ),
1341 "Failed to get OpenCL context info" );
1342 const char *options = NULL;
1343 switch ( my_type ) {
1346 const char *
s[] = { fr.
content() };
1347 const size_t l[] = { fr.
length() };
1349 my_cl_program = clCreateProgramWithSource( my_factory.context(), 1,
s, l, &err );
1352 num_devices, device_list,
1355 },
"No one device supports building program from sources" );
1357 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1358 options, NULL, NULL );
1362 options =
"-x spir";
1365 std::vector<const unsigned char*>
s(
1366 num_devices, reinterpret_cast<const unsigned char*>(fr.
content()) );
1367 std::vector<size_t> l( num_devices, fr.
length() );
1368 std::vector<cl_int> bin_statuses( num_devices, -1 );
1370 my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1371 device_list, l.data(), s.data(),
1372 bin_statuses.data(), &err );
1373 if( err != CL_SUCCESS ) {
1374 std::string statuses_str;
1375 for (
auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1376 statuses_str += std::to_string((*st));
1379 enforce_cl_retcode( err, std::string(
"Failed to create program, error " + std::to_string( err ) +
" : " ) + my_arg_str +
1380 std::string(
", binary_statuses = " ) + statuses_str );
1383 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1384 options, NULL, NULL );
1398 template <
typename DeviceFilter>
1401 template <
typename DeviceFilter>
1405 template<typename... Args>
1408 template<typename JP, typename Factory, typename... Ports>
1415 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::
default_factory() )
1421 : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1426 template <
typename DeviceSelector>
1428 : base_type( g, kernel, d, f)
1434 template<
typename JP,
typename... Ports>
1441 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::
default_factory() )
1444 template <
typename DeviceSelector>
1450 template<
typename... Ports>
1451 class opencl_node< tuple<Ports...> > :
public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1457 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::
default_factory() )
1460 template <
typename DeviceSelector>
1486 #endif // __TBB_flow_graph_opencl_node_H
opencl_device_list my_devices
callback(Callback c, const T &t)
opencl_program(const opencl_program &src)
opencl_node(graph &g, const kernel_type &kernel)
kernel(const cl_kernel &k, factory_type &f)
Primary template for atomic.
device_id_type my_device_id
opencl_buffer(Factory &f, size_t size)
opencl_memory(Factory &f)
void set_event(cl_event e) const
const_iterator end() const
base_type::kernel_type kernel_type
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)
opencl_async_msg & operator=(const opencl_async_msg &dmsg)
cl_mem get_cl_mem() const
std::vector< opencl_device > container_type
std::string platform_vendor() const
std::once_flag my_once_flag
bool extension_available(const std::string &ext) const
void finalize() const __TBB_override
opencl_program(Factory &factory, opencl_program_type type, const std::string &program_name)
const opencl_device_list & devices()
bool get_event_from_args(cl_event &)
static void CL_CALLBACK register_callback_func(cl_event, cl_int event_command_exec_status, void *data)
void send_data(opencl_device)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
cl_kernel get_cl_kernel(const std::string &k) const
friend bool operator==(opencl_device d1, opencl_device d2)
#define __TBB_STATIC_ASSERT(condition, msg)
void register_callback(Callback c) const
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
void enqueue_map_buffer(opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
tbb::spin_mutex my_devices_mutex
opencl_buffer memory_object_type
bool init(const opencl_device_list &device_list)
Factory * factory() const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
nd_range_type my_global_work_size
T device_info(cl_device_id d, cl_device_info i)
void map_memory(opencl_device device, opencl_async_msg< void *, Factory > &dmsg) __TBB_override
bool get_event_from_one_arg(cl_event &, const T &)
base_type::kernel_type kernel_type
Base class for types that should not be assigned.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp begin
opencl_program(Factory &factory, cl_program program)
container_type::const_iterator const_iterator
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
opencl_program_builder(Factory &f, const std::string &name, cl_program program, cl_uint num_devices, cl_device_id *device_list, const char *options, cl_callback_type callback, void *user_data)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
bool built_in_kernel_available(const std::string &k) const
std::string version() const
opencl_device_list operator()(const opencl_device_list &devices)
opencl_node(graph &g, const kernel_type &kernel)
bool out_of_order_exec_mode_on_device_present() const
void receive(const opencl_async_msg< opencl_buffer, Factory > &dependency) const
void set_command_queue(cl_command_queue cmd_queue)
opencl_device_list(std::initializer_list< opencl_device > il)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
opencl_program_type my_type
opencl_buffer< T, Factory > my_owner
K key_from_message(const T &t)
const_iterator cend() const
opencl_buffer_impl(size_t size, Factory &f)
container_type::iterator iterator
base_type::kernel_type kernel_type
Represents acquisition of a mutex.
opencl_program(const char *program_name)
std::shared_ptr< tbb::atomic< bool > > my_callback_flag_ptr
opencl_program(Factory &factory, const char *program_name)
void enqueue_unmap_buffer(opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
cl_bool linker_available() const
T platform_info(cl_platform_id p, cl_platform_info i)
void send_data(opencl_device device, T &t, Rest &... args)
tbb::atomic< opencl_device::device_id_type > my_curr_device_id
Factory::kernel_type kernel_type
opencl_range(G &&global_work=std::initializer_list< int >({ 0 }), L &&local_work=std::initializer_list< int >({ 0, 0, 0 }))
void process_arg_list(const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
std::string vendor() const
container_type my_container
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
file_reader(const std::string &filepath)
is_typedef(native_object_type)
void update_one_arg(cl_event, T &)
void const char const char int ITT_FORMAT __itt_group_sync p
cl_bool available() const
opencl_async_msg(const T &data)
default_opencl_factory & default_factory()
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
opencl_device(cl_device_id cl_d_id, device_id_type device_id)
opencl_async_msg(opencl_async_msg &&dmsg)
opencl_buffer_impl(cl_mem m, size_t index, size_t size, Factory &f)
bool get_event_from_one_arg(cl_event &e, const opencl_async_msg< T, F > &msg)
container_type::size_type size_type
opencl_async_msg< void *, Factory > receive(const cl_event *e)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
bool out_of_order_exec_mode_on_host_present() const
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d, Factory &f)
cl_command_queue command_queue() const
opencl_device_list find_available_devices()
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
size_t max_work_group_size() const
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
kernel_type get_kernel(const std::string &k) const
opencl_factory< DeviceFilter > factory_type
void info(cl_device_info i, T &t) const
opencl_node(graph &g, const kernel_type &kernel, DeviceSelector d)
tbb::spin_mutex my_sending_lock
static void fgt_multiinput_multioutput_node(string_index, void *, void *)
const opencl_buffer & memory_object() const
opencl_async_msg< void *, Factory > send(opencl_device device, const cl_event *e)
bool is_same_context(opencl_device::device_id_type d1, opencl_device::device_id_type d2)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
bool my_sending_event_present
void enforce_cl_retcode(cl_int err, std::string msg)
void update_arg_list(cl_event e, T &t, Rest &... args)
opencl_program(const std::string &program_name)
factory_type & my_factory
std::array< size_t, 3 > max_work_item_sizes() const
void send_kernel(opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
cl_device_type type() const
cl_uint address_bits() const
int minor_version() const
iterator< a > begin() const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long value
iterator< a > access() const
void call(F &&f, Pack &&p)
Calls the given function with arguments taken from a stored_pack.
opencl_subbuffer< T, Factory > subbuffer(size_t index, size_t size) const
void finalize(opencl_device device, FinalizeFn fn, Args &... args)
cl_command_queue my_cl_command_queue
void update_one_arg(cl_event e, opencl_async_msg< T, F > &msg)
void init(const std::string &) const
std::string platform_extensions() const
std::array< range_index_type, 3 > nd_range_type
cl_device_id my_cl_device_id
const_iterator begin() const
const nd_range_type & global_range() const
opencl_node< tuple< Ports... >, queueing, opencl_info::default_opencl_factory > base_type
opencl_node< tuple< Ports... >, JP, opencl_info::default_opencl_factory > base_type
K key_from_message(const opencl_async_msg< T, Factory > &dmsg)
friend bool operator==(const opencl_buffer< T, Factory > &lhs, const opencl_buffer< T, Factory > &rhs)
void move(tbb_thread &t1, tbb_thread &t2)
opencl_node(graph &g, const kernel_type &kernel, Factory &f)
cl_bool compiler_available() const
const opencl_device_list & available_devices()
opencl_subbuffer(const opencl_buffer< T, Factory > &owner, size_t index, size_t size)
opencl_program(opencl_program_type type)
void const char const char int ITT_FORMAT __itt_group_sync s
std::string platform_name() const
opencl_buffer(Factory &f, cl_mem m, size_t index, size_t size)
T event_info(cl_event e, cl_event_info i)
cl_event my_sending_event
std::once_flag my_do_once_flag
std::string platform_version() const
opencl_node(graph &g, const kernel_type &kernel)
opencl_buffer_impl< Factory > impl_type
void update_arg_list(cl_event)
opencl_async_msg(const opencl_async_msg &dmsg)
cl_platform_id platform_id() const
cl_event const * get_event() const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t size
opencl_device_filter(cl_uint &num_devices, cl_device_id *device_list, Filter filter, const char *message)
opencl_async_msg(const T &data, cl_event event)
int major_version() const
const_iterator cbegin() const
const T & data(bool wait=true) const
opencl_device operator()(Factory &f)
The graph related classes and functions.
const nd_range_type & local_range() const
A lock that occupies a single byte.
cl_mem native_object() const
std::enable_if< is_memory_object_type< T >::value >::type receive_if_memory_object(const opencl_async_msg< T, Factory > &dmsg)
std::string built_in_kernels() const
Base class for types that should not be copied or assigned.
Factory opencl_factory_type
std::string extensions() const
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t length
std::string platform_profile() const
opencl_program(Factory &factory, const std::string &program_name)
streaming_node< tuple< Ports... >, JP, Factory > base_type
void call() __TBB_override
opencl_program(opencl_program_type type, const std::string &program_name)
opencl_device device_type
void const char const char int ITT_FORMAT __itt_group_sync x void const char * name
iterator< a > end() const
cl_device_id device_id() const
nd_range_type my_local_work_size
opencl_device(cl_device_id d_id)
std::shared_ptr< impl_type > my_impl
cl_mem native_object_type
void add(opencl_device d)
void send(opencl_device device, opencl_async_msg< opencl_buffer, Factory > &dependency) const