Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
tbb::flow::interface10::opencl_factory< DeviceFilter > Class Template Reference

#include <flow_graph_opencl_node.h>

Inheritance diagram for tbb::flow::interface10::opencl_factory< DeviceFilter >:
Collaboration diagram for tbb::flow::interface10::opencl_factory< DeviceFilter >:

Classes

struct  finalize_fn
 
struct  finalize_fn_leaf
 
class  kernel
 

Public Types

template<typename T >
using async_msg_type = opencl_async_msg< T, opencl_factory< DeviceFilter > >
 
typedef opencl_device device_type
 
typedef kernel kernel_type
 
typedef opencl_range range_type
 

Public Member Functions

 opencl_factory ()
 
 ~opencl_factory ()
 
bool init (const opencl_device_list &device_list)
 
template<typename ... Args>
void send_kernel (opencl_device device, const kernel_type &kernel, const range_type &work_size, Args &... args)
 
template<typename T , typename ... Rest>
void send_data (opencl_device device, T &t, Rest &... args)
 
void send_data (opencl_device)
 
template<typename FinalizeFn , typename ... Args>
void finalize (opencl_device device, FinalizeFn fn, Args &... args)
 
const opencl_device_list & devices ()
 

Private Member Functions

template<typename Factory >
void enqueue_map_buffer (opencl_device device, opencl_buffer_impl< Factory > &buffer, opencl_async_msg< void *, Factory > &dmsg)
 
template<typename Factory >
void enqueue_unmap_buffer (opencl_device device, opencl_memory< Factory > &memory, opencl_async_msg< void *, Factory > &dmsg)
 
template<size_t NUM_ARGS, typename T >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)
 
template<size_t NUM_ARGS, typename T , typename F >
void process_one_arg (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const opencl_async_msg< T, F > &msg)
 
template<size_t NUM_ARGS, typename T , typename ... Rest>
void process_arg_list (const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
 
template<size_t NUM_ARGS>
void process_arg_list (const kernel_type &, std::array< cl_event, NUM_ARGS > &, int &, int &)
 
template<typename T >
void update_one_arg (cl_event, T &)
 
template<typename T , typename F >
void update_one_arg (cl_event e, opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
void update_arg_list (cl_event e, T &t, Rest &... args)
 
void update_arg_list (cl_event)
 
cl_event send_kernel_impl (opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
 
template<typename T >
bool get_event_from_one_arg (cl_event &, const T &)
 
template<typename T , typename F >
bool get_event_from_one_arg (cl_event &e, const opencl_async_msg< T, F > &msg)
 
template<typename T , typename ... Rest>
bool get_event_from_args (cl_event &e, const T &t, const Rest &... args)
 
bool get_event_from_args (cl_event &)
 
bool is_same_context (opencl_device::device_id_type d1, opencl_device::device_id_type d2)
 
 opencl_factory (const opencl_factory &)
 
opencl_factory & operator= (const opencl_factory &)
 
cl_context context ()
 
void init_once ()
 

Static Private Member Functions

static void CL_CALLBACK finalize_callback (cl_event, cl_int event_command_exec_status, void *data)
 

Private Attributes

std::once_flag my_once_flag
 
opencl_device_list my_devices
 
cl_context my_cl_context
 
tbb::spin_mutex my_devices_mutex
 

Friends

template<typename Factory >
class opencl_program
 
template<typename Factory >
class opencl_buffer_impl
 
template<typename Factory >
class opencl_memory
 

Detailed Description

template<typename DeviceFilter>
class tbb::flow::interface10::opencl_factory< DeviceFilter >

Definition at line 45 of file flow_graph_opencl_node.h.

Member Typedef Documentation

◆ async_msg_type

template<typename DeviceFilter>
template<typename T >
using tbb::flow::interface10::opencl_factory< DeviceFilter >::async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter> >

Definition at line 809 of file flow_graph_opencl_node.h.

◆ device_type

template<typename DeviceFilter>
typedef opencl_device tbb::flow::interface10::opencl_factory< DeviceFilter >::device_type

Definition at line 810 of file flow_graph_opencl_node.h.

◆ kernel_type

template<typename DeviceFilter>
typedef kernel tbb::flow::interface10::opencl_factory< DeviceFilter >::kernel_type

Definition at line 852 of file flow_graph_opencl_node.h.

◆ range_type

template<typename DeviceFilter>
typedef opencl_range tbb::flow::interface10::opencl_factory< DeviceFilter >::range_type

Definition at line 856 of file flow_graph_opencl_node.h.

Constructor & Destructor Documentation

◆ opencl_factory() [1/2]

template<typename DeviceFilter>
tbb::flow::interface10::opencl_factory< DeviceFilter >::opencl_factory ( )
inline

Definition at line 858 of file flow_graph_opencl_node.h.

858 {}

◆ ~opencl_factory()

template<typename DeviceFilter>
tbb::flow::interface10::opencl_factory< DeviceFilter >::~opencl_factory ( )
inline

Definition at line 859 of file flow_graph_opencl_node.h.

859  {
860  if ( my_devices.size() ) {
861  for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
862  enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
863  }
864  enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
865  }
866  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void enforce_cl_retcode(cl_int err, std::string msg)

◆ opencl_factory() [2/2]

template<typename DeviceFilter>
tbb::flow::interface10::opencl_factory< DeviceFilter >::opencl_factory ( const opencl_factory< DeviceFilter > &  )
private

Member Function Documentation

◆ context()

template<typename DeviceFilter>
cl_context tbb::flow::interface10::opencl_factory< DeviceFilter >::context ( )
inlineprivate

◆ devices()

template<typename DeviceFilter>
const opencl_device_list& tbb::flow::interface10::opencl_factory< DeviceFilter >::devices ( )
inline

Definition at line 1058 of file flow_graph_opencl_node.h.

◆ enqueue_map_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::enqueue_map_buffer ( opencl_device  device,
opencl_buffer_impl< Factory > &  buffer,
opencl_async_msg< void *, Factory > &  dmsg 
)
inlineprivate

Definition at line 880 of file flow_graph_opencl_node.h.

880  {
881  cl_event const* e1 = dmsg.get_event();
882  cl_event e2;
883  cl_int err;
884  void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
885  e1 == NULL ? 0 : 1, e1, &e2, &err );
886  enforce_cl_retcode( err, "Failed to map a buffer" );
887  dmsg.data( false ) = ptr;
888  dmsg.set_event( e2 );
889  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
890  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ enqueue_unmap_buffer()

template<typename DeviceFilter>
template<typename Factory >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::enqueue_unmap_buffer ( opencl_device  device,
opencl_memory< Factory > &  memory,
opencl_async_msg< void *, Factory > &  dmsg 
)
inlineprivate

Definition at line 894 of file flow_graph_opencl_node.h.

894  {
895  cl_event const* e1 = dmsg.get_event();
896  cl_event e2;
897  enforce_cl_retcode(
898  clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
899  "Failed to unmap a buffer" );
900  dmsg.set_event( e2 );
901  enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
902  }
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize()

template<typename DeviceFilter>
template<typename FinalizeFn , typename ... Args>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::finalize ( opencl_device  device,
FinalizeFn  fn,
Args &...  args 
)
inline

Definition at line 1047 of file flow_graph_opencl_node.h.

1047  {
1048  cl_event e;
1049 
1050  if ( get_event_from_args( e, args... ) ) {
1051  enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1052  new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1053  }
1054 
1055  enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1056  }
static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data)
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event ITT_FORMAT __itt_group_mark d void const wchar_t const wchar_t int ITT_FORMAT __itt_group_sync __itt_group_fsync x void const wchar_t int const wchar_t int int ITT_FORMAT __itt_group_sync __itt_group_fsync x void ITT_FORMAT __itt_group_sync __itt_group_fsync p void ITT_FORMAT __itt_group_sync __itt_group_fsync p void size_t ITT_FORMAT lu no args __itt_obj_prop_t __itt_obj_state_t ITT_FORMAT d const char ITT_FORMAT s __itt_frame ITT_FORMAT p 
const char const char ITT_FORMAT s __itt_counter ITT_FORMAT p __itt_counter unsigned long long ITT_FORMAT lu const wchar_t ITT_FORMAT S __itt_mark_type const wchar_t ITT_FORMAT S __itt_mark_type const char ITT_FORMAT s __itt_mark_type ITT_FORMAT d __itt_caller ITT_FORMAT p __itt_caller ITT_FORMAT p no args const __itt_domain __itt_clock_domain unsigned long long __itt_id ITT_FORMAT lu const __itt_domain __itt_clock_domain unsigned long long __itt_id __itt_id void * fn
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)
void enforce_cl_retcode(cl_int err, std::string msg)

◆ finalize_callback()

template<typename DeviceFilter>
static void CL_CALLBACK tbb::flow::interface10::opencl_factory< DeviceFilter >::finalize_callback ( cl_event  ,
cl_int  event_command_exec_status,
void *  data 
)
inlinestaticprivate

Definition at line 1034 of file flow_graph_opencl_node.h.

1034  {
1035  tbb::internal::suppress_unused_warning(event_command_exec_status);
1036  __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1037 
1038  finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1039  __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1040  (*fn_ptr)();
1041 
1042  // Function pointer was created by 'new' & this callback must be called once only
1043  delete fn_ptr;
1044  }
void suppress_unused_warning(const T1 &)
Utility template function to prevent "unused" warnings by various compilers.
Definition: tbb_stddef.h:377
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void * data
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ get_event_from_args() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  e,
const T &  t,
const Rest &...  args 
)
inlineprivate

Definition at line 1009 of file flow_graph_opencl_node.h.

1009  {
1010  if ( get_event_from_one_arg( e, t ) ) {
1011  return true;
1012  }
1013 
1014  return get_event_from_args( e, args... );
1015  }
bool get_event_from_one_arg(cl_event &, const T &)
bool get_event_from_args(cl_event &e, const T &t, const Rest &... args)

◆ get_event_from_args() [2/2]

template<typename DeviceFilter>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_args ( cl_event &  )
inlineprivate

Definition at line 1017 of file flow_graph_opencl_node.h.

1017  {
1018  return false;
1019  }

◆ get_event_from_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  ,
const T &   
)
inlineprivate

Definition at line 992 of file flow_graph_opencl_node.h.

992  {
993  return false;
994  }

◆ get_event_from_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::get_event_from_one_arg ( cl_event &  e,
const opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 997 of file flow_graph_opencl_node.h.

997  {
998  cl_event const *e_ptr = msg.get_event();
999 
1000  if ( e_ptr != NULL ) {
1001  e = *e_ptr;
1002  return true;
1003  }
1004 
1005  return false;
1006  }

◆ init()

template<typename DeviceFilter>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::init ( const opencl_device_list &  device_list)
inline

Definition at line 868 of file flow_graph_opencl_node.h.

868  {
869  tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
870  if ( !my_devices.size() ) {
871  my_devices = device_list;
872  return true;
873  }
874  return false;
875  }
Represents acquisition of a mutex.
Definition: spin_mutex.h:50
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock

◆ init_once()

template<typename DeviceFilter>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::init_once ( )
inlineprivate

Definition at line 1080 of file flow_graph_opencl_node.h.

Referenced by tbb::flow::interface10::opencl_factory< default_device_filter >::context(), and tbb::flow::interface10::opencl_factory< default_device_filter >::devices().

1080  {
1081  {
1082  tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1083  if (!my_devices.size())
1084  my_devices = DeviceFilter()( opencl_info::available_devices() );
1085  }
1086 
1087  enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1088  cl_platform_id platform_id = my_devices.begin()->platform_id();
1089  for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1090  enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1091 
1092  std::vector<cl_device_id> cl_device_ids;
1093  for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1094  cl_device_ids.push_back((*d).my_cl_device_id);
1095  }
1096 
1097  cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1098  cl_int err;
1099  cl_context ctx = clCreateContext(context_properties,
1100  (cl_uint)cl_device_ids.size(),
1101  cl_device_ids.data(),
1102  NULL, NULL, &err);
1103  enforce_cl_retcode(err, "Failed to create context");
1104  my_cl_context = ctx;
1105 
1106  size_t device_counter = 0;
1107  for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1108  (*d).my_device_id = device_counter++;
1109  cl_int err2;
1110  cl_command_queue cq;
1111 #if CL_VERSION_2_0
1112  if ((*d).major_version() >= 2) {
1113  if ((*d).out_of_order_exec_mode_on_host_present()) {
1114  cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1115  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1116  } else {
1117  cl_queue_properties props[] = { 0 };
1118  cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1119  }
1120  } else
1121 #endif
1122  {
1123  cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1124  // Suppress "declared deprecated" warning for the next line.
1125 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1126 #pragma GCC diagnostic push
1127 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1128 #endif
1129 #if _MSC_VER || __INTEL_COMPILER
1130 #pragma warning( push )
1131 #if __INTEL_COMPILER
1132 #pragma warning (disable: 1478)
1133 #else
1134 #pragma warning (disable: 4996)
1135 #endif
1136 #endif
1137  cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1138 #if _MSC_VER || __INTEL_COMPILER
1139 #pragma warning( pop )
1140 #endif
1141 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1142 #pragma GCC diagnostic pop
1143 #endif
1144  }
1145  enforce_cl_retcode(err2, "Failed to create command queue");
1146  (*d).my_cl_command_queue = cq;
1147  }
1148  }
const opencl_device_list & available_devices()
Represents acquisition of a mutex.
Definition: spin_mutex.h:50
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void * lock
void enforce_cl_retcode(cl_int err, std::string msg)
Here is the caller graph for this function:

◆ is_same_context()

template<typename DeviceFilter>
bool tbb::flow::interface10::opencl_factory< DeviceFilter >::is_same_context ( opencl_device::device_id_type  d1,
opencl_device::device_id_type  d2 
)
inlineprivate

Definition at line 1064 of file flow_graph_opencl_node.h.

1064  {
1066  // Currently, the factory supports only one context, so if both devices are not the host, they are in the same context.
1067  if ( d1 != opencl_device::host && d2 != opencl_device::host )
1068  return true;
1069  return d1 == d2;
1070  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165

◆ operator=()

template<typename DeviceFilter>
opencl_factory& tbb::flow::interface10::opencl_factory< DeviceFilter >::operator= ( const opencl_factory< DeviceFilter > &  )
private

◆ process_arg_list() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename ... Rest>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  events,
int &  num_events,
int &  place,
const T &  t,
const Rest &...  args 
)
inlineprivate

Definition at line 924 of file flow_graph_opencl_node.h.

924  {
925  process_one_arg( kernel, events, num_events, place, t );
926  process_arg_list( kernel, events, num_events, place, args... );
927  }
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)

◆ process_arg_list() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_arg_list ( const kernel_type &  ,
std::array< cl_event, NUM_ARGS > &  ,
int &  ,
int &   
)
inlineprivate

Definition at line 930 of file flow_graph_opencl_node.h.

930 {}

◆ process_one_arg() [1/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  ,
int &  ,
int &  place,
const T &  t 
)
inlineprivate

Definition at line 906 of file flow_graph_opencl_node.h.

906  {
907  auto p = get_native_object(t);
908  enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
909  }
void enforce_cl_retcode(cl_int err, std::string msg)
std::enable_if< is_native_object_type< T >::value, typename T::native_object_type >::type get_native_object(const T &t)
void const char const char int ITT_FORMAT __itt_group_sync p

◆ process_one_arg() [2/2]

template<typename DeviceFilter>
template<size_t NUM_ARGS, typename T , typename F >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::process_one_arg ( const kernel_type &  kernel,
std::array< cl_event, NUM_ARGS > &  events,
int &  num_events,
int &  place,
const opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 912 of file flow_graph_opencl_node.h.

912  {
913  __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
914 
915  const cl_event * const e = msg.get_event();
916  if (e != NULL) {
917  events[num_events++] = *e;
918  }
919 
920  process_one_arg( kernel, events, num_events, place, msg.data(false) );
921  }
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void process_one_arg(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &, int &, int &place, const T &t)

◆ send_data() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::send_data ( opencl_device  device,
T &  t,
Rest &...  args 
)
inline

Definition at line 966 of file flow_graph_opencl_node.h.

966  {
967  send_if_memory_object( device, t );
968  send_data( device, args... );
969  }
void send_data(opencl_device device, T &t, Rest &... args)
std::enable_if< is_memory_object_type< T >::value >::type send_if_memory_object(opencl_device device, opencl_async_msg< T, Factory > &dmsg)

◆ send_data() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::send_data ( opencl_device  )
inline

Definition at line 971 of file flow_graph_opencl_node.h.

971 {}

◆ send_kernel()

template<typename DeviceFilter>
template<typename ... Args>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::send_kernel ( opencl_device  device,
const kernel_type &  kernel,
const range_type &  work_size,
Args &...  args 
)
inline

Definition at line 950 of file flow_graph_opencl_node.h.

950  {
951  std::array<cl_event, sizeof...(Args)> events;
952  int num_events = 0;
953  int place = 0;
954  process_arg_list( kernel, events, num_events, place, args... );
955 
956  const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
957 
958  update_arg_list(e, args...);
959 
960  // Release our own reference to cl_event
961  enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
962  }
cl_event send_kernel_impl(opencl_device device, const cl_kernel &kernel, const range_type &work_size, cl_uint num_events, cl_event *event_list)
void process_arg_list(const kernel_type &kernel, std::array< cl_event, NUM_ARGS > &events, int &num_events, int &place, const T &t, const Rest &... args)
void update_arg_list(cl_event e, T &t, Rest &... args)
void enforce_cl_retcode(cl_int err, std::string msg)

◆ send_kernel_impl()

template<typename DeviceFilter>
cl_event tbb::flow::interface10::opencl_factory< DeviceFilter >::send_kernel_impl ( opencl_device  device,
const cl_kernel &  kernel,
const range_type &  work_size,
cl_uint  num_events,
cl_event *  event_list 
)
inlineprivate

Definition at line 975 of file flow_graph_opencl_node.h.

976  {
977  const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
978  const typename range_type::nd_range_type& g_size = work_size.global_range();
979  const typename range_type::nd_range_type& l_size = work_size.local_range();
980  cl_uint s;
981  for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
982  cl_event event;
983  enforce_cl_retcode(
984  clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
985  g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
986  "Failed to enqueue a kernel" );
987  return event;
988  }
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d __itt_event event
void const char const char int ITT_FORMAT __itt_group_sync s
std::array< range_index_type, 3 > nd_range_type
void enforce_cl_retcode(cl_int err, std::string msg)

◆ update_arg_list() [1/2]

template<typename DeviceFilter>
template<typename T , typename ... Rest>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  e,
T &  t,
Rest &...  args 
)
inlineprivate

Definition at line 941 of file flow_graph_opencl_node.h.

941  {
942  update_one_arg( e, t );
943  update_arg_list( e, args... );
944  }
void update_arg_list(cl_event e, T &t, Rest &... args)

◆ update_arg_list() [2/2]

template<typename DeviceFilter>
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_arg_list ( cl_event  )
inlineprivate

Definition at line 946 of file flow_graph_opencl_node.h.

946 {}

◆ update_one_arg() [1/2]

template<typename DeviceFilter>
template<typename T >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  ,
T &   
)
inlineprivate

Definition at line 933 of file flow_graph_opencl_node.h.

933 {}

◆ update_one_arg() [2/2]

template<typename DeviceFilter>
template<typename T , typename F >
void tbb::flow::interface10::opencl_factory< DeviceFilter >::update_one_arg ( cl_event  e,
opencl_async_msg< T, F > &  msg 
)
inlineprivate

Definition at line 936 of file flow_graph_opencl_node.h.

936  {
937  msg.set_event( e );
938  }

Friends And Related Function Documentation

◆ opencl_buffer_impl

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_buffer_impl
friend

Definition at line 1159 of file flow_graph_opencl_node.h.

◆ opencl_memory

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_memory
friend

Definition at line 1161 of file flow_graph_opencl_node.h.

◆ opencl_program

template<typename DeviceFilter>
template<typename Factory >
friend class opencl_program
friend

Definition at line 1157 of file flow_graph_opencl_node.h.

Member Data Documentation

◆ my_cl_context

template<typename DeviceFilter>
cl_context tbb::flow::interface10::opencl_factory< DeviceFilter >::my_cl_context
private

Definition at line 1152 of file flow_graph_opencl_node.h.

◆ my_devices

template<typename DeviceFilter>
opencl_device_list tbb::flow::interface10::opencl_factory< DeviceFilter >::my_devices
private

Definition at line 1151 of file flow_graph_opencl_node.h.

◆ my_devices_mutex

template<typename DeviceFilter>
tbb::spin_mutex tbb::flow::interface10::opencl_factory< DeviceFilter >::my_devices_mutex
private

Definition at line 1154 of file flow_graph_opencl_node.h.

◆ my_once_flag

template<typename DeviceFilter>
std::once_flag tbb::flow::interface10::opencl_factory< DeviceFilter >::my_once_flag
private

Definition at line 1150 of file flow_graph_opencl_node.h.


The documentation for this class was generated from the following file: flow_graph_opencl_node.h

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.