1 #ifndef VIENNACL_OCL_KERNEL_HPP_
2 #define VIENNACL_OCL_KERNEL_HPP_
26 #include <OpenCL/cl.h>
61 template <
typename KernelType>
64 template<cl_kernel_info param>
67 template<cl_kernel_info param>
74 kernel() : handle_(), p_program_(NULL), p_context_(NULL), name_()
76 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
77 std::cout <<
"ViennaCL: Creating kernel object (default CTOR)" << std::endl;
82 : handle_(kernel_handle, kernel_context), p_program_(&kernel_program), p_context_(&kernel_context), name_(name)
84 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
85 std::cout <<
"ViennaCL: Creating kernel object (full CTOR)" << std::endl;
87 set_work_size_defaults();
91 : handle_(other.handle_), p_program_(other.p_program_), p_context_(other.p_context_), name_(other.name_)
93 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
94 std::cout <<
"ViennaCL: Creating kernel object (Copy CTOR)" << std::endl;
96 local_work_size_[0] = other.local_work_size_[0];
97 local_work_size_[1] = other.local_work_size_[1];
98 local_work_size_[2] = other.local_work_size_[2];
100 global_work_size_[0] = other.global_work_size_[0];
101 global_work_size_[1] = other.global_work_size_[1];
102 global_work_size_[2] = other.global_work_size_[2];
107 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
108 std::cout <<
"ViennaCL: Assigning kernel object" << std::endl;
110 handle_ = other.handle_;
111 p_program_ = other.p_program_;
112 p_context_ = other.p_context_;
114 local_work_size_[0] = other.local_work_size_[0];
115 local_work_size_[1] = other.local_work_size_[1];
116 local_work_size_[2] = other.local_work_size_[2];
117 global_work_size_[0] = other.global_work_size_[0];
118 global_work_size_[1] = other.global_work_size_[1];
119 global_work_size_[2] = other.global_work_size_[2];
124 void arg(
unsigned int pos, cl_char val)
126 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
127 std::cout <<
"ViennaCL: Setting char kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
129 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_char), (
void*)&val);
134 void arg(
unsigned int pos, cl_uchar val)
136 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
137 std::cout <<
"ViennaCL: Setting unsigned char kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
139 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_uchar), (
void*)&val);
144 void arg(
unsigned int pos, cl_short val)
146 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
147 std::cout <<
"ViennaCL: Setting short kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
149 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_short), (
void*)&val);
154 void arg(
unsigned int pos, cl_ushort val)
156 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
157 std::cout <<
"ViennaCL: Setting unsigned short kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
159 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_ushort), (
void*)&val);
165 void arg(
unsigned int pos, cl_uint val)
167 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
168 std::cout <<
"ViennaCL: Setting unsigned int kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
170 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_uint), (
void*)&val);
177 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
178 std::cout <<
"ViennaCL: Setting packed_cl_uint kernel argument (" << val.
start <<
", " << val.
stride <<
", " << val.
size <<
", " << val.
internal_size <<
") at pos " << pos <<
" for kernel " << name_ << std::endl;
180 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(
packed_cl_uint), (
void*)&val);
185 void arg(
unsigned int pos,
float val)
187 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
188 std::cout <<
"ViennaCL: Setting floating point kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
190 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(float), (
void*)&val);
195 void arg(
unsigned int pos,
double val)
197 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
198 std::cout <<
"ViennaCL: Setting double precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
200 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(double), (
void*)&val);
205 void arg(
unsigned int pos, cl_int val)
207 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
208 std::cout <<
"ViennaCL: Setting int precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
210 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_int), (
void*)&val);
215 void arg(
unsigned int pos, cl_ulong val)
217 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
218 std::cout <<
"ViennaCL: Setting ulong precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
220 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_ulong), (
void*)&val);
225 void arg(
unsigned int pos, cl_long val)
227 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
228 std::cout <<
"ViennaCL: Setting long precision kernel argument " << val <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
230 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_long), (
void*)&val);
236 template<
class VCL_TYPE>
237 void arg(
unsigned int pos, VCL_TYPE
const & val)
239 assert(&val.handle().opencl_handle().context() == &handle_.
context() && bool(
"Kernel and memory object not in the same context!"));
241 cl_mem temp = val.handle().opencl_handle().get();
242 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
243 std::cout <<
"ViennaCL: Setting generic kernel argument " << temp <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
245 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(cl_mem), (
void*)&temp);
251 template<
class CL_TYPE>
254 CL_TYPE temp = h.
get();
255 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
256 std::cout <<
"ViennaCL: Setting handle kernel argument " << temp <<
" at pos " << pos <<
" for kernel " << name_ << std::endl;
258 cl_int err = clSetKernelArg(handle_.
get(), pos,
sizeof(CL_TYPE), (
void*)&temp);
267 cl_uint
size =
static_cast<cl_uint
>(mem.
size());
268 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
269 std::cout <<
"ViennaCL: Setting local memory kernel argument of size " << size <<
" bytes at pos " << pos <<
" for kernel " << name_ << std::endl;
271 cl_int err = clSetKernelArg(handle_.
get(), pos,
size, 0);
278 template <
typename T0>
286 template <
typename T0,
typename T1>
294 template <
typename T0,
typename T1,
typename T2>
302 template <
typename T0,
typename T1,
typename T2,
typename T3>
310 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4>
311 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4)
318 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5>
319 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5)
326 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6>
327 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6)
334 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6,
typename T7>
335 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6, T7
const & t7)
342 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
typename T6,
typename T7,
typename T8>
343 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5, T6
const & t6, T7
const & t7, T8
const & t8)
350 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
351 typename T5,
typename T6,
typename T7,
typename T8,
typename T9>
352 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4,
353 T5
const & t5, T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9)
355 arg(0, t0);
arg(1, t1);
arg(2, t2);
arg(3, t3);
arg(4, t4);
arg(5, t5);
arg(6, t6);
arg(7, t7);
arg(8, t8);
arg(9, t9);
360 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
361 typename T6,
typename T7,
typename T8,
typename T9,
typename T10>
362 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
363 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10)
365 arg(0, t0);
arg(1, t1);
arg(2, t2);
arg(3, t3);
arg(4, t4);
arg(5, t5);
arg(6, t6);
arg(7, t7);
arg(8, t8);
arg(9, t9);
arg(10, t10);
370 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
371 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11>
372 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
373 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11)
381 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
382 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
typename T12>
383 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
384 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11, T12
const & t12)
392 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
393 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
394 typename T12,
typename T13>
395 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
396 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
397 T12
const & t12, T13
const & t13)
401 arg(12, t12);
arg(13, t13);
406 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
407 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
408 typename T12,
typename T13,
typename T14>
409 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
410 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
411 T12
const & t12, T13
const & t13, T14
const & t14)
415 arg(12, t12);
arg(13, t13);
arg(14, t14);
420 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
421 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
422 typename T12,
typename T13,
typename T14,
typename T15>
423 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
424 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
425 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15)
434 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
435 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
436 typename T12,
typename T13,
typename T14,
typename T15,
typename T16>
437 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
438 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
439 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16)
448 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
449 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
450 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17>
451 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
452 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
453 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17)
462 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
463 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
464 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
466 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
467 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
468 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
480 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
481 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
482 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
483 typename T18,
typename T19>
484 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
485 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
486 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
487 T18
const & t18, T19
const & t19
493 arg(18, t18);
arg(19, t19);
498 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
499 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
500 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
501 typename T18,
typename T19,
typename T20>
502 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
503 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
504 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
505 T18
const & t18, T19
const & t19, T20
const & t20
511 arg(18, t18);
arg(19, t19);
arg(20, t20);
516 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
517 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
518 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
519 typename T18,
typename T19,
typename T20,
typename T21>
520 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
521 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
522 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
523 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21
534 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
535 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
536 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
537 typename T18,
typename T19,
typename T20,
typename T21,
typename T22>
538 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
539 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
540 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
541 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22
552 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
553 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
554 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
555 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23>
556 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
557 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
558 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
559 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23
570 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
571 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
572 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
573 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
575 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
576 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
577 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
578 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
591 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
592 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
593 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
594 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
595 typename T24,
typename T25>
596 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
597 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
598 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
599 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
600 T24
const & t24, T25
const & t25
607 arg(24, t24);
arg(25, t25);
612 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
613 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
614 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
615 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
616 typename T24,
typename T25,
typename T26>
617 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
618 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
619 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
620 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
621 T24
const & t24, T25
const & t25, T26
const & t26
628 arg(24, t24);
arg(25, t25);
arg(26, t26);
633 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
634 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
635 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
636 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
637 typename T24,
typename T25,
typename T26,
typename T27>
638 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
639 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
640 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
641 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
642 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27
654 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
655 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
656 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
657 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
658 typename T24,
typename T25,
typename T26,
typename T27,
typename T28>
659 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
660 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
661 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
662 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
663 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28
675 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
676 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
677 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
678 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
679 typename T24,
typename T25,
typename T26,
typename T27,
typename T28,
typename T29>
680 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
681 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
682 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
683 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
684 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28, T29
const & t29
696 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
697 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
698 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
699 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
700 typename T24,
typename T25,
typename T26,
typename T27,
typename T28,
typename T29,
702 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
703 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
704 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
705 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
706 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28, T29
const & t29,
720 template <
typename T0,
typename T1,
typename T2,
typename T3,
typename T4,
typename T5,
721 typename T6,
typename T7,
typename T8,
typename T9,
typename T10,
typename T11,
722 typename T12,
typename T13,
typename T14,
typename T15,
typename T16,
typename T17,
723 typename T18,
typename T19,
typename T20,
typename T21,
typename T22,
typename T23,
724 typename T24,
typename T25,
typename T26,
typename T27,
typename T28,
typename T29,
725 typename T30,
typename T31>
726 kernel &
operator()(T0
const & t0, T1
const & t1, T2
const & t2, T3
const & t3, T4
const & t4, T5
const & t5,
727 T6
const & t6, T7
const & t7, T8
const & t8, T9
const & t9, T10
const & t10, T11
const & t11,
728 T12
const & t12, T13
const & t13, T14
const & t14, T15
const & t15, T16
const & t16, T17
const & t17,
729 T18
const & t18, T19
const & t19, T20
const & t20, T21
const & t21, T22
const & t22, T23
const & t23,
730 T24
const & t24, T25
const & t25, T26
const & t26, T27
const & t27, T28
const & t28, T29
const & t29,
731 T30
const & t30, T31
const & t31
739 arg(30, t30);
arg(31, t31);
752 assert(index < 3 &&
bool(
"Work size index out of bounds"));
753 return local_work_size_[index];
761 assert(index < 3 &&
bool(
"Work size index out of bounds"));
762 return global_work_size_[index];
772 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
773 std::cout <<
"ViennaCL: Setting local work size to " << s <<
" at index " << index <<
" for kernel " << name_ << std::endl;
775 assert(index < 3 &&
bool(
"Work size index out of bounds"));
776 local_work_size_[index] = s;
785 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_KERNEL)
786 std::cout <<
"ViennaCL: Setting global work size to " << s <<
" at index " << index <<
" for kernel " << name_ << std::endl;
788 assert(index < 3 &&
bool(
"Work size index out of bounds"));
789 global_work_size_[index] = s;
/** @brief Returns a const reference to the name stored in this kernel object (the name_ member). */
792 std::string
const &
name()
const {
return name_; }
800 inline void set_work_size_defaults();
814 template<cl_kernel_info param>
817 typedef typename detail::return_type<cl_kernel, param>::Result res_t;
818 return detail::get_info_impl<res_t>()(k.handle_.
get(),param);
826 template<cl_kernel_info param>
829 typedef typename detail::return_type<cl_kernel, param>::Result res_t;
830 return detail::get_info_impl<res_t>()(k.handle_.
get(),d.
id(),param);
void arg(unsigned int pos, packed_cl_uint val)
Sets four packed unsigned integers as argument at the provided position.
Definition: kernel.hpp:175
vcl_size_t size() const
Returns size in bytes.
Definition: local_mem.hpp:39
void arg(unsigned int pos, cl_char val)
Sets a char argument at the provided position.
Definition: kernel.hpp:124
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10)
Convenience function for setting eleven kernel parameters.
Definition: kernel.hpp:362
void arg(unsigned int pos, const local_mem &mem)
Sets an OpenCL local memory object at the provided position.
Definition: kernel.hpp:265
This file provides the forward declarations for the OpenCL layer of ViennaCL.
std::size_t vcl_size_t
Definition: forwards.h:58
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Definition: kernel.hpp:46
Represents an OpenCL device within ViennaCL.
friend void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12)
Convenience function for setting thirteen kernel parameters.
Definition: kernel.hpp:383
detail::return_type< cl_device_id, param >::Result info(cl_device_id const &handle)
Definition: infos.hpp:231
vcl_size_t size_type
Definition: kernel.hpp:72
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
std::string const & name() const
Definition: kernel.hpp:792
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2)
Convenience function for setting three kernel parameters.
Definition: kernel.hpp:295
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21)
Convenience function for setting twenty-two kernel parameters.
Definition: kernel.hpp:520
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20)
Convenience function for setting twenty-one kernel parameters.
Definition: kernel.hpp:502
viennacl::ocl::context const & context() const
Definition: kernel.hpp:796
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:49
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28, T29 const &t29, T30 const &t30, T31 const &t31)
Convenience function for setting 32 kernel parameters.
Definition: kernel.hpp:726
A class representing a command queue.
Definition: command_queue.hpp:45
void local_work_size(int index, size_type s)
Sets the local work size at the respective dimension.
Definition: kernel.hpp:770
void arg(unsigned int pos, cl_int val)
Sets an int argument at the provided position.
Definition: kernel.hpp:205
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25)
Convenience function for setting 26 kernel parameters.
Definition: kernel.hpp:596
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5)
Convenience function for setting six kernel parameters.
Definition: kernel.hpp:319
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18)
Convenience function for setting nineteen kernel parameters.
Definition: kernel.hpp:466
kernel & operator()(T0 const &t0)
Convenience function for setting one kernel parameter.
Definition: kernel.hpp:279
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23)
Convenience function for setting 24 kernel parameters.
Definition: kernel.hpp:556
#define VIENNACL_ERR_CHECK(err)
Definition: error.hpp:655
void arg(unsigned int pos, cl_ulong val)
Sets an unsigned long argument at the provided position.
Definition: kernel.hpp:215
void arg(unsigned int pos, VCL_TYPE const &val)
Sets an OpenCL memory object at the provided position.
Definition: kernel.hpp:237
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27)
Convenience function for setting 28 kernel parameters.
Definition: kernel.hpp:638
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Implementation of convenience functions for querying information.
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28)
Convenience function for setting 29 kernel parameters.
Definition: kernel.hpp:659
const OCL_TYPE & get() const
Definition: handle.hpp:189
cl_device_id id() const
Returns the OpenCL device id.
Definition: device.hpp:981
Implements an OpenCL program class for ViennaCL.
void arg(unsigned int pos, cl_ushort val)
Sets an argument of type unsigned short at the provided position.
Definition: kernel.hpp:154
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9)
Convenience function for setting ten kernel parameters.
Definition: kernel.hpp:352
friend detail::return_type< cl_kernel, param >::Result info(viennacl::ocl::kernel &k)
Queries information about a kernel.
Definition: kernel.hpp:815
kernel(kernel const &other)
Definition: kernel.hpp:90
Implementation of a smart-pointer-like class for handling OpenCL handles.
kernel()
Definition: kernel.hpp:74
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28, T29 const &t29, T30 const &t30)
Convenience function for setting 31 kernel parameters.
Definition: kernel.hpp:702
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3)
Convenience function for setting four kernel parameters.
Definition: kernel.hpp:303
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22)
Convenience function for setting 23 kernel parameters.
Definition: kernel.hpp:538
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17)
Convenience function for setting eighteen kernel parameters.
Definition: kernel.hpp:451
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6)
Convenience function for setting seven kernel parameters.
Definition: kernel.hpp:327
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26)
Convenience function for setting 27 kernel parameters.
Definition: kernel.hpp:617
void arg(unsigned int pos, double val)
Sets a double precision floating point argument at the provided position.
Definition: kernel.hpp:195
Wrapper class for an OpenCL program.
Definition: program.hpp:40
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11)
Convenience function for setting twelve kernel parameters.
Definition: kernel.hpp:372
void arg(unsigned int pos, cl_short val)
Sets an argument of type short at the provided position.
Definition: kernel.hpp:144
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14)
Convenience function for setting fifteen kernel parameters.
Definition: kernel.hpp:409
viennacl::ocl::kernel & operator=(const kernel &other)
Definition: kernel.hpp:105
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15)
Convenience function for setting sixteen kernel parameters.
Definition: kernel.hpp:423
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13)
Convenience function for setting fourteen kernel parameters.
Definition: kernel.hpp:395
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16)
Convenience function for setting seventeen kernel parameters.
Definition: kernel.hpp:437
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24)
Convenience function for setting 25 kernel parameters.
Definition: kernel.hpp:575
void arg(unsigned int pos, viennacl::ocl::handle< CL_TYPE > const &h)
Sets an OpenCL object at the provided position.
Definition: kernel.hpp:252
void global_work_size(int index, size_type s)
Sets the global work size at the respective dimension.
Definition: kernel.hpp:783
void arg(unsigned int pos, cl_uint val)
Sets an unsigned integer argument at the provided position.
Definition: kernel.hpp:165
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19)
Convenience function for setting twenty kernel parameters.
Definition: kernel.hpp:484
void arg(unsigned int pos, cl_long val)
Sets a long argument at the provided position.
Definition: kernel.hpp:225
cl_uint stride
Increment between integers.
Definition: kernel.hpp:51
void arg(unsigned int pos, cl_uchar val)
Sets an unsigned char argument at the provided position.
Definition: kernel.hpp:134
kernel(cl_kernel kernel_handle, viennacl::ocl::program const &kernel_program, viennacl::ocl::context const &kernel_context, std::string const &name)
Definition: kernel.hpp:81
void arg(unsigned int pos, float val)
Sets a single precision floating point argument at the provided position.
Definition: kernel.hpp:185
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8)
Convenience function for setting nine kernel parameters.
Definition: kernel.hpp:343
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4)
Convenience function for setting five kernel parameters.
Definition: kernel.hpp:311
viennacl::ocl::context const & context() const
Definition: handle.hpp:191
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:53
viennacl::ocl::handle< cl_kernel > const & handle() const
Definition: kernel.hpp:794
A local (shared) memory object for OpenCL.
kernel & operator()(T0 const &t0, T1 const &t1)
Convenience function for setting two kernel parameters.
Definition: kernel.hpp:287
Handle class that effectively represents a smart pointer for OpenCL handles.
Definition: forwards.h:51
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7)
Convenience function for setting eight kernel parameters.
Definition: kernel.hpp:335
kernel & operator()(T0 const &t0, T1 const &t1, T2 const &t2, T3 const &t3, T4 const &t4, T5 const &t5, T6 const &t6, T7 const &t7, T8 const &t8, T9 const &t9, T10 const &t10, T11 const &t11, T12 const &t12, T13 const &t13, T14 const &t14, T15 const &t15, T16 const &t16, T17 const &t17, T18 const &t18, T19 const &t19, T20 const &t20, T21 const &t21, T22 const &t22, T23 const &t23, T24 const &t24, T25 const &t25, T26 const &t26, T27 const &t27, T28 const &t28, T29 const &t29)
Convenience function for setting 30 kernel parameters.
Definition: kernel.hpp:680
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:55