1 #ifndef VIENNACL_OCL_DEVICE_HPP_
2 #define VIENNACL_OCL_DEVICE_HPP_
26 #include <OpenCL/cl.h>
52 explicit device() : device_(0) { flush_cache(); }
54 explicit device(cl_device_id dev) : device_(dev)
56 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
57 std::cout <<
"ViennaCL: Creating device object (CTOR with cl_device_id)" << std::endl;
64 #if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE)
65 std::cout <<
"ViennaCL: Creating device object (Copy CTOR)" << std::endl;
67 if (device_ != other.device_)
69 device_ = other.device_;
77 if (!address_bits_valid_)
79 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ADDRESS_BITS,
sizeof(cl_uint), static_cast<void *>(&address_bits_), NULL);
81 address_bits_valid_ =
true;
89 if (!available_valid_)
91 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_AVAILABLE,
sizeof(cl_bool), static_cast<void *>(&available_), NULL);
93 available_valid_ =
true;
101 if (!compiler_available_valid_)
103 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_COMPILER_AVAILABLE ,
sizeof(cl_bool), static_cast<void *>(&compiler_available_), NULL);
105 compiler_available_valid_ =
true;
107 return compiler_available_;
110 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
124 cl_device_fp_config double_fp_config()
const
126 if (!double_fp_config_valid_)
128 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_DOUBLE_FP_CONFIG,
sizeof(cl_device_fp_config), static_cast<void *>(&double_fp_config_), NULL);
130 double_fp_config_valid_ =
true;
132 return double_fp_config_;
139 if (!endian_little_valid_)
141 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ENDIAN_LITTLE,
sizeof(cl_bool), static_cast<void *>(&endian_little_), NULL);
143 endian_little_valid_ =
true;
145 return endian_little_;
151 if (!error_correction_support_valid_)
153 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_ERROR_CORRECTION_SUPPORT ,
sizeof(cl_bool), static_cast<void *>(&error_correction_support_), NULL);
155 error_correction_support_valid_ =
true;
157 return error_correction_support_;
169 if (!execution_capabilities_valid_)
171 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXECUTION_CAPABILITIES ,
sizeof(cl_device_exec_capabilities), static_cast<void *>(&execution_capabilities_), NULL);
173 execution_capabilities_valid_ =
true;
175 return execution_capabilities_;
191 if (!extensions_valid_)
193 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_EXTENSIONS,
sizeof(
char) * 2048, static_cast<void *>(&extensions_), NULL);
195 extensions_valid_ =
true;
203 if (!global_mem_cache_size_valid_)
205 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
sizeof(cl_ulong), static_cast<void *>(&global_mem_cache_size_), NULL);
207 global_mem_cache_size_valid_ =
true;
209 return global_mem_cache_size_;
215 if (!global_mem_cache_type_valid_)
217 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
sizeof(cl_device_mem_cache_type), static_cast<void *>(&global_mem_cache_type_), NULL);
219 global_mem_cache_type_valid_ =
true;
221 return global_mem_cache_type_;
227 if (!global_mem_cacheline_size_valid_)
229 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE,
sizeof(cl_uint), static_cast<void *>(&global_mem_cacheline_size_), NULL);
231 global_mem_cacheline_size_valid_ =
true;
233 return global_mem_cacheline_size_;
239 if (!global_mem_size_valid_)
241 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(cl_ulong), static_cast<void *>(&global_mem_size_), NULL);
243 global_mem_size_valid_ =
true;
245 return global_mem_size_;
248 #ifdef CL_DEVICE_HALF_FP_CONFIG
261 cl_device_fp_config half_fp_config()
const
263 if (!half_fp_config_valid_)
265 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_HALF_FP_CONFIG,
sizeof(cl_device_fp_config), static_cast<void *>(&half_fp_config_), NULL);
267 half_fp_config_valid_ =
true;
269 return half_fp_config_;
276 if (!host_unified_memory_valid_)
278 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_HOST_UNIFIED_MEMORY,
sizeof(cl_bool), static_cast<void *>(&host_unified_memory_), NULL);
280 host_unified_memory_valid_ =
true;
282 return host_unified_memory_;
288 if (!image_support_valid_)
290 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE_SUPPORT,
sizeof(cl_bool), static_cast<void *>(&image_support_), NULL);
292 image_support_valid_ =
true;
294 return image_support_;
300 if (!image2d_max_height_valid_)
302 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
sizeof(
size_t), static_cast<void *>(&image2d_max_height_), NULL);
304 image2d_max_height_valid_ =
true;
306 return image2d_max_height_;
312 if (!image2d_max_width_valid_)
314 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE2D_MAX_WIDTH,
sizeof(
size_t), static_cast<void *>(&image2d_max_width_), NULL);
316 image2d_max_width_valid_ =
true;
318 return image2d_max_width_;
324 if (!image3d_max_depth_valid_)
326 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_DEPTH,
sizeof(
size_t), static_cast<void *>(&image3d_max_depth_), NULL);
328 image3d_max_depth_valid_ =
true;
330 return image3d_max_depth_;
336 if (!image3d_max_height_valid_)
338 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
sizeof(
size_t), static_cast<void *>(&image3d_max_height_), NULL);
340 image3d_max_height_valid_ =
true;
342 return image3d_max_height_;
348 if (!image3d_max_width_valid_)
350 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_IMAGE3D_MAX_WIDTH,
sizeof(
size_t), static_cast<void *>(&image3d_max_width_), NULL);
352 image3d_max_width_valid_ =
true;
354 return image3d_max_width_;
360 if (!local_mem_size_valid_)
362 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(cl_ulong), static_cast<void *>(&local_mem_size_), NULL);
364 local_mem_size_valid_ =
true;
366 return local_mem_size_;
372 if (!local_mem_type_valid_)
374 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_LOCAL_MEM_TYPE,
sizeof(cl_device_local_mem_type), static_cast<void *>(&local_mem_type_), NULL);
376 local_mem_type_valid_ =
true;
378 return local_mem_type_;
384 if (!max_clock_frequency_valid_)
386 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(cl_uint), static_cast<void *>(&max_clock_frequency_), NULL);
388 max_clock_frequency_valid_ =
true;
390 return max_clock_frequency_;
396 if (!max_compute_units_valid_)
398 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(cl_uint), static_cast<void *>(&max_compute_units_), NULL);
400 max_compute_units_valid_ =
true;
402 return max_compute_units_;
408 if (!max_constant_args_valid_)
410 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_ARGS,
sizeof(cl_uint), static_cast<void *>(&max_constant_args_), NULL);
412 max_constant_args_valid_ =
true;
414 return max_constant_args_;
420 if (!max_constant_buffer_size_valid_)
422 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(cl_ulong), static_cast<void *>(&max_constant_buffer_size_), NULL);
424 max_constant_buffer_size_valid_ =
true;
426 return max_constant_buffer_size_;
432 if (!max_mem_alloc_size_valid_)
434 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_ulong), static_cast<void *>(&max_mem_alloc_size_), NULL);
436 max_mem_alloc_size_valid_ =
true;
438 return max_mem_alloc_size_;
447 if (!max_parameter_size_valid_)
449 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_PARAMETER_SIZE,
sizeof(
size_t), static_cast<void *>(&max_parameter_size_), NULL);
451 max_parameter_size_valid_ =
true;
453 return max_parameter_size_;
459 if (!max_read_image_args_valid_)
461 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_READ_IMAGE_ARGS,
sizeof(cl_uint), static_cast<void *>(&max_read_image_args_), NULL);
463 max_read_image_args_valid_ =
true;
465 return max_read_image_args_;
471 if (!max_samplers_valid_)
473 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_SAMPLERS,
sizeof(cl_uint), static_cast<void *>(&max_samplers_), NULL);
475 max_samplers_valid_ =
true;
477 return max_samplers_;
483 if (!max_work_group_size_valid_)
485 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(
size_t), static_cast<void *>(&max_work_group_size_), NULL);
487 max_work_group_size_valid_ =
true;
489 return max_work_group_size_;
495 if (!max_work_item_dimensions_valid_)
497 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(cl_uint), static_cast<void *>(&max_work_item_dimensions_), NULL);
499 max_work_item_dimensions_valid_ =
true;
501 return max_work_item_dimensions_;
512 assert(result.size() < 16 && bool(
"Supported work item dimensions exceed available capacity!"));
514 if (!max_work_item_sizes_valid_)
516 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WORK_ITEM_SIZES,
sizeof(
size_t) * 16, static_cast<void *>(&max_work_item_sizes_), NULL);
518 max_work_item_sizes_valid_ =
true;
522 result[i] = max_work_item_sizes_[i];
530 if (!max_write_image_args_valid_)
532 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
sizeof(cl_uint), static_cast<void *>(&max_write_image_args_), NULL);
534 max_write_image_args_valid_ =
true;
536 return max_write_image_args_;
542 if (!mem_base_addr_align_valid_)
544 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
sizeof(cl_uint), static_cast<void *>(&mem_base_addr_align_), NULL);
546 mem_base_addr_align_valid_ =
true;
548 return mem_base_addr_align_;
554 if (!min_data_type_align_size_valid_)
556 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
sizeof(cl_uint), static_cast<void *>(&min_data_type_align_size_), NULL);
558 min_data_type_align_size_valid_ =
true;
560 return min_data_type_align_size_;
568 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NAME,
sizeof(
char) * 256, static_cast<void *>(name_), NULL);
578 if( !architecture_family_valid_)
580 architecture_family_ = get_device_architecture(
vendor_id(),
name());
581 architecture_family_valid_ =
true;
583 return architecture_family_;
589 if (!native_vector_width_char_valid_)
591 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_char_), NULL);
593 native_vector_width_char_valid_ =
true;
595 return native_vector_width_char_;
601 if (!native_vector_width_short_valid_)
603 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_short_), NULL);
605 native_vector_width_short_valid_ =
true;
607 return native_vector_width_short_;
613 if (!native_vector_width_int_valid_)
615 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_int_), NULL);
617 native_vector_width_int_valid_ =
true;
619 return native_vector_width_int_;
625 if (!native_vector_width_long_valid_)
627 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_long_), NULL);
629 native_vector_width_long_valid_ =
true;
631 return native_vector_width_long_;
637 if (!native_vector_width_float_valid_)
639 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_float_), NULL);
641 native_vector_width_float_valid_ =
true;
643 return native_vector_width_float_;
652 if (!native_vector_width_double_valid_)
654 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_double_), NULL);
656 native_vector_width_double_valid_ =
true;
658 return native_vector_width_double_;
667 if (!native_vector_width_half_valid_)
669 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,
sizeof(cl_uint), static_cast<void *>(&native_vector_width_half_), NULL);
671 native_vector_width_half_valid_ =
true;
673 return native_vector_width_half_;
686 if (!opencl_c_version_valid_)
688 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_OPENCL_C_VERSION,
sizeof(
char) * 128, static_cast<void *>(opencl_c_version_), NULL);
690 opencl_c_version_valid_ =
true;
692 return opencl_c_version_;
698 if (!platform_valid_)
700 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PLATFORM,
sizeof(cl_platform_id), static_cast<void *>(&platform_), NULL);
702 platform_valid_ =
true;
710 if (!preferred_vector_width_char_valid_)
712 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_char_), NULL);
714 preferred_vector_width_char_valid_ =
true;
716 return preferred_vector_width_char_;
722 if (!preferred_vector_width_short_valid_)
724 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_short_), NULL);
726 preferred_vector_width_short_valid_ =
true;
728 return preferred_vector_width_short_;
734 if (!preferred_vector_width_int_valid_)
736 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_int_), NULL);
738 preferred_vector_width_int_valid_ =
true;
740 return preferred_vector_width_int_;
746 if (!preferred_vector_width_long_valid_)
748 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_long_), NULL);
750 preferred_vector_width_long_valid_ =
true;
752 return preferred_vector_width_long_;
758 if (!preferred_vector_width_float_valid_)
760 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_float_), NULL);
762 preferred_vector_width_float_valid_ =
true;
764 return preferred_vector_width_float_;
773 if (!preferred_vector_width_double_valid_)
775 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_double_), NULL);
777 preferred_vector_width_double_valid_ =
true;
779 return preferred_vector_width_double_;
788 if (!preferred_vector_width_half_valid_)
790 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,
sizeof(cl_uint), static_cast<void *>(&preferred_vector_width_half_), NULL);
792 preferred_vector_width_half_valid_ =
true;
794 return preferred_vector_width_half_;
807 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILE,
sizeof(
char) * 32, static_cast<void *>(profile_), NULL);
809 profile_valid_ =
true;
817 if (!profiling_timer_resolution_valid_)
819 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_PROFILING_TIMER_RESOLUTION,
sizeof(
size_t), static_cast<void *>(&profiling_timer_resolution_), NULL);
821 profiling_timer_resolution_valid_ =
true;
823 return profiling_timer_resolution_;
836 if (!queue_properties_valid_)
838 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_QUEUE_PROPERTIES,
sizeof(cl_command_queue_properties), static_cast<void *>(&queue_properties_), NULL);
840 queue_properties_valid_ =
true;
842 return queue_properties_;
860 if (!single_fp_config_valid_)
862 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(cl_device_fp_config), static_cast<void *>(&single_fp_config_), NULL);
864 single_fp_config_valid_ =
true;
866 return single_fp_config_;
877 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_TYPE,
sizeof(cl_device_type), static_cast<void *>(&type_), NULL);
889 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR,
sizeof(
char) * 256, static_cast<void *>(vendor_), NULL);
891 vendor_valid_ =
true;
899 if (!vendor_id_valid_)
901 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VENDOR_ID,
sizeof(cl_uint), static_cast<void *>(&vendor_id_), NULL);
903 vendor_id_valid_ =
true;
913 cl_int err = clGetDeviceInfo(device_, CL_DEVICE_VERSION,
sizeof(
char) * 256, static_cast<void *>(version_), NULL);
915 version_valid_ =
true;
923 if (!driver_version_valid_)
925 cl_int err = clGetDeviceInfo(device_, CL_DRIVER_VERSION,
sizeof(
char) * 256, static_cast<void *>(driver_version_), NULL);
927 driver_version_valid_ =
true;
929 return driver_version_;
940 if (ext.find(
"cl_khr_fp64") != std::string::npos || ext.find(
"cl_amd_fp64") != std::string::npos)
951 if (ext.find(
"cl_amd_fp64") != std::string::npos)
952 return "cl_amd_fp64";
954 if (ext.find(
"cl_khr_fp64") != std::string::npos)
955 return "cl_khr_fp64";
961 cl_device_id
id()
const
963 assert(device_ != 0 &&
bool(
"Device ID invalid!"));
977 std::string line_indent(indent, indent_char);
978 std::ostringstream oss;
979 oss << line_indent <<
"Name: " <<
name() << std::endl;
980 oss << line_indent <<
"Vendor: " <<
vendor() << std::endl;
981 oss << line_indent <<
"Type: " << device_type_to_string(
type()) << std::endl;
982 oss << line_indent <<
"Available: " <<
available() << std::endl;
985 oss << line_indent <<
"Global Mem Size: " <<
global_mem_size() << std::endl;
986 oss << line_indent <<
"Local Mem Size: " <<
local_mem_size() << std::endl;
987 oss << line_indent <<
"Local Mem Type: " <<
local_mem_type() << std::endl;
1000 std::string line_indent(indent, indent_char);
1001 std::ostringstream oss;
1002 oss << line_indent <<
"Address Bits: " <<
address_bits() << std::endl;
1003 oss << line_indent <<
"Available: " <<
available() << std::endl;
1005 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1006 oss << line_indent <<
"Double FP Config: " << fp_config_to_string(double_fp_config()) << std::endl;
1008 oss << line_indent <<
"Endian Little: " <<
endian_little() << std::endl;
1010 oss << line_indent <<
"Execution Capabilities: " << exec_capabilities_to_string(
execution_capabilities()) << std::endl;
1011 oss << line_indent <<
"Extensions: " <<
extensions() << std::endl;
1012 oss << line_indent <<
"Global Mem Cache Size: " <<
global_mem_cache_size() <<
" Bytes" << std::endl;
1013 oss << line_indent <<
"Global Mem Cache Type: " << mem_cache_type_to_string(
global_mem_cache_type()) << std::endl;
1015 oss << line_indent <<
"Global Mem Size: " <<
global_mem_size() <<
" Bytes" << std::endl;
1016 #ifdef CL_DEVICE_HALF_FP_CONFIG
1017 oss << line_indent <<
"Half PF Config: " << fp_config_to_string(half_fp_config()) << std::endl;
1020 oss << line_indent <<
"Image Support: " <<
image_support() << std::endl;
1022 oss << line_indent <<
"Image2D Max Width: " <<
image2d_max_width() << std::endl;
1023 oss << line_indent <<
"Image3D Max Depth: " <<
image3d_max_depth() << std::endl;
1025 oss << line_indent <<
"Image3D Max Width: " <<
image3d_max_width() << std::endl;
1026 oss << line_indent <<
"Local Mem Size: " <<
local_mem_size() <<
" Bytes" << std::endl;
1027 oss << line_indent <<
"Local Mem Type: " << local_mem_type_to_string(
local_mem_type()) << std::endl;
1028 oss << line_indent <<
"Max Clock Frequency: " <<
max_clock_frequency() <<
" MHz" << std::endl;
1029 oss << line_indent <<
"Max Compute Units: " <<
max_compute_units() << std::endl;
1030 oss << line_indent <<
"Max Constant Args: " <<
max_constant_args() << std::endl;
1032 oss << line_indent <<
"Max Mem Alloc Size: " <<
max_mem_alloc_size() <<
" Bytes" << std::endl;
1033 oss << line_indent <<
"Max Parameter Size: " <<
max_parameter_size() <<
" Bytes" << std::endl;
1035 oss << line_indent <<
"Max Samplers: " <<
max_samplers() << std::endl;
1038 oss << line_indent <<
"Max Work Item Sizes: " << convert_to_string(
max_work_item_sizes()) << std::endl;
1042 oss << line_indent <<
"Name: " <<
name() << std::endl;
1050 oss << line_indent <<
"OpenCL C Version: " <<
opencl_c_version() << std::endl;
1051 oss << line_indent <<
"Platform: " <<
platform() << std::endl;
1059 oss << line_indent <<
"Profile: " <<
profile() << std::endl;
1061 oss << line_indent <<
"Queue Properties: " << queue_properties_to_string(
queue_properties()) << std::endl;
1062 oss << line_indent <<
"Single FP Config: " << fp_config_to_string(
single_fp_config()) << std::endl;
1063 oss << line_indent <<
"Type: " << device_type_to_string(
type()) << std::endl;
1064 oss << line_indent <<
"Vendor: " <<
vendor() << std::endl;
1065 oss << line_indent <<
"Vendor ID: " <<
vendor_id() << std::endl;
1066 oss << line_indent <<
"Version: " <<
version() << std::endl;
1067 oss << line_indent <<
"Driver Version: " <<
driver_version() << std::endl;
1074 return device_ == other.device_;
1079 return device_ == other;
1085 std::string fp_config_to_string(cl_device_fp_config conf)
const
1087 std::ostringstream oss;
1088 if (conf & CL_FP_DENORM)
1089 oss <<
"CL_FP_DENORM ";
1090 if (conf & CL_FP_INF_NAN)
1091 oss <<
"CL_FP_INF_NAN ";
1092 if (conf & CL_FP_ROUND_TO_NEAREST)
1093 oss <<
"CL_FP_ROUND_TO_NEAREST ";
1094 if (conf & CL_FP_ROUND_TO_ZERO)
1095 oss <<
"CL_FP_ROUND_TO_ZERO ";
1096 if (conf & CL_FP_ROUND_TO_INF)
1097 oss <<
"CL_FP_ROUND_TO_INF ";
1098 if (conf & CL_FP_FMA)
1099 oss <<
"CL_FP_FMA ";
1100 if (conf & CL_FP_SOFT_FLOAT)
1101 oss <<
"CL_FP_SOFT_FLOAT ";
1106 std::string exec_capabilities_to_string(cl_device_exec_capabilities cap)
const
1108 std::ostringstream oss;
1109 if (cap & CL_EXEC_KERNEL)
1110 oss <<
"CL_EXEC_KERNEL ";
1111 if (cap & CL_EXEC_NATIVE_KERNEL)
1112 oss <<
"CL_EXEC_NATIVE_KERNEL ";
1117 std::string mem_cache_type_to_string(cl_device_mem_cache_type cachetype)
const
1119 std::ostringstream oss;
1120 if (cachetype == CL_NONE)
1122 else if (cachetype == CL_READ_ONLY_CACHE)
1123 oss <<
"CL_READ_ONLY_CACHE ";
1124 else if (cachetype == CL_READ_WRITE_CACHE)
1125 oss <<
"CL_READ_WRITE_CACHE ";
1130 std::string local_mem_type_to_string(cl_device_local_mem_type loc_mem_type)
const
1132 std::ostringstream oss;
1133 if (loc_mem_type & CL_LOCAL)
1135 if (loc_mem_type & CL_GLOBAL)
1136 oss <<
"CL_GLOBAL ";
1141 std::string convert_to_string(std::vector<size_t>
const & vec)
const
1143 std::ostringstream oss;
1145 oss << vec[i] <<
" ";
1150 std::string queue_properties_to_string(cl_command_queue_properties queue_prop)
const
1152 std::ostringstream oss;
1153 if (queue_prop & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
1154 oss <<
"CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ";
1155 if (queue_prop & CL_QUEUE_PROFILING_ENABLE)
1156 oss <<
"CL_QUEUE_PROFILING_ENABLE ";
1161 std::string device_type_to_string(cl_device_type dev_type)
const
1163 std::ostringstream oss;
1164 if (dev_type & CL_DEVICE_TYPE_GPU)
1166 if (dev_type & CL_DEVICE_TYPE_CPU)
1168 if (dev_type & CL_DEVICE_TYPE_ACCELERATOR)
1169 oss <<
"Accelerator ";
1170 if (dev_type & CL_DEVICE_TYPE_DEFAULT)
1178 address_bits_valid_ =
false;
1179 architecture_family_valid_ =
false;
1180 available_valid_ =
false;
1181 compiler_available_valid_ =
false;
1182 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1183 double_fp_config_valid_ =
false;
1185 endian_little_valid_ =
false;
1186 error_correction_support_valid_ =
false;
1187 execution_capabilities_valid_ =
false;
1188 extensions_valid_ =
false;
1189 global_mem_cache_size_valid_ =
false;
1190 global_mem_cache_type_valid_ =
false;
1191 global_mem_cacheline_size_valid_ =
false;
1192 global_mem_size_valid_ =
false;
1193 #ifdef CL_DEVICE_HALF_FP_CONFIG
1194 half_fp_config_valid_ =
false;
1196 host_unified_memory_valid_ =
false;
1197 image_support_valid_ =
false;
1198 image2d_max_height_valid_ =
false;
1199 image2d_max_width_valid_ =
false;
1200 image3d_max_depth_valid_ =
false;
1201 image3d_max_height_valid_ =
false;
1202 image3d_max_width_valid_ =
false;
1203 local_mem_size_valid_ =
false;
1204 local_mem_type_valid_ =
false;
1205 max_clock_frequency_valid_ =
false;
1206 max_compute_units_valid_ =
false;
1207 max_constant_args_valid_ =
false;
1208 max_constant_buffer_size_valid_ =
false;
1209 max_mem_alloc_size_valid_ =
false;
1210 max_parameter_size_valid_ =
false;
1211 max_read_image_args_valid_ =
false;
1212 max_samplers_valid_ =
false;
1213 max_work_group_size_valid_ =
false;
1214 max_work_item_dimensions_valid_ =
false;
1215 max_work_item_sizes_valid_ =
false;
1216 max_write_image_args_valid_ =
false;
1217 mem_base_addr_align_valid_ =
false;
1218 min_data_type_align_size_valid_ =
false;
1219 name_valid_ =
false;
1220 native_vector_width_char_valid_ =
false;
1221 native_vector_width_short_valid_ =
false;
1222 native_vector_width_int_valid_ =
false;
1223 native_vector_width_long_valid_ =
false;
1224 native_vector_width_float_valid_ =
false;
1225 native_vector_width_double_valid_ =
false;
1226 native_vector_width_half_valid_ =
false;
1227 opencl_c_version_valid_ =
false;
1228 platform_valid_ =
false;
1229 preferred_vector_width_char_valid_ =
false;
1230 preferred_vector_width_short_valid_ =
false;
1231 preferred_vector_width_int_valid_ =
false;
1232 preferred_vector_width_long_valid_ =
false;
1233 preferred_vector_width_float_valid_ =
false;
1234 preferred_vector_width_double_valid_ =
false;
1235 preferred_vector_width_half_valid_ =
false;
1236 profile_valid_ =
false;
1237 profiling_timer_resolution_valid_ =
false;
1238 queue_properties_valid_ =
false;
1239 single_fp_config_valid_ =
false;
1240 type_valid_ =
false;
1241 vendor_valid_ =
false;
1242 vendor_id_valid_ =
false;
1243 version_valid_ =
false;
1244 driver_version_valid_ =
false;
1247 cl_device_id device_;
1255 mutable bool address_bits_valid_;
1256 mutable cl_uint address_bits_;
1258 mutable bool available_valid_;
1259 mutable cl_bool available_;
1261 mutable bool compiler_available_valid_;
1262 mutable cl_bool compiler_available_;
1264 #ifdef CL_DEVICE_DOUBLE_FP_CONFIG
1265 mutable bool double_fp_config_valid_;
1266 mutable cl_device_fp_config double_fp_config_;
1269 mutable bool endian_little_valid_;
1270 mutable cl_bool endian_little_;
1272 mutable bool error_correction_support_valid_;
1273 mutable cl_bool error_correction_support_;
1275 mutable bool execution_capabilities_valid_;
1276 mutable cl_device_exec_capabilities execution_capabilities_;
1278 mutable bool extensions_valid_;
1279 mutable char extensions_[2048];
1281 mutable bool global_mem_cache_size_valid_;
1282 mutable cl_ulong global_mem_cache_size_;
1284 mutable bool global_mem_cache_type_valid_;
1285 mutable cl_device_mem_cache_type global_mem_cache_type_;
1287 mutable bool global_mem_cacheline_size_valid_;
1288 mutable cl_uint global_mem_cacheline_size_;
1290 mutable bool global_mem_size_valid_;
1291 mutable cl_ulong global_mem_size_;
1293 #ifdef CL_DEVICE_HALF_FP_CONFIG
1294 mutable bool half_fp_config_valid_;
1295 mutable cl_device_fp_config half_fp_config_;
1298 mutable bool host_unified_memory_valid_;
1299 mutable cl_bool host_unified_memory_;
1301 mutable bool image_support_valid_;
1302 mutable cl_bool image_support_;
1304 mutable bool image2d_max_height_valid_;
1305 mutable size_t image2d_max_height_;
1307 mutable bool image2d_max_width_valid_;
1308 mutable size_t image2d_max_width_;
1310 mutable bool image3d_max_depth_valid_;
1311 mutable size_t image3d_max_depth_;
1313 mutable bool image3d_max_height_valid_;
1314 mutable size_t image3d_max_height_;
1316 mutable bool image3d_max_width_valid_;
1317 mutable size_t image3d_max_width_;
1319 mutable bool local_mem_size_valid_;
1320 mutable cl_ulong local_mem_size_;
1322 mutable bool local_mem_type_valid_;
1323 mutable cl_device_local_mem_type local_mem_type_;
1325 mutable bool max_clock_frequency_valid_;
1326 mutable cl_uint max_clock_frequency_;
1328 mutable bool max_compute_units_valid_;
1329 mutable cl_uint max_compute_units_;
1331 mutable bool max_constant_args_valid_;
1332 mutable cl_uint max_constant_args_;
1334 mutable bool max_constant_buffer_size_valid_;
1335 mutable cl_ulong max_constant_buffer_size_;
1337 mutable bool max_mem_alloc_size_valid_;
1338 mutable cl_ulong max_mem_alloc_size_;
1340 mutable bool max_parameter_size_valid_;
1341 mutable size_t max_parameter_size_;
1343 mutable bool max_read_image_args_valid_;
1344 mutable cl_uint max_read_image_args_;
1346 mutable bool max_samplers_valid_;
1347 mutable cl_uint max_samplers_;
1349 mutable bool max_work_group_size_valid_;
1350 mutable size_t max_work_group_size_;
1352 mutable bool max_work_item_dimensions_valid_;
1353 mutable cl_uint max_work_item_dimensions_;
1355 mutable bool max_work_item_sizes_valid_;
1356 mutable size_t max_work_item_sizes_[16];
1358 mutable bool max_write_image_args_valid_;
1359 mutable cl_uint max_write_image_args_;
1361 mutable bool mem_base_addr_align_valid_;
1362 mutable cl_uint mem_base_addr_align_;
1364 mutable bool min_data_type_align_size_valid_;
1365 mutable cl_uint min_data_type_align_size_;
1367 mutable bool name_valid_;
1368 mutable char name_[256];
1370 mutable bool native_vector_width_char_valid_;
1371 mutable cl_uint native_vector_width_char_;
1373 mutable bool native_vector_width_short_valid_;
1374 mutable cl_uint native_vector_width_short_;
1376 mutable bool native_vector_width_int_valid_;
1377 mutable cl_uint native_vector_width_int_;
1379 mutable bool native_vector_width_long_valid_;
1380 mutable cl_uint native_vector_width_long_;
1382 mutable bool native_vector_width_float_valid_;
1383 mutable cl_uint native_vector_width_float_;
1385 mutable bool native_vector_width_double_valid_;
1386 mutable cl_uint native_vector_width_double_;
1388 mutable bool native_vector_width_half_valid_;
1389 mutable cl_uint native_vector_width_half_;
1391 mutable bool opencl_c_version_valid_;
1392 mutable char opencl_c_version_[128];
1394 mutable bool platform_valid_;
1395 mutable cl_platform_id platform_;
1397 mutable bool preferred_vector_width_char_valid_;
1398 mutable cl_uint preferred_vector_width_char_;
1400 mutable bool preferred_vector_width_short_valid_;
1401 mutable cl_uint preferred_vector_width_short_;
1403 mutable bool preferred_vector_width_int_valid_;
1404 mutable cl_uint preferred_vector_width_int_;
1406 mutable bool preferred_vector_width_long_valid_;
1407 mutable cl_uint preferred_vector_width_long_;
1409 mutable bool preferred_vector_width_float_valid_;
1410 mutable cl_uint preferred_vector_width_float_;
1412 mutable bool preferred_vector_width_double_valid_;
1413 mutable cl_uint preferred_vector_width_double_;
1415 mutable bool preferred_vector_width_half_valid_;
1416 mutable cl_uint preferred_vector_width_half_;
1418 mutable bool profile_valid_;
1419 mutable char profile_[32];
1421 mutable bool profiling_timer_resolution_valid_;
1422 mutable size_t profiling_timer_resolution_;
1424 mutable bool queue_properties_valid_;
1425 mutable cl_command_queue_properties queue_properties_;
1427 mutable bool single_fp_config_valid_;
1428 mutable cl_device_fp_config single_fp_config_;
1430 mutable bool type_valid_;
1431 mutable cl_device_type type_;
1433 mutable bool vendor_valid_;
1434 mutable char vendor_[256];
1436 mutable bool vendor_id_valid_;
1437 mutable cl_uint vendor_id_;
1439 mutable bool version_valid_;
1440 mutable char version_[256];
1442 mutable bool driver_version_valid_;
1443 mutable char driver_version_[256];
1445 mutable bool architecture_family_valid_;
std::string name() const
Device name string.
Definition: device.hpp:564
cl_uint address_bits() const
The default compute device address space size specified as an unsigned integer value in bits...
Definition: device.hpp:75
cl_ulong local_mem_size() const
Size of local memory arena in bytes. The minimum value is 32 KB.
Definition: device.hpp:358
std::size_t vcl_size_t
Definition: forwards.h:58
device()
Definition: device.hpp:52
size_t image3d_max_width() const
Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:346
cl_bool error_correction_support() const
Is CL_TRUE if the device implements error correction for all accesses to compute device memory (globa...
Definition: device.hpp:149
cl_uint preferred_vector_width_short() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:720
cl_uint max_samplers() const
Maximum number of samplers that can be used in a kernel. The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
Definition: device.hpp:469
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
Definition: device.hpp:481
cl_platform_id platform() const
The platform associated with this device.
Definition: device.hpp:696
size_t image2d_max_width() const
Max width of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:310
std::string vendor() const
Vendor name string.
Definition: device.hpp:885
cl_bool compiler_available() const
Is CL_FALSE if the implementation does not have a compiler available to compile the program source...
Definition: device.hpp:99
device_architecture_family architecture_family() const
Device architecture family.
Definition: device.hpp:576
cl_uint native_vector_width_double() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:650
device(const device &other)
Definition: device.hpp:62
cl_bool available() const
Is CL_TRUE if the device is available and CL_FALSE if the device is not available.
Definition: device.hpp:87
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
size_t profiling_timer_resolution() const
Describes the resolution of device timer. This is measured in nanoseconds.
Definition: device.hpp:815
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:936
cl_uint native_vector_width_int() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:611
std::string full_info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with all device properties defined in the OpenCL 1.1 standard, listed in alphabetical order. Use info() for a short overview.
Definition: device.hpp:998
cl_uint preferred_vector_width_long() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:744
cl_command_queue_properties queue_properties() const
Describes the command-queue properties supported by the device.
Definition: device.hpp:834
device_architecture_family
Definition: device_utils.hpp:51
#define VIENNACL_ERR_CHECK(err)
Definition: error.hpp:655
cl_ulong global_mem_cache_size() const
Size of global memory cache in bytes.
Definition: device.hpp:201
cl_ulong global_mem_size() const
Size of global memory in bytes.
Definition: device.hpp:237
device(cl_device_id dev)
Definition: device.hpp:54
std::string version() const
OpenCL version string. Returns the OpenCL version supported by the device.
Definition: device.hpp:909
std::vector< size_t > max_work_item_sizes() const
Maximum number of work-items that can be specified in each dimension of the work-group.
Definition: device.hpp:508
std::string driver_version() const
OpenCL software driver version string.
Definition: device.hpp:921
cl_device_id id() const
Returns the OpenCL device id.
Definition: device.hpp:961
size_t image2d_max_height() const
Max height of 2D image in pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:298
cl_uint max_clock_frequency() const
Maximum configured clock frequency of the device in MHz.
Definition: device.hpp:382
cl_uint native_vector_width_short() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:599
size_t image3d_max_height() const
Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:334
cl_uint max_read_image_args() const
Max number of simultaneous image objects that can be read by a kernel. The minimum value is 128 if CL...
Definition: device.hpp:457
cl_uint native_vector_width_float() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:635
Implementation of a smart-pointer-like class for handling OpenCL handles.
bool operator==(device const &other) const
Definition: device.hpp:1072
cl_uint preferred_vector_width_float() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:756
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
Definition: device.hpp:897
Various utility implementations for dispatching with respect to the different devices available on th...
std::string opencl_c_version() const
OpenCL C version string. Returns the highest OpenCL C version supported by the compiler for this devi...
Definition: device.hpp:684
cl_bool image_support() const
Is CL_TRUE if images are supported by the OpenCL device and CL_FALSE otherwise.
Definition: device.hpp:286
cl_device_fp_config single_fp_config() const
Describes single precision floating-point capability of the OpenCL device.
Definition: device.hpp:858
cl_uint global_mem_cacheline_size() const
Size of global memory cache line in bytes.
Definition: device.hpp:225
bool operator==(cl_device_id other) const
Definition: device.hpp:1077
cl_uint min_data_type_align_size() const
The smallest alignment in bytes which can be used for any data type.
Definition: device.hpp:552
cl_device_local_mem_type local_mem_type() const
Type of local memory supported. This can be set to CL_LOCAL implying dedicated local memory storage s...
Definition: device.hpp:370
std::string profile() const
OpenCL profile string. Returns the profile name supported by the device.
Definition: device.hpp:803
cl_device_exec_capabilities execution_capabilities() const
Describes the execution capabilities of the device.
Definition: device.hpp:167
cl_uint preferred_vector_width_half() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:786
Error handling for the OpenCL layer of ViennaCL.
cl_uint native_vector_width_char() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:587
cl_uint preferred_vector_width_double() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:771
size_t image3d_max_depth() const
Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE...
Definition: device.hpp:322
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:975
std::string extensions() const
Returns a space-separated list of extension names (the extension names themselves do not contain any ...
Definition: device.hpp:189
cl_uint preferred_vector_width_char() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:708
cl_bool endian_little() const
Is CL_TRUE if the OpenCL device is a little endian device and CL_FALSE otherwise. ...
Definition: device.hpp:137
size_t max_parameter_size() const
Max size in bytes of the arguments that can be passed to a kernel. The minimum value is 1024...
Definition: device.hpp:445
cl_uint max_work_item_dimensions() const
Maximum dimensions that specify the global and local work-item IDs used by the data parallel executio...
Definition: device.hpp:493
cl_bool host_unified_memory() const
Is CL_TRUE if the device and the host have a unified memory subsystem and is CL_FALSE otherwise...
Definition: device.hpp:274
cl_device_type type() const
The OpenCL device type.
Definition: device.hpp:873
cl_uint native_vector_width_long() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:623
cl_uint max_write_image_args() const
Max number of simultaneous image objects that can be written to by a kernel. The minimum value is 8 i...
Definition: device.hpp:528
cl_uint mem_base_addr_align() const
Describes the alignment in bits of the base address of any allocated memory object.
Definition: device.hpp:540
cl_ulong max_constant_buffer_size() const
Max size in bytes of a constant buffer allocation. The minimum value is 64 KB.
Definition: device.hpp:418
cl_uint native_vector_width_half() const
Returns the native ISA vector width. The vector width is defined as the number of scalar elements tha...
Definition: device.hpp:665
cl_uint max_constant_args() const
Max number of arguments declared with the __constant qualifier in a kernel. The minimum value is 8...
Definition: device.hpp:406
std::string double_support_extension() const
ViennaCL convenience function: Returns the device extension which enables double precision (usually c...
Definition: device.hpp:947
cl_device_mem_cache_type global_mem_cache_type() const
Type of global memory cache supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and CL_READ_WRITE_CACHE.
Definition: device.hpp:213
cl_uint max_compute_units() const
The number of parallel compute cores on the OpenCL device. The minimum value is 1.
Definition: device.hpp:394
cl_ulong max_mem_alloc_size() const
Max size of memory object allocation in bytes. The minimum value is max(1/4th of CL_DEVICE_GLOBAL_MEM...
Definition: device.hpp:430
cl_uint preferred_vector_width_int() const
Preferred native vector width size for built-in scalar types that can be put into vectors...
Definition: device.hpp:732