46 GGcout(
"GGEMSOpenCLManager",
"GGEMSOpenCLManager", 3) <<
"GGEMSOpenCLManager creating..." <<
GGendl;
48 GGcout(
"GGEMSOpenCLManager",
"GGEMSOpenCLManager", 1) <<
"Retrieving OpenCL platform(s)..." <<
GGendl;
52 #ifndef OPENCL_CACHE_KERNEL_COMPILATION
54 _putenv(
"CUDA_CACHE_DISABLE=1");
// putenv() makes the string it is handed part of the environment instead of copying it,
// so the buffer has to outlive this scope. A function-local std::string would leave a
// dangling environment entry once it is destroyed; a static array lives for the whole run.
56 static char disable_cache[] =
"CUDA_CACHE_DISABLE=1";
57 putenv(disable_cache);
62 std::string info_string(
"");
63 cl_device_type device_type;
64 cl_device_fp_config device_fp_config;
65 cl_device_exec_capabilities device_exec_capabilities;
66 cl_device_mem_cache_type device_mem_cache_type;
67 cl_device_local_mem_type device_local_mem_type;
68 cl_device_affinity_domain device_affinity_domain;
74 GGsize size_data[3] = {0, 0, 0};
78 CheckOpenCLError(p.getInfo(CL_PLATFORM_PROFILE, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
81 CheckOpenCLError(p.getInfo(CL_PLATFORM_VERSION, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
84 CheckOpenCLError(p.getInfo(CL_PLATFORM_NAME, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
87 CheckOpenCLError(p.getInfo(CL_PLATFORM_VENDOR, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
90 CheckOpenCLError(p.getInfo(CL_PLATFORM_EXTENSIONS, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
95 GGcout(
"GGEMSOpenCLManager",
"GGEMSOpenCLManager", 1) <<
"Retrieving OpenCL device(s)..." <<
GGendl;
97 std::vector<cl::Device> all_devices;
100 platforms_[i].getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
103 for (
auto& d : all_devices)
devices_.emplace_back(
new cl::Device(d));
117 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_VENDOR_ID, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
120 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PROFILE, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
129 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_OPENCL_C_VERSION, &char_data),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
132 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
135 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
138 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
141 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
144 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
147 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
150 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
153 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
156 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
159 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
162 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
165 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
168 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
171 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
174 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_ADDRESS_BITS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
177 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_AVAILABLE, &info_bool),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
180 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_COMPILER_AVAILABLE, &info_bool),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
184 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_HALF_FP_CONFIG, &device_fp_config),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
188 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_SINGLE_FP_CONFIG, &device_fp_config),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
191 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_DOUBLE_FP_CONFIG, &device_fp_config),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
195 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_ENDIAN_LITTLE, &info_bool),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
198 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_EXTENSIONS, &info_string),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
201 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_ERROR_CORRECTION_SUPPORT, &info_bool),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
204 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_EXECUTION_CAPABILITIES, &device_exec_capabilities),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
207 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, &info_ulong),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
210 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, &device_mem_cache_type),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
213 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
216 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &info_ulong),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
219 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &info_ulong),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
222 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_LOCAL_MEM_TYPE, &device_local_mem_type),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
225 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &info_bool),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
228 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE_SUPPORT, &info_bool),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
231 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
234 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
237 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE2D_MAX_WIDTH, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
240 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE2D_MAX_HEIGHT, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
243 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE3D_MAX_WIDTH, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
246 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE3D_MAX_HEIGHT, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
249 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_IMAGE3D_MAX_DEPTH, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
252 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_READ_IMAGE_ARGS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
255 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
258 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
261 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
264 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_CONSTANT_ARGS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
267 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, &info_ulong),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
270 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &info_ulong),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
273 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_PARAMETER_SIZE, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
276 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
279 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
282 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MEM_BASE_ADDR_ALIGN, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
285 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES, &size_data),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
288 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PRINTF_BUFFER_SIZE, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
291 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_MAX_SAMPLERS, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
294 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PARTITION_AFFINITY_DOMAIN, &device_affinity_domain),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
297 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PARTITION_MAX_SUB_DEVICES, &info_uint),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
300 CheckOpenCLError(
devices_[i]->getInfo(CL_DEVICE_PROFILING_TIMER_RESOLUTION, &info_size),
"GGEMSOpenCLManager",
"GGEMSOpenCLManager");
308 build_options_ =
"-cl-std=CL1.2 -w -Werror -cl-fast-relaxed-math";
311 #ifdef DOSIMETRY_DOUBLE_PRECISION
325 vendors_.insert(std::make_pair(
"nvidia",
"NVIDIA Corporation"));
326 vendors_.insert(std::make_pair(
"intel",
"Intel(R) Corporation"));
327 vendors_.insert(std::make_pair(
"amd",
"Advanced Micro Devices, Inc."));
333 GGcout(
"GGEMSOpenCLManager",
"GGEMSOpenCLManager", 3) <<
"GGEMSOpenCLManager created!!!" <<
GGendl;
342 GGcout(
"GGEMSOpenCLManager",
"~GGEMSOpenCLManager", 3) <<
"GGEMSOpenCLManager erasing..." <<
GGendl;
344 GGcout(
"GGEMSOpenCLManager",
"~GGEMSOpenCLManager", 3) <<
"GGEMSOpenCLManager erased!!!" <<
GGendl;
353 GGcout(
"GGEMSOpenCLManager",
"Clean", 3) <<
"GGEMSOpenCLManager cleaning..." <<
GGendl;
// Release every command queue object held in queues_.
445 for (auto* queue : queues_) {
delete queue;
}
// Release every event object held in events_.
447 for (auto* event : events_) {
delete event;
}
454 GGcout(
"GGEMSOpenCLManager",
"Clean", 3) <<
"GGEMSOpenCLManager cleaned!!!" <<
GGendl;
464 GGcout(
"GGEMSOpenCLManager",
"PrintPlatformInfos", 0) <<
GGendl;
465 GGcout(
"GGEMSOpenCLManager",
"PrintPlatformInfos", 0) <<
"#### PLATFORM: " << i <<
" ####" <<
GGendl;
472 GGcout(
"GGEMSOpenCLManager",
"PrintPlatformInfos", 0) <<
GGendl;
482 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
GGendl;
483 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
"#### DEVICE: " << i <<
" ####" <<
GGendl;
491 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Device Type: " <<
"CL_DEVICE_TYPE_CPU" <<
GGendl;
494 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Device Type: " <<
"CL_DEVICE_TYPE_GPU" <<
GGendl;
513 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Device Available: ON" <<
GGendl;
516 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Device Available: OFF" <<
GGendl;
519 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Compiler Available: ON" <<
GGendl;
522 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Compiler Available: OFF" <<
GGendl;
525 std::string half_fp_capability(
"");
533 half_fp_capability +=
device_half_fp_config_[i] & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ?
"CORRECTLY_ROUNDED_DIVIDE_SQRT" :
"";
534 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Half Precision Capability: " << half_fp_capability <<
GGendl;
536 std::string single_fp_capability(
"");
544 single_fp_capability +=
device_single_fp_config_[i] & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ?
"CORRECTLY_ROUNDED_DIVIDE_SQRT" :
"";
545 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Single Precision Capability: " << single_fp_capability <<
GGendl;
547 std::string double_fp_capability(
"");
555 double_fp_capability +=
device_double_fp_config_[i] & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ?
"CORRECTLY_ROUNDED_DIVIDE_SQRT" :
"";
556 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Double Precision Capability: " << double_fp_capability <<
GGendl;
559 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Endian Little: ON" <<
GGendl;
562 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Endian Little: OFF" <<
GGendl;
566 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Error Correction Support: ON" <<
GGendl;
569 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Error Correction Support: OFF" <<
GGendl;
571 std::string execution_capabilities(
"");
574 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Execution Capabilities: " << execution_capabilities <<
GGendl;
576 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Global Mem. Cache Type: " <<
"CL_NONE" <<
GGendl;
579 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Global Mem. Cache Type: " <<
"CL_READ_ONLY_CACHE" <<
GGendl;
582 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Global Mem. Cache Type: " <<
"CL_READ_WRITE_CACHE" <<
GGendl;
588 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Local Mem. Type: " <<
"CL_LOCAL" <<
GGendl;
591 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Local Mem. Type: " <<
"CL_GLOBAL" <<
GGendl;
595 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Host Unified Memory: ON" <<
GGendl;
598 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Host Unified Memory: OFF" <<
GGendl;
601 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Image Support: ON" <<
GGendl;
604 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Image Support: OFF" <<
GGendl;
// Human-readable list of the partition affinity domains reported for device i.
628 std::string partition_affinity(
"");
// NOTE(review): this tests a CL_DEVICE_AFFINITY_DOMAIN_* flag against
// device_single_fp_config_[i], the same array the single-precision capability string is
// built from. It looks like a copy-paste slip; the value queried with
// CL_DEVICE_PARTITION_AFFINITY_DOMAIN was probably intended — confirm and fix.
634 partition_affinity +=
device_single_fp_config_[i] & CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE ?
"NEXT_PARTITIONABLE " :
"";
// Print the assembled affinity list.
635 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
" + Partition Affinity: " << partition_affinity <<
GGendl;
639 GGcout(
"GGEMSOpenCLManager",
"PrintDeviceInfos", 0) <<
GGendl;
658 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 3) <<
"Printing activated devices for GGEMS..." <<
GGendl;
660 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 0) <<
GGendl;
661 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 0) <<
"ACTIVATED DEVICES:" <<
GGendl;
662 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 0) <<
"------------------" <<
GGendl;
666 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 0) <<
GGendl;
670 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 0) <<
" -> Type: CL_DEVICE_TYPE_CPU " <<
GGendl;
672 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevices", 0) <<
" -> Type: CL_DEVICE_TYPE_GPU " <<
GGendl;
675 GGcout(
"GGEMSOpenCLManager",
"PrintActivatedDevice", 0) <<
GGendl;
685 std::string type = device_type;
686 std::string vendor = device_vendor;
687 std::transform(type.begin(), type.end(), type.begin(), ::tolower);
688 std::transform(vendor.begin(), vendor.end(), vendor.begin(), ::tolower);
691 bool is_index_device =
false;
692 for (
GGsize i = 0; i < type.size(); ++i) {
693 if (isdigit(type[i]) != 0) {
694 is_index_device =
true;
703 else if (type ==
"cpu") {
708 else if (type ==
"gpu") {
716 else if (is_index_device) {
719 std::string delimiter =
";";
720 while ((pos = type.find(delimiter)) != std::string::npos) {
721 index =
static_cast<GGsize>(std::stoi(type.substr(0, pos)));
723 type.erase(0, pos + delimiter.length());
725 index =
static_cast<GGsize>(std::stoi(type));
729 std::ostringstream oss(std::ostringstream::out);
730 oss <<
"Unknown type of device '"<< type <<
"' !!!";
741 GGcout(
"GGEMSOpenCLManager",
"DeviceToActivate", 3) <<
"Activating a device for GGEMS..." <<
GGendl;
745 std::ostringstream oss(std::ostringstream::out);
746 oss <<
"Your device index is out of range!!! " <<
devices_.size() <<
" device(s) detected. Index must be in the range [" << 0 <<
";" <<
devices_.size() - 1 <<
"]!!!";
752 std::ostringstream oss(std::ostringstream::out);
753 oss <<
"Your device is not a GPU or CPU, please activate another device!!!";
764 #ifdef DOSIMETRY_DOUBLE_PRECISION
766 std::ostringstream oss(std::ostringstream::out);
767 oss <<
"Your OpenCL device '" <<
GetDeviceName(device_id) <<
"' does not support double precision!!! Please recompile GGEMS setting DOSIMETRY_DOUBLE_PRECISION to OFF.";
778 events_.push_back(
new cl::Event());
787 std::string tmp_device_load = device_balancing;
790 std::string delimiter =
";";
793 while ((pos = tmp_device_load.find(delimiter)) != std::string::npos) {
794 balancing = std::stof(tmp_device_load.substr(0, pos));
796 incr_balancing += balancing;
797 tmp_device_load.erase(0, pos + delimiter.length());
800 balancing = std::stof(tmp_device_load.substr(0, pos));
801 incr_balancing += balancing;
// The total is accumulated from parsed floating-point fractions, so rounding can leave it
// a hair away from 1 even for valid input. Accept anything within a small tolerance; the
// negated form keeps NaN (which fails every comparison) on the error path.
805 if (!(incr_balancing >= 1.0f - 1.0e-5f && incr_balancing <= 1.0f + 1.0e-5f)) {
806 std::ostringstream oss(std::ostringstream::out);
807 oss <<
"Device balancing has to be 1 !!! Please change your value. Current value is " << incr_balancing;
813 std::ostringstream oss(std::ostringstream::out);
814 oss <<
"Mismatch between number of device balancing values and number of activated devices!!!";
830 GGcout(
"GGEMSOpenCLManager",
"CheckKernel", 3) <<
"Checking if kernel has already been compiled..." <<
GGendl;
833 std::string registered_kernel_name(
"");
// Read the function name of the i-th registered kernel into registered_kernel_name.
837 CheckOpenCLError(
kernels_.at(i)->getInfo(CL_KERNEL_FUNCTION_NAME, &registered_kernel_name),
"GGEMSOpenCLManager",
"CheckKernel");
// Drop the last character of the returned name (presumably a trailing NUL written by the
// OpenCL query — confirm). The guard avoids erasing from an empty string, which would be
// undefined behaviour.
838 if (!registered_kernel_name.empty()) registered_kernel_name.erase(registered_kernel_name.end()-1);
849 void GGEMSOpenCLManager::CompileKernel(std::string
const& kernel_filename, std::string
const& kernel_name, cl::Kernel** kernel_list,
char*
const p_custom_options,
char*
const p_additional_options)
851 GGcout(
"GGEMSOpenCLManager",
"CompileKernel", 3) <<
"Compiling a kernel on OpenCL activated context..." <<
GGendl;
854 if (p_custom_options && p_additional_options) {
855 std::ostringstream oss(std::ostringstream::out);
856 oss <<
"Custom and additional options can not by set in same time!!!";
// Scratch buffer that receives the option string used for this kernel compilation.
// NOTE(review): the buffer is a fixed 1024 bytes while the option strings come from the
// caller. The plain strcpy/strcat calls below perform no bounds checking, so an overlong
// option string would overflow it — consider std::string or a length check.
861 char kernel_compilation_option[1024];
// Custom options are copied into the buffer as-is.
862 if (p_custom_options) {
// NOTE(review): the *_s and plain variants appear back to back here; presumably they sit
// in platform-specific preprocessor branches that are not visible in this excerpt.
864 ::strcpy_s(kernel_compilation_option, p_custom_options);
866 ::strcpy(kernel_compilation_option, p_custom_options);
// Additional options are appended after a separating space.
// NOTE(review): the statements that seed the buffer before these appends are not visible
// here — confirm it is initialised before strcat runs.
869 else if (p_additional_options) {
872 ::strcat_s(kernel_compilation_option,
" ");
873 ::strcat_s(kernel_compilation_option, p_additional_options);
876 ::strcat(kernel_compilation_option,
" ");
877 ::strcat(kernel_compilation_option, p_additional_options);
894 kernel_list[i] =
kernels_[kernel_index+i];
899 std::ifstream source_file_stream(kernel_filename.c_str(), std::ios::in);
903 std::string source_code(std::istreambuf_iterator<char>(source_file_stream), (std::istreambuf_iterator<char>()));
906 cl::Program::Sources program_source(1, std::make_pair(source_code.c_str(), source_code.length() + 1));
911 cl::Program program = cl::Program(*
contexts_[i], program_source);
914 std::vector<cl::Device> device;
917 GGcout(
"GGEMSOpenCLManager",
"CompileKernel", 2) <<
"Compile a new kernel '" << kernel_name <<
"' from file: " << kernel_filename <<
" on device: " <<
GetDeviceName(
device_indices_[i]) <<
" with options: " << kernel_compilation_option <<
GGendl;
920 GGint build_status = program.build(device, kernel_compilation_option);
921 if (build_status != CL_SUCCESS) {
922 std::ostringstream oss(std::ostringstream::out);
924 program.getBuildInfo(device[0], CL_PROGRAM_BUILD_LOG, &log);
925 oss <<
ErrorType(build_status) << std::endl;
931 kernels_.push_back(
new cl::Kernel(program, kernel_name.c_str(), &build_status));
947 GGcout(
"GGEMSOpenCLManager",
"Allocate", 3) <<
"Allocating memory on OpenCL device memory..." <<
GGendl;
957 std::ostringstream oss(std::ostringstream::out);
958 oss <<
"Size of buffer: " << size <<
" bytes, is too big!!! The maximum size is " <<
GetMaxBufferAllocationSize(device_index) <<
" bytes";
968 cl::Buffer* buffer =
new cl::Buffer(*
contexts_[thread_index], flags, size, host_ptr, &error);
983 GGcout(
"GGEMSOpenCLManager",
"Deallocate", 3) <<
"Deallocating memory on OpenCL device memory..." <<
GGendl;
1000 GGcout(
"GGEMSOpenCLManager",
"CleanBuffer", 3) <<
"Cleaning OpenCL buffer..." <<
GGendl;
// Enqueue a fill of `size` bytes, starting at offset 0, with a zero pattern on the queue
// selected by thread_index; no wait list and no completion event are supplied.
// NOTE(review): the pattern is the int literal 0, so the wrapper presumably deduces a
// 4-byte pattern. clEnqueueFillBuffer requires offset and size to be multiples of the
// pattern size — confirm callers never pass a size that is not a multiple of sizeof(int).
1002 GGint error =
queues_[thread_index]->enqueueFillBuffer(*buffer, 0, 0, size,
nullptr,
nullptr);
1012 if (
device_extensions_[device_index].find(
"cl_khr_fp64") == std::string::npos)
return false;
1022 if (
device_extensions_[device_index].find(
"cl_khr_int64_base_atomics") == std::string::npos)
return false;
1033 return number_of_elements;
1050 if (error != CL_SUCCESS) {
1062 std::ostringstream oss(std::ostringstream::out);
1071 oss <<
"CL_DEVICE_NOT_FOUND:" << std::endl;
1072 oss <<
" * if no OpenCL devices that matched device_type were found." << std::endl;
1076 oss <<
"CL_DEVICE_NOT_AVAILABLE:" << std::endl;
1077 oss <<
" * if a device in devices is currently not available even though the device was returned by clGetDeviceIDs." << std::endl;
1081 oss <<
"CL_COMPILER_NOT_AVAILABLE:" << std::endl;
1082 oss <<
" * if program is created with clCreateProgramWithSource and a compiler is not available i.e. CL_DEVICE_COMPILER_AVAILABLE specified in the table of OpenCL Device Queries for clGetDeviceInfo is set to CL_FALSE." << std::endl;
1086 oss <<
"CL_MEM_OBJECT_ALLOCATION_FAILURE:" << std::endl;
1087 oss <<
" * if there is a failure to allocate memory for buffer object." << std::endl;
1091 oss <<
"CL_OUT_OF_RESOURCES:" << std::endl;
1092 oss <<
" * if there is a failure to allocate resources required by the OpenCL implementation on the device." << std::endl;
1096 oss <<
"CL_OUT_OF_HOST_MEMORY:" << std::endl;
1097 oss <<
" * if there is a failure to allocate resources required by the OpenCL implementation on the host." << std::endl;
1101 oss <<
"CL_PROFILING_INFO_NOT_AVAILABLE:" << std::endl;
1102 oss <<
" * if the CL_QUEUE_PROFILING_ENABLE flag is not set for the command-queue, if the execution status of the command identified by event is not CL_COMPLETE or if event is a user event object." << std::endl;
1106 oss <<
"CL_MEM_COPY_OVERLAP:" << std::endl;
1107 oss <<
" * if src_buffer and dst_buffer are the same buffer or subbuffer object and the source and destination regions overlap or if src_buffer and dst_buffer are different sub-buffers of the same associated buffer object and they overlap. The regions overlap if src_offset <= to dst_offset <= to src_offset + size – 1, or if dst_offset <= to src_offset <= to dst_offset + size – 1." << std::endl;
1111 oss <<
"CL_IMAGE_FORMAT_MISMATCH:" << std::endl;
1112 oss <<
" * if src_image and dst_image do not use the same image format." << std::endl;
1116 oss <<
"CL_IMAGE_FORMAT_NOT_SUPPORTED:" << std::endl;
1117 oss <<
" * if the image_format is not supported." << std::endl;
1121 oss <<
"CL_BUILD_PROGRAM_FAILURE:" << std::endl;
1122 oss <<
" * if there is a failure to build the program executable. This error will be returned if clBuildProgram does not return until the build has completed." << std::endl;
1126 oss <<
"CL_MAP_FAILURE:" << std::endl;
1127 oss <<
" * if there is a failure to map the requested region into the host address space. This error cannot occur for image objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR." << std::endl;
1131 oss <<
"CL_MISALIGNED_SUB_BUFFER_OFFSET:" << std::endl;
1132 oss <<
" * if a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue." << std::endl;
1136 oss <<
"CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:" << std::endl;
1137 oss <<
" * if the execution status of any of the events in event_list is a negative integer value." << std::endl;
1141 oss <<
"CL_COMPILE_PROGRAM_FAILURE:" << std::endl;
1142 oss <<
" * if there is a failure to compile the program source. This error will be returned if clCompileProgram does not return until the compile has completed." << std::endl;
1146 oss <<
"CL_LINKER_NOT_AVAILABLE:" << std::endl;
1147 oss <<
" * if a linker is not available i.e. CL_DEVICE_LINKER_AVAILABLE specified in the table of allowed values for param_name for clGetDeviceInfo is set to CL_FALSE." << std::endl;
1151 oss <<
"CL_LINK_PROGRAM_FAILURE:" << std::endl;
1152 oss <<
" * if there is a failure to link the compiled binaries and/or libraries." << std::endl;
1156 oss <<
"CL_DEVICE_PARTITION_FAILED:" << std::endl;
1157 oss <<
" * if the partition name is supported by the implementation but in_device could not be further partitioned." << std::endl;
1161 oss <<
"CL_KERNEL_ARG_INFO_NOT_AVAILABLE:" << std::endl;
1162 oss <<
" * if the argument information is not available for kernel." << std::endl;
1166 oss <<
"CL_INVALID_VALUE:" << std::endl;
1167 oss <<
" * This depends on the function: two or more coupled parameters had errors." << std::endl;
1171 oss <<
"CL_INVALID_DEVICE_TYPE:" << std::endl;
1172 oss <<
" * if an invalid device_type is given" << std::endl;
1176 oss <<
"CL_INVALID_PLATFORM:" << std::endl;
1177 oss <<
" * if an invalid platform was given" << std::endl;
1181 oss <<
"CL_INVALID_DEVICE:" << std::endl;
1182 oss <<
" * if devices contains an invalid device or are not associated with the specified platform." << std::endl;
1186 oss <<
"CL_INVALID_CONTEXT:" << std::endl;
1187 oss <<
" * if context is not a valid context." << std::endl;
1191 oss <<
"CL_INVALID_QUEUE_PROPERTIES:" << std::endl;
1192 oss <<
" * if specified command-queue-properties are valid but are not supported by the device." << std::endl;
1196 oss <<
"CL_INVALID_COMMAND_QUEUE:" << std::endl;
1197 oss <<
" * if command_queue is not a valid command-queue." << std::endl;
1201 oss <<
"CL_INVALID_HOST_PTR:" << std::endl;
1202 oss <<
" * This flag is valid only if host_ptr is not NULL. If specified, it indicates that the application wants the OpenCL implementation to allocate memory for the memory object and copy the data from memory referenced by host_ptr.CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR are mutually exclusive.CL_MEM_COPY_HOST_PTR can be used with CL_MEM_ALLOC_HOST_PTR to initialize the contents of the cl_mem object allocated using host-accessible (e.g. PCIe) memory." << std::endl;
1206 oss <<
"CL_INVALID_MEM_OBJECT:" << std::endl;
1207 oss <<
" * if memobj is not a valid OpenCL memory object." << std::endl;
1211 oss <<
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:" << std::endl;
1212 oss <<
" * if the OpenGL/DirectX texture internal format does not map to a supported OpenCL image format." << std::endl;
1216 oss <<
"CL_INVALID_IMAGE_SIZE:" << std::endl;
1217 oss <<
" * if an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue." << std::endl;
1221 oss <<
"CL_INVALID_SAMPLER:" << std::endl;
1222 oss <<
" * if sampler is not a valid sampler object." << std::endl;
1226 oss <<
"CL_INVALID_BINARY:" << std::endl;
1227 oss <<
" * The provided binary is unfit for the selected device.if program is created with clCreateProgramWithBinary and devices listed in device_list do not have a valid program binary loaded." << std::endl;
1231 oss <<
"CL_INVALID_BUILD_OPTIONS:" << std::endl;
1232 oss <<
" * if the build options specified by options are invalid." << std::endl;
1236 oss <<
"CL_INVALID_PROGRAM:" << std::endl;
1237 oss <<
" * if program is a not a valid program object." << std::endl;
1241 oss <<
"CL_INVALID_PROGRAM_EXECUTABLE:" << std::endl;
1242 oss <<
" * if there is no successfully built program executable available for device associated with command_queue." << std::endl;
1246 oss <<
"CL_INVALID_KERNEL_NAME:" << std::endl;
1247 oss <<
" * if kernel_name is not found in program." << std::endl;
1251 oss <<
"CL_INVALID_KERNEL_DEFINITION:" << std::endl;
1252 oss <<
" * if the function definition for __kernel function given by kernel_name such as the number of arguments, the argument types are not the same for all devices for which the program executable has been built." << std::endl;
1256 oss <<
"CL_INVALID_KERNEL:" << std::endl;
1257 oss <<
" * if kernel is not a valid kernel object." << std::endl;
1261 oss <<
"CL_INVALID_ARG_INDEX:" << std::endl;
1262 oss <<
" * if arg_index is not a valid argument index." << std::endl;
1266 oss <<
"CL_INVALID_ARG_VALUE:" << std::endl;
1267 oss <<
" * if arg_value specified is not a valid value." << std::endl;
1271 oss <<
"CL_INVALID_ARG_SIZE:" << std::endl;
1272 oss <<
" * if arg_size does not match the size of the data type for an argument that is not a memory object or if the argument is a memory object and arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the __local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler)." << std::endl;
1276 oss <<
"CL_INVALID_KERNEL_ARGS:" << std::endl;
1277 oss <<
" * if the kernel argument values have not been specified." << std::endl;
1281 oss <<
"CL_INVALID_WORK_DIMENSION:" << std::endl;
1282 oss <<
" * if work_dim is not a valid value (i.e. a value between 1 and 3)." << std::endl;
1286 oss <<
"CL_INVALID_WORK_GROUP_SIZE:" << std::endl;
1287 oss <<
" * if local_work_size is specified and number of work-items specified by global_work_size is not evenly divisable by size of work-group given by local_work_size or does not match the work-group size specified for kernel using the __attribute__((reqd_work_group_size(X, Y, Z))) qualifier in program source.if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] *... local_work_size[work_dim – 1] is greater than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in the table of OpenCL Device Queries for clGetDeviceInfo. if local_work_size is NULL and the __attribute__ ((reqd_work_group_size(X, Y, Z))) qualifier is used to declare the work-group size for kernel in the program source." << std::endl;
1291 oss <<
"CL_INVALID_WORK_ITEM_SIZE:" << std::endl;
1292 oss <<
" * if the number of work-items specified in any of local_work_size[0], … local_work_size[work_dim – 1] is greater than the corresponding values specified by CL_DEVICE_MAX_WORK_ITEM_SIZES[0], ... CL_DEVICE_MAX_WORK_ITEM_SIZES[work_dim – 1]" << std::endl;
1296 oss <<
"CL_INVALID_GLOBAL_OFFSET:" << std::endl;
1297 oss <<
" * if the value specified in global_work_size + the corresponding values in global_work_offset for any dimensions is greater than the sizeof(size_t) for the device on which the kernel execution will be enqueued." << std::endl;
1301 oss <<
"CL_INVALID_EVENT_WAIT_LIST:" << std::endl;
1302 oss <<
" * if event_wait_list is NULL and num_events_in_wait_list > 0, or event_wait_list is not NULL and num_events_in_wait_list is 0, or if event objects in event_wait_list are not valid events." << std::endl;
1306 oss <<
"CL_INVALID_EVENT:" << std::endl;
1307 oss <<
" * if event objects specified in event_list are not valid event objects." << std::endl;
1311 oss <<
"CL_INVALID_OPERATION:" << std::endl;
1312 oss <<
" * if interoperability is specified by setting CL_CONTEXT_ADAPTER_D3D9_KHR, CL_CONTEXT_ADAPTER_D3D9EX_KHR or CL_CONTEXT_ADAPTER_DXVA_KHR to a non-NULL value, and interoperability with another graphics API is also specified. (only if the cl_khr_dx9_media_sharing extension is supported)." << std::endl;
1316 oss <<
"CL_INVALID_GL_OBJECT:" << std::endl;
1317 oss <<
" * if texture is not a GL texture object whose type matches texture_target, if the specified miplevel of texture is not defined, or if the width or height of the specified miplevel is zero." << std::endl;
1321 oss <<
"CL_INVALID_BUFFER_SIZE:" << std::endl;
1322 oss <<
" * if size is 0.Implementations may return CL_INVALID_BUFFER_SIZE if size is greater than the CL_DEVICE_MAX_MEM_ALLOC_SIZE value specified in the table of allowed values for param_name for clGetDeviceInfo for all devices in context." << std::endl;
1326 oss <<
"CL_INVALID_MIP_LEVEL:" << std::endl;
1327 oss <<
" * if miplevel is greater than zero and the OpenGL implementation does not support creating from non-zero mipmap levels." << std::endl;
1331 oss <<
"CL_INVALID_GLOBAL_WORK_SIZE:" << std::endl;
1332 oss <<
" * if global_work_size is NULL, or if any of the values specified in global_work_size[0], ... global_work_size [work_dim – 1] are 0 or exceed the range given by the sizeof(size_t) for the device on which the kernel execution will be enqueued." << std::endl;
1336 oss <<
"CL_INVALID_PROPERTY:" << std::endl;
1337 oss <<
" * Vague error, depends on the function" << std::endl;
1341 oss <<
"CL_INVALID_IMAGE_DESCRIPTOR:" << std::endl;
1342 oss <<
" * if values specified in image_desc are not valid or if image_desc is NULL." << std::endl;
1346 oss <<
"CL_INVALID_COMPILER_OPTIONS:" << std::endl;
1347 oss <<
" * if the compiler options specified by options are invalid." << std::endl;
1351 oss <<
"CL_INVALID_LINKER_OPTIONS:" << std::endl;
1352 oss <<
" * if the linker options specified by options are invalid." << std::endl;
1356 oss <<
"CL_INVALID_DEVICE_PARTITION_COUNT:" << std::endl;
1357 oss <<
" * if the partition name specified in properties is CL_DEVICE_PARTITION_BY_COUNTS and the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_SUB_DEVICES or the total number of compute units requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device, or the number of compute units requested for one or more sub-devices is less than zero or the number of sub-devices requested exceeds CL_DEVICE_PARTITION_MAX_COMPUTE_UNITS for in_device." << std::endl;
1361 oss <<
"CL_INVALID_PIPE_SIZE:" << std::endl;
1362 oss <<
" * if pipe_packet_size is 0 or the pipe_packet_size exceeds CL_DEVICE_PIPE_MAX_PACKET_SIZE value for all devices in context or if pipe_max_packets is 0." << std::endl;
1366 oss <<
"CL_INVALID_DEVICE_QUEUE:" << std::endl;
1367 oss <<
" * when an argument is of type queue_t when it’s not a valid device queue object." << std::endl;
1371 oss <<
"CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:" << std::endl;
1372 oss <<
" * CL and GL not on the same device (only when using a GPU)." << std::endl;
1376 oss <<
"CL_PLATFORM_NOT_FOUND_KHR:" << std::endl;
1377 oss <<
" * No valid ICDs found" << std::endl;
1381 oss <<
"CL_INVALID_D3D10_DEVICE_KHR:" << std::endl;
1382 oss <<
" * if the Direct3D 10 device specified for interoperability is not compatible with the devices against which the context is to be created." << std::endl;
1386 oss <<
"CL_INVALID_D3D10_RESOURCE_KHR:" << std::endl;
1387 oss <<
" * If the resource is not a Direct3D 10 buffer or texture object" << std::endl;
1391 oss <<
"CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR:" << std::endl;
1392 oss <<
" * If a mem_object is already acquired by OpenCL" << std::endl;
1396 oss <<
"CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR:" << std::endl;
1397 oss <<
" * If a mem_object is not acquired by OpenCL" << std::endl;
1401 oss <<
"CL_INVALID_D3D11_DEVICE_KHR:" << std::endl;
1402 oss <<
" * if the Direct3D 11 device specified for interoperability is not compatible with the devices against which the context is to be created." << std::endl;
1406 oss <<
"CL_INVALID_D3D11_RESOURCE_KHR:" << std::endl;
1407 oss <<
" * If the resource is not a Direct3D 11 buffer or texture object" << std::endl;
1411 oss <<
"CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR:" << std::endl;
1412 oss <<
" * If a mem_object is already acquired by OpenCL" << std::endl;
1416 oss <<
"CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR:" << std::endl;
1417 oss <<
" * If a mem_object is not acquired by OpenCL" << std::endl;
1421 oss <<
"NVidia:" << std::endl;
1422 oss <<
" * Illegal read or write to a buffer" << std::endl;
1426 oss <<
"Unknown OpenCL error" << std::endl;
1477 opencl_manager->
Clean();