介绍
当谈到高性能计算,NVIDIA的CUDA框架无疑是一个强大的工具。但如果你需要一个更为通用的解决方案,或者你使用的是非NVIDIA硬件,那么OpenCL(Open Computing Language)是一个极佳的选择。作为一个开放标准,OpenCL支持广泛的硬件平台,包括CPU、GPU、DSP和FPGA。
OpenCL库API
clGetPlatformIDs
获取可用平台的数量以及平台ID列表
clGetPlatformInfo
获取平台指定info信息
clGetDeviceIDs
获取指定平台上某一类型设备的数量以及设备ID列表
clGetDeviceInfo
获取设备指定info信息
platform_info定义
CL/cl.h
/* cl_platform_info */
#define CL_PLATFORM_PROFILE 0x0900
#define CL_PLATFORM_VERSION 0x0901
#define CL_PLATFORM_NAME 0x0902
#define CL_PLATFORM_VENDOR 0x0903
#define CL_PLATFORM_EXTENSIONS 0x0904
#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905
device_info定义
CL/cl.h
/* cl_device_info */
#define CL_DEVICE_TYPE 0x1000
#define CL_DEVICE_VENDOR_ID 0x1001
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
...
测试程序
仓库地址LitchiCheng/OpenCL-Optimalization-Test (github.com)
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
#include "string.h"
#include <CL/cl.h>
/* Number of platform attributes queried for every platform. */
#define API_NUM 4

/* Attributes passed to clGetPlatformInfo by main(), in the order they are printed. */
cl_platform_info platform_info[API_NUM] = {
    CL_PLATFORM_PROFILE,
    CL_PLATFORM_VERSION,
    CL_PLATFORM_NAME,
    CL_PLATFORM_VENDOR,
};
int main(int argc, char const *argv[])
{
cl_int status = 0;
cl_uint numPlatforms;
cl_platform_id platform = NULL;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) {
printf("ERROR: Getting Platforms.(clGetPlatformIDs)\n");
return EXIT_FAILURE;
}
printf("\r\n");
printf("clGetPlatformIDs num is %d \r\n", numPlatforms);
if (numPlatforms > 0) {
cl_platform_id *platforms = (cl_platform_id *)malloc(numPlatforms * sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if (status != CL_SUCCESS) {
printf("Error: Getting Platform Ids.(clGetPlatformIDs):%d\n", status);
return -1;
}
for (int i = 0; i < numPlatforms; ++i) {
for(int index=0; index < API_NUM; ++index){
char charbuff[100];
status = clGetPlatformInfo(
platforms[i],
platform_info[index],
sizeof(charbuff),
charbuff,
NULL);
platform = platforms[i];
printf("clGetPlatformInfo %s \r\n", charbuff);
memset(charbuff, 0x00, sizeof(charbuff));
}
}
delete platforms;
}
cl_uint num_device;
cl_device_id device;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_device);
printf("GPU num is %d \r\n", num_device);
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (status != CL_SUCCESS) {
printf("Error: clGetDeviceIDs:%d\n", status);
return -1;
}
cl_uint device_max_compute_units;
status = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint),
&device_max_compute_units, NULL);
if (status != CL_SUCCESS) {
printf("Error: CL_DEVICE_MAX_COMPUTE_UNITS:%d\n", status);
return -1;
}
printf("CL_DEVICE_MAX_COMPUTE_UNITS %d \r\n", device_max_compute_units);
cl_ulong device_global_mem_size;
status = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong),
&device_global_mem_size, NULL);
if (status != CL_SUCCESS) {
printf("Error: CL_DEVICE_GLOBAL_MEM_SIZE:%d\n", status);
return -1;
}
printf("CL_DEVICE_GLOBAL_MEM_SIZE %ld \r\n", device_global_mem_size);
char device_name[100];
status = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
device_name, NULL);
if (status != CL_SUCCESS) {
printf("Error: CL_DEVICE_NAME:%d\n", status);
return -1;
}
printf("CL_DEVICE_NAME %s \r\n", device_name);
return 0;
}
输出结果
sudo ./getDeviceInfo
arm_release_ver of this libmali is 'r18p0-01rel0', rk_so_ver is '4'.
clGetPlatformIDs num is 1
clGetPlatformInfo FULL_PROFILE
clGetPlatformInfo OpenCL 1.2 v1.r18p0-01rel0.ddd394a39c9049aa64d45a44032b5335
clGetPlatformInfo ARM Platform
clGetPlatformInfo ARM
GPU num is 1
CL_DEVICE_MAX_COMPUTE_UNITS 4
CL_DEVICE_GLOBAL_MEM_SIZE 4026908672
CL_DEVICE_NAME Mali-T860
clinfo对比