接触 OpenCL 的时间不是很久,很多东西都是现学,还有很多不懂的地方。
我写了个OpenCL的示例:在float型数组中找到最大值或最小值。
麻烦帮我看一下问题出在哪里?原因是什么?或者告诉我 kernel 函数如何进行调试,比如将输入输出打印出来?
#include <stdio.h>
#include <stdlib.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#define MEM_SIZE (32)
#define MAX_SOURCE_SIZE (0x100000)
int main()
{
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue command_queue = NULL;
cl_mem memobj[2] = { NULL };
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_platform_id platform_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret;
float array[MEM_SIZE];
float result;
printf("array:");
for(int i = 0; i<MEM_SIZE ; i++)
{
array[i] = i;
printf("%f ",array[i]);
}
printf("\n");
FILE *fp;
char fileName[] = "./findmax.cl";
char *source_str;
size_t source_size;
/* 打开文件*/
fp = fopen(fileName, "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
/* 设备获取 */
ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
/* 上下文创建 */
context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
/* 工作队列创建 */
command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
/* 创建项目 */
program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
(const size_t *)&source_size, &ret);
/* 编译 */
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
/* 创建内核 */
kernel = clCreateKernel(program, "findmax", &ret);
/* 内存对象创建 */
memobj[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,MEM_SIZE * sizeof(float), array, &ret);
memobj[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,sizeof(float), NULL, &ret);
/* 设置内核参数 */
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)memobj[0]);
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)memobj[1]);
/* OpenCL内核执行 */
size_t globalWorkSize = MEM_SIZE ;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
/* 读取Buffer */
ret = clEnqueueReadBuffer(command_queue, memobj[1], CL_TRUE, 0,
sizeof(float),&result, 0, NULL, NULL);
/* 結果显示 */
printf("Result:%f\n",result);
/* 終了処理 */
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(memobj[0]);
ret = clReleaseMemObject(memobj[1]);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(source_str);
return 0;
}
kernel(findmax.cl):
/*
 * findmax: writes the largest of a[0 .. get_global_size(0) - 1] to *result.
 *
 * a      - input values; must hold at least get_global_size(0) floats.
 * result - receives the maximum as a single float.
 *
 * The kernel must be named "findmax" because that is the name the host passes
 * to clCreateKernel. It is expected to be launched without a global offset,
 * so that a work-item with global id 0 exists.
 *
 * Only one work-item performs the scan. Letting every work-item store to
 * *result is a data race, and reading a[gid + 1] runs past the end of the
 * array for the last work-item. For large inputs a staged parallel reduction
 * would be faster; this version favours correctness and simplicity.
 *
 * Debugging hint: OpenCL 1.2 and later allow printf() inside kernels, e.g.
 * printf("gid=%d a=%f\n", (int)get_global_id(0), a[get_global_id(0)]);
 */
__kernel void findmax(__global const float *a, __global float *result)
{
    if (get_global_id(0) != 0) {
        return;
    }

    const size_t count = get_global_size(0);
    float best = a[0];
    for (size_t i = 1; i < count; i++) {
        best = fmax(best, a[i]);
    }
    *result = best;
}