I want to pass OpenCL Mat to the OpenCL Self-Writing Kernel for FGPA (does not support OpenCV OpenCL).
Host-Code:
Mat img = imread( "template.jpg", IMREAD_GRAYSCALE );
Mat output(img.rows, img.cols, CV_8UC1);
cl_kernel kernel = NULL;
kernel = clCreateKernel(program, "copy", &status);
cl_command_queue cmdQueue;
cmdQueue = clCreateCommandQueue(context,devices[0], 0, &status);
cl_mem buffer_img = clCreateBuffer(context,CL_MEM_READ_ONLY, sizeof(uint) * img.cols * img.rows, NULL,&status);
cl_mem buffer_outputimg = clCreateBuffer(context,CL_MEM_WRITE_ONLY, sizeof(uint) * img.cols * img.rows,NULL,&status);
status = clEnqueueWriteBuffer(cmdQueue, buffer_img,CL_FALSE,0,sizeof(uint) * img.cols * img.rows,&img,0,NULL,NULL);
status = clSetKernelArg(kernel,0,sizeof(cl_mem),&buffer_img);
status = clSetKernelArg(kernel,1,sizeof(cl_mem),&buffer_outputimg);
size_t globalWorkSize[2];
globalWorkSize[0] = img.cols;
globalWorkSize[1] = img.rows;
status = clEnqueueNDRangeKernel(cmdQueue,kernel,2,NULL, globalWorkSize, NULL,0, NULL,NULL);
clEnqueueReadBuffer(cmdQueue,buffer_outputimg,CL_TRUE,0,sizeof(uint) * img.cols * img.rows, &output, 0, NULL, NULL);
clFinish(cmdQueue);
Kernel code:
__kernel void copy(__global uchar * input, __global uchar * output)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
output[y * get_global_size(0) + x] = input[y * get_global_size(0) + x] ;
}
When outputting to FPGA, I get a segmentation error, which is probably caused by incorrect processing using Matrix OpenCV.
EDIT : The modified host code proposed by api55 solved the problem:
Mat img = imread( "scene.jpg", IMREAD_GRAYSCALE );
Mat output(img.rows, img.cols, CV_8UC1);
cl_kernel kernel = NULL;
kernel = clCreateKernel(program, "copy", &status);
cl_command_queue cmdQueue;
cmdQueue = clCreateCommandQueue(context,devices[0], 0, &status);
checkError(status, "Failed to create commadnqueue");
cl_mem buffer_img = clCreateBuffer(context,CL_MEM_READ_ONLY, sizeof(uchar) * img.cols * img.rows, NULL,&status);
cl_mem buffer_outputimg = clCreateBuffer(context,CL_MEM_WRITE_ONLY, sizeof(uchar) * img.cols * img.rows,NULL,&status);
checkError(status, "Failed to create buffer_mask");
status = clEnqueueWriteBuffer(cmdQueue, buffer_img,CL_FALSE,0,sizeof(uchar) * img.cols * img.rows,img.data,0,NULL,NULL);
checkError(status, "Failed to enqueue buffer_img");
status = clSetKernelArg(kernel,0,sizeof(cl_mem),&buffer_img);
status = clSetKernelArg(kernel,1,sizeof(cl_mem),&buffer_outputimg);
size_t globalWorkSize[2];
globalWorkSize[0] = img.cols;
globalWorkSize[1] = img.rows;
status = clEnqueueNDRangeKernel(cmdQueue,kernel,2,NULL, globalWorkSize, NULL,0, NULL,NULL);
clEnqueueReadBuffer(cmdQueue,buffer_outputimg,CL_TRUE,0,sizeof(uchar) * img.cols * img.rows, output.data,0,NULL,NULL);
imwrite("output.jpg", output);