Convert RGB to Grayscale Using CUDA

So, I'm trying to write a program that turns RGB images into shades of gray. I got this idea from a given Udacity problem. The problem is that when I write out the kernel in the Udacity web environment, it says that my code works, however, when I try to do it locally on my computer, I have no errors, but my image instead of exiting in shades of gray, comes completely gray. It seems that one gray square is the size of the loaded image. Can you help me find the error in my code, I compared it with the version of Udacity, and I can not find it.

#include "cuda_runtime.h" #include "device_launch_parameters.h" #include <string> #include <cuda.h> #include <stdio.h> #include <opencv\cv.h> #include <opencv\highgui.h> #include <iostream> #define CUDA_ERROR_CHECK #define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ ) #define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ ) inline void __cudaSafeCall(cudaError err, const char *file, const int line) { #ifdef CUDA_ERROR_CHECK if (cudaSuccess != err) { fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); exit(-1); } #endif return; } inline void __cudaCheckError(const char *file, const int line) { #ifdef CUDA_ERROR_CHECK cudaError err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); exit(-1); } err = cudaDeviceSynchronize(); if (cudaSuccess != err) { fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", file, line, cudaGetErrorString(err)); exit(-1); } #endif return; } __global__ void rgb_2_grey(uchar* const greyImage, const uchar4* const rgbImage, int rows, int columns) { int rgb_x = blockIdx.x * blockDim.x + threadIdx.x; //x coordinate of pixel int rgb_y = blockIdx.y * blockDim.y + threadIdx.y; //y coordinate of pixel if ((rgb_x >= columns) && (rgb_y >= rows)) { return; } int rgb_ab = rgb_y*columns + rgb_x; //absolute pixel position uchar4 rgb_Img = rgbImage[rgb_ab]; greyImage[rgb_ab] = uchar((float(rgb_Img.x))*0.299f + (float(rgb_Img.y))*0.587f + (float(rgb_Img.z))*0.114f); } using namespace cv; using namespace std; void Proc_Img(uchar4** h_RGBImage, uchar** h_greyImage, uchar4 **d_RGBImage, uchar** d_greyImage); void RGB_2_Greyscale(uchar* const d_greyImage, uchar4* const d_RGBImage, size_t num_Rows, size_t num_Cols); void Save_Img(); Mat img_RGB; Mat img_Grey; uchar4 *d_rgbImg; uchar *d_greyImg; int main() { uchar4* h_rgbImg; //uchar4* d_rgbImge=0; uchar* h_greyImg; //uchar* d_greyImge=0; Proc_Img(&h_rgbImg, &h_greyImg, &d_rgbImg, &d_greyImg); RGB_2_Greyscale(d_greyImg, d_rgbImg, img_RGB.rows, img_RGB.cols); Save_Img(); return 0; } void Proc_Img(uchar4** h_RGBImage, uchar** h_greyImage, uchar4 **d_RGBImage, uchar** d_greyImage){ cudaFree(0); CudaCheckError(); //loads image into a matrix object along with the colors in BGR format (must convert to rgb). Mat img = imread("C:\\Users\\Austin\\Pictures\\wallpapers\\IMG_3581.JPG", CV_LOAD_IMAGE_COLOR); if (img.empty()){ cerr << "couldnt open file dumbas..." << "C:\\Users\\Austin\\Pictures\\wallpapers\\IMG_3581.JPG" << endl; exit(1); } //converts color type from BGR to RGB cvtColor(img, img_RGB, CV_BGR2RGBA); //allocate memory for new greyscale image. //img.rows returns the range of pixels in y, img.cols returns range of pixels in x //CV_8UC1 means 8 bit unsigned(non-negative) single channel of color, aka greyscale. //all three of the parameters allow the create function in the Mat class to determine how much memory to allocate img_Grey.create(img.rows, img.cols, CV_8UC1); //creates rgb and greyscale image arrays *h_RGBImage = (uchar4*)img_RGB.ptr<uchar>(0); //.ptr is a method in the mat class that returns a pointer to the first element of the matrix. *h_greyImage = (uchar*)img_Grey.ptr<uchar>(0); //this is just like a regular array/pointer mem address to first element of the array. This is templated //in this case the compiler runs the function for returning pointer of type unsigned char. for rgb image it is //cast to uchar4 struct to hold r,g, and b values. const size_t num_pix = (img_RGB.rows) * (img_RGB.cols); //amount of pixels //allocate memory on gpu cudaMalloc(d_RGBImage, sizeof(uchar4) * num_pix); //bites of 1 uchar4 times # of pixels gives number of bites necessary for array CudaCheckError(); cudaMalloc(d_greyImage, sizeof(uchar) * num_pix);//bites of uchar times # pixels gives number of bites necessary for array CudaCheckError(); cudaMemset(*d_greyImage, 0, sizeof(uchar) * num_pix); CudaCheckError(); //copy array into allocated space cudaMemcpy(*d_RGBImage, *h_RGBImage, sizeof(uchar4)*num_pix, cudaMemcpyHostToDevice); CudaCheckError(); d_rgbImg = *d_RGBImage; d_greyImg = *d_greyImage; } void RGB_2_Greyscale(uchar* const d_greyImage, uchar4* const d_RGBImage, size_t num_Rows, size_t num_Cols){ const int BS = 16; const dim3 blockSize(BS, BS); const dim3 gridSize((num_Cols / BS) + 1, (num_Rows / BS) + 1); rgb_2_grey <<<gridSize, blockSize>>>(d_greyImage, d_RGBImage, num_Rows, num_Cols); cudaDeviceSynchronize(); CudaCheckError(); } void Save_Img(){ const size_t num_pix = (img_RGB.rows) * (img_RGB.cols); cudaMemcpy(img_Grey.ptr<uchar>(0), d_greyImg, sizeof(uchar)*num_pix, cudaMemcpyDeviceToHost); CudaCheckError(); imwrite("C:\\Users\\Austin\\Pictures\\wallpapers\\IMG_3581GR.JPG", img_Grey); cudaFree(d_rgbImg); cudaFree(d_greyImg); } 

EDIT: I realized that the local var in my main is the same name as the global var, here I edited the code, now I get an error from the visual studio that

d_rgbIme variable d_rgbIme used without initialization

when I already initialized it above. If I set them to zero, I get a CUDA error saying

illegal memory access was detected

I tried to run cuda-memcheck, but then I get an error message that I could not start the file ...

+6
source share
2 answers

I found a mistake thanks to one of the comments of Robert Rovela, he really helped with this! it is in my kernel the if statement should read if ((rgb_x >= columns) || (rgb_y >= rows)) {

+5
source

I worked on the same issue in JCUDA. See if you can use any part of this solution:

 //Read Height and Width of image in Height & Width variables int Width = image.getWidth(); int Height = image.getHeight(); int N = Height * Width; int[] grayScale = new int[N]; //Allocate separate arrays to store Alpha, Red, Green and //Blue values for every pixel int[] redHost = new int[N]; int[] greenHost = new int[N]; int[] blueHost = new int[N]; int[] alphaHost = new int[N]; for(int i=0; i<Height; i++) { for(int j=0; j<Width; j++) { int pixel = image.getRGB(j, i); //Read the ARGB data alphaHost[i*Width+j] = (pixel >> 24) & 0xff; redHost[i*Width+j] = (pixel >> 16) & 0xff; greenHost[i*Width+j] = (pixel >> 8) & 0xff; blueHost[i*Width+j] = (pixel) & 0xff; } } 

/ * Below are the CUDA kernel parameters * /

 Pointer kernelParameters = Pointer.to( Pointer.to(new int[]{N}), //Total size of each array W * H Pointer.to(redDev), // Pointer to redArray on device Pointer.to(greenDev), // Pointer to greenArray on device Pointer.to(blueDev), // Pointer to blueArray on device Pointer.to(Output)); //Pointer to output array 

/ * Below is my RGBToGrayScale.cu..ie CUDA Core * /

 __global__ void RGBtoGrayScale(int N, int *red, int *green, int *blue, int *Output) { int id = blockIdx.x * blockDim.x + threadIdx.x; if(id<N) { Output[id] = (red[id]*0.2989) + (green[id]*0.587) + (blue[id]*0.114); } } 

/ * Get output to host memory * /

 cuMemcpyDtoH(Pointer.to(grayScale), Output, N * Sizeof.INT); 

/ * Record the image with the new RBG values ​​* /

 BufferedImage im = new BufferedImage(Width,Height,BufferedImage.TYPE_BYTE_GRAY); WritableRaster raster = im.getRaster(); for(int i=0;i<Height;i++) { for(int j=0;j<Width;j++) { raster.setSample(j, i, 0, grayScale[i*Width+j]); } } try { ImageIO.write(im,"JPEG",new File("glpattern.jpeg")); } catch (IOException e) { e.printStackTrace(); } 
0
source

Source: https://habr.com/ru/post/973796/


All Articles