I worked on the same problem using JCuda. See whether any part of this solution is useful to you:
//Read Height and Width of image in Height & Width variables int Width = image.getWidth(); int Height = image.getHeight(); int N = Height * Width; int[] grayScale = new int[N]; //Allocate separate arrays to store Alpha, Red, Green and //Blue values for every pixel int[] redHost = new int[N]; int[] greenHost = new int[N]; int[] blueHost = new int[N]; int[] alphaHost = new int[N]; for(int i=0; i<Height; i++) { for(int j=0; j<Width; j++) { int pixel = image.getRGB(j, i); //Read the ARGB data alphaHost[i*Width+j] = (pixel >> 24) & 0xff; redHost[i*Width+j] = (pixel >> 16) & 0xff; greenHost[i*Width+j] = (pixel >> 8) & 0xff; blueHost[i*Width+j] = (pixel) & 0xff; } }
/* Below are the CUDA kernel parameters */
Pointer kernelParameters = Pointer.to( Pointer.to(new int[]{N}), //Total size of each array W * H Pointer.to(redDev), // Pointer to redArray on device Pointer.to(greenDev), // Pointer to greenArray on device Pointer.to(blueDev), // Pointer to blueArray on device Pointer.to(Output)); //Pointer to output array
/* Below is my RGBToGrayScale.cu, i.e. the CUDA kernel */
/*
 * RGBtoGrayScale: converts planar red/green/blue arrays into one gray value
 * per element, using the weights 0.2989 (red), 0.587 (green), 0.114 (blue).
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. The launch
 * must supply at least N threads in total; surplus threads do nothing.
 * Shared memory: none.
 *
 * Parameters:
 *   N      - number of elements in each array
 *   red    - device array of N red samples (read only)
 *   green  - device array of N green samples (read only)
 *   blue   - device array of N blue samples (read only)
 *   Output - device array of N ints that receives the gray values
 *
 * Samples are presumably in the range 0..255 (8-bit channels); single
 * precision is exact enough for that range -- confirm if larger values are
 * ever passed.
 *
 * NOTE(review): the kernel is not declared extern "C", so its symbol name is
 * C++-mangled; confirm the host-side lookup accounts for that.
 */
__global__ void RGBtoGrayScale(int N, int *red, int *green, int *blue, int *Output)
{
    int id = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid rarely divides N evenly, so guard the tail threads.
    if (id < N) {
        // Float literals keep the arithmetic in single precision; bare
        // literals such as 0.587 would promote every operand to double.
        float luma = red[id] * 0.2989f + green[id] * 0.587f + blue[id] * 0.114f;

        // Round to nearest instead of truncating: the weights sum to 0.9999,
        // so truncation would turn pure white (255, 255, 255) into 254.
        Output[id] = __float2int_rn(luma);
    }
}
/* Copy the output back to host memory */
cuMemcpyDtoH(Pointer.to(grayScale), Output, N * Sizeof.INT);
/* Write out the image using the new grayscale values */
// Create an 8-bit single-band gray image with the same dimensions as the input.
BufferedImage im = new BufferedImage(Width, Height, BufferedImage.TYPE_BYTE_GRAY);
WritableRaster raster = im.getRaster();

// grayScale is laid out row by row (index = row * Width + column), which is
// the order setSamples consumes, so band 0 can be filled in a single call.
raster.setSamples(0, 0, Width, Height, 0, grayScale);

// Encode the image as JPEG; an I/O failure is reported on stderr.
try {
    File target = new File("glpattern.jpeg");
    ImageIO.write(im, "JPEG", target);
} catch (IOException e) {
    e.printStackTrace();
}