In the last line
cudaMemcpy(Mtx_on_GPU[i], d_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
you are trying to copy data from the device to the host (NOTE: I assume that you have allocated host memory for the Mtx_on_GPU pointers!)
However, the pointers in d_ptr are themselves stored in device memory, so you cannot access them directly from the host. The line should be
cudaMemcpy(Mtx_on_GPU[i], temp_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
This may become clearer when using "overly elaborate" variable names:
// Table of N pointers whose storage is allocated on the device by cudaMalloc.
// Host code must not index into this table.
int ** devicePointersStoredInDeviceMemory;
cudaMalloc( (void**)&devicePointersStoredInDeviceMemory, sizeof(int*)*N);
// Host-side array of N pointers. Each element receives the address of a
// device buffer of SIZE ints, so the host knows every device address.
int* devicePointersStoredInHostMemory[N];
for(int i=0; i<N; i++)
cudaMalloc( (void**)&devicePointersStoredInHostMemory[i], sizeof(int)*SIZE );
// Copy the N pointer values (not the int data they refer to) from the host
// array into the device-resident table.
cudaMemcpy(
devicePointersStoredInDeviceMemory,
devicePointersStoredInHostMemory,
sizeof(int*)*N, cudaMemcpyHostToDevice);
...
// Host-side array of N pointers to the host destination buffers.
// NOTE(review): the elements are not assigned anywhere in this snippet; each
// one must point to a host buffer of at least sizeof(int)*SIZE bytes before
// the loop below runs.
int* hostPointersStoredInHostMemory[N];
for(int i=0; i<N; i++) {
int* hostPointer = hostPointersStoredInHostMemory[i];
// Read the device address from the host-side array of device pointers,
// not from the pointer table that was allocated with cudaMalloc.
int* devicePointer = devicePointersStoredInHostMemory[i];
// Copy SIZE ints from the i-th device buffer into the i-th host buffer.
cudaMemcpy(hostPointer, devicePointer, sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
}
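EDIT:
d_ptr is "an array of pointers", but the memory for this array is allocated with cudaMalloc. That means it is located on the device. In contrast, with int* Mtx_on_GPU[N]; you are "allocating" N pointers in host memory. Instead of specifying the array size, you could also have used malloc. It may become clearer when you compare the following allocations: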
// Table of N pointers whose storage is allocated on the device with cudaMalloc.
int** pointersStoredInDeviceMemory;
cudaMalloc((void**)&pointersStoredInDeviceMemory, sizeof(int*)*N);
// Table of N pointers whose storage is allocated on the host with malloc.
// The result is cast to int** to match the declared type; a void** value does
// not convert implicitly to int**.
int** pointersStoredInHostMemory;
pointersStoredInHostMemory = (int**)malloc(N * sizeof(int*));
// ERROR (shown on purpose): host code indexes a table that was allocated with cudaMalloc.
int *pointerA = pointersStoredInDeviceMemory[0];
// OK: the table was allocated with malloc (its entries are not initialized in this snippet).
int *pointerB = pointersStoredInHostMemory[0];
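When you try to access pointersStoredInDeviceMemory[0], you are accessing device memory from the host, and this is not possible.
Admittedly, this can become confusing, because there are 2 kinds of memory (host and device) and pointers to pointers.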