CUDA: copying a double pointer (an array of device pointers) between host and device

I wrote my sample code as follows.

int ** d_ptr;
cudaMalloc( (void**)&d_ptr, sizeof(int*)*N );

int* tmp_ptr[N];
for(int i=0; i<N; i++)
    cudaMalloc( (void**)&tmp_ptr[i], sizeof(int)*SIZE );
cudaMemcpy(d_ptr, tmp_ptr, sizeof(tmp_ptr), cudaMemcpyHostToDevice);

This code works well, but after launching the kernel, I can't copy the result back to the host.

int* Mtx_on_GPU[N];
cudaMemcpy(Mtx_on_GPU, d_ptr, sizeof(int)*N*SIZE, cudaMemcpyDeviceToHost);

At this point, a segmentation fault occurs, but I do not know what I am doing wrong.

int* Mtx_on_GPU[N];
for(int i=0; i<N; i++)
    cudaMemcpy(Mtx_on_GPU[i], d_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);

This code also produces the same error.

I think there are some errors in my code, but I have not been able to find them all day.

Give me some tips.

+4
source share
1 answer

In the last line

cudaMemcpy(Mtx_on_GPU[i], d_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);

you are trying to copy data from the device to the host (NOTE: I assume that you have allocated host memory for pointers Mtx_on_GPU!)

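However, the pointers in d_ptr are stored in the device's memory, so you cannot access them directly from the host (evaluating d_ptr[i] on the host dereferences a device address). The line should instead use the host-side copy of the device pointers, i.e. the array named tmp_ptr in the question: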

cudaMemcpy(Mtx_on_GPU[i], temp_ptr[i], sizeof(int)*SIZE, cudaMemcpyDeviceToHost);

This may become clearer when using "overly elaborate" variable names:

int ** devicePointersStoredInDeviceMemory;
cudaMalloc( (void**)&devicePointersStoredInDeviceMemory, sizeof(int*)*N);

int* devicePointersStoredInHostMemory[N];
for(int i=0; i<N; i++)
    cudaMalloc( (void**)&devicePointersStoredInHostMemory[i], sizeof(int)*SIZE );

cudaMemcpy(
    devicePointersStoredInDeviceMemory, 
    devicePointersStoredInHostMemory,
    sizeof(int*)*N, cudaMemcpyHostToDevice);

// Invoke kernel here, passing "devicePointersStoredInDeviceMemory"
// as an argument
...

int* hostPointersStoredInHostMemory[N];
for(int i=0; i<N; i++) {
    int* hostPointer = hostPointersStoredInHostMemory[i]; 
    // (allocate memory for hostPointer here!)

    int* devicePointer = devicePointersStoredInHostMemory[i];

    cudaMemcpy(hostPointer, devicePointer, sizeof(int)*SIZE, cudaMemcpyDeviceToHost);
}

EDIT :

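d_ptr is "an array of pointers", but the memory for this array was allocated with cudaMalloc. That means the array itself is located on the device, so its elements cannot be read from the host. In contrast, with int* Mtx_on_GPU[N]; you are "allocating" N pointers in host memory. Instead of specifying the array size, you could also have used malloc. It may become clearer when you compare the following allocations: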

int** pointersStoredInDeviceMemory;
cudaMalloc((void**)&pointersStoredInDeviceMemory, sizeof(int*)*N);

int** pointersStoredInHostMemory;
pointersStoredInHostMemory = (void**)malloc(N * sizeof(int*));

// This is not possible, because the array was allocated with cudaMalloc:
int *pointerA = pointersStoredInDeviceMemory[0];

// This is possible because the array was allocated with malloc:    
int *pointerB = pointersStoredInHostMemory[0];

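So, for every pointer, you have to keep track of two things:

  • where the pointer itself is stored (host memory or device memory),
  • where the memory it points to is located (host memory or device memory),

and, consequently, an array of device pointers usually has to exist in 2 copies: one in host memory (for calling cudaMemcpy) and one in device memory (for the kernel).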

+5

Source: https://habr.com/ru/post/1540223/


All Articles