CudaMemcpyFromSymbol on device variable

Question

CudaMemcpyFromSymbol on device variable

I am trying to apply a kernel function to a `__device__` variable which, according to the specification, resides in "global memory".

#include <stdio.h>
#include "sys_data.h"
#include "my_helper.cuh"
#include "helper_cuda.h"
#include <cuda_runtime.h>


// Host-side input: ten values with alternating sign.
double X[10] = {1,-2,3,-4,5,-6,7,-8,9,-10};
// Host-side destination buffer for the data copied back from DEV_X; zero-initialized.
double Y[10] = {0};
// Ten-element array declared with __device__, i.e. a device-side symbol.
__device__ double DEV_X[10];


// Copies X into the device symbol DEV_X, launches vector_projection with one
// block of ten threads, then copies DEV_X back into the host array Y.
int main(void) {
    // Host -> device: fill the __device__ array DEV_X from X.
    checkCudaErrors(cudaMemcpyToSymbol(DEV_X, X,10*sizeof(double)));
    // NOTE(review): DEV_X is a __device__ symbol, yet it is named here in host
    // code as the kernel's pointer argument. cuda-memcheck reports an
    // out-of-bounds global read inside vector_projection for this launch.
    vector_projection<double><<<1,10>>>(DEV_X, 10);
    // Presumably checks the last CUDA error (helper from helper_cuda.h) - confirm.
    getLastCudaError("oops");
    // Device -> host: this is the call at which cudaErrorLaunchFailure is reported.
    checkCudaErrors(cudaMemcpyFromSymbol(Y, DEV_X, 10*sizeof(double)));
    return 0;
}

The kernel function `vector_projection` is defined in `my_helper.cuh` as follows:

// Clamps negative entries of dx to zero, in place.
//   dx : pointer to the elements to process (dereferenced in device code)
//   n  : number of elements; threads whose global index is >= n do nothing
// Each thread inspects at most one element, selected by its 1D global index.
template<typename T> __global__ void vector_projection(T *dx, int n) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the end of the data have no work.
    if (idx >= n)
        return;

    if (dx[idx] < 0) {
        dx[idx] = T(0);
    }
}

As you can see, I use `cudaMemcpyToSymbol` and `cudaMemcpyFromSymbol` to transfer data to and from the device. However, I get the following error:

CUDA error at ../src/vectorAdd.cu:19 code=4(cudaErrorLaunchFailure) 
  "cudaMemcpyFromSymbol(Y, DEV_X, 10*sizeof(double))"

Footnote: I can, of course, avoid `__device__` variables and use something like this instead, which works fine; I just want to see how to do the same thing (if possible) using `__device__` variables.

Update: I ran the program under cuda-memcheck; the full output is at http://pastebin.com/AW9vmjFs. The reported errors look like this:

========= Invalid __global__ read of size 8
=========     at 0x000000c8 in /home/ubuntu/Test0001/Debug/../src/my_helper.cuh:75:void vector_projection<double>(double*, int)
=========     by thread (9,0,0) in block (0,0,0)
=========     Address 0x000370e8 is out of bounds

+2

gpu gpgpu cuda

Pantelis Sopasakis, Sep 27 '14 at 15:00

1

Robert Crovella · Accepted Answer · 2014-09-27T15:37:58+0000

The problem is here, in the kernel launch:

vector_projection<double><<<1,10>>>(DEV_X, 10);
                                    ^

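`DEV_X` is a `__device__` symbol, which is meant to be used in device code. Here it is used in host code, and it is not legal to pass it directly as a kernel argument this way: the kernel receives a pointer that is not a valid device address, which is why cuda-memcheck reports an out-of-bounds read.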

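One way to fix this is to obtain the device address of the symbol with `cudaGetSymbolAddress` and pass that pointer to the kernel.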

Here is a complete worked example:

$ cat t577.cu
#include <stdio.h>

// Host-side input: ten values with alternating sign.
double X[10] = {1,-2,3,-4,5,-6,7,-8,9,-10};
// Host-side destination buffer for the results copied back from DEV_X; zero-initialized.
double Y[10] = {0};
// Ten-element array declared with __device__, i.e. a device-side symbol.
__device__ double DEV_X[10];

// In-place projection: every element of dx that is below zero is overwritten
// with zero; all other elements are left untouched.
//   dx : pointer to the data, dereferenced in device code
//   n  : element count; the bounds test keeps surplus threads from touching memory
// Uses a 1D global thread index, one element per thread.
template<typename T> __global__ void vector_projection(T *dx, int n) {
    const int gid = threadIdx.x + blockIdx.x * blockDim.x;
    // Short-circuit evaluation guarantees dx[gid] is only read when gid < n.
    if (gid < n && dx[gid] < 0) {
        dx[gid] = T(0);
    }
}



// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Copies X into the __device__ array DEV_X, zeroes its negative entries on the
// GPU, copies the result back into Y and prints it.
// Returns 0 on success and 1 if any CUDA call or the kernel fails.
int main(void) {
    if (!cuda_ok(cudaMemcpyToSymbol(DEV_X, X, 10*sizeof(double)), "cudaMemcpyToSymbol"))
        return 1;

    // A __device__ symbol must not be passed to a kernel directly from host
    // code; ask the runtime for its device address instead.
    double *my_dx = NULL;
    if (!cuda_ok(cudaGetSymbolAddress((void **)&my_dx, DEV_X), "cudaGetSymbolAddress"))
        return 1;

    vector_projection<double><<<1,10>>>(my_dx, 10);
    // Launch-configuration errors are reported by cudaGetLastError().
    if (!cuda_ok(cudaGetLastError(), "vector_projection launch"))
        return 1;
    // Kernels run asynchronously: synchronize here so that a fault inside the
    // kernel is attributed to the kernel rather than to the copy that follows.
    if (!cuda_ok(cudaDeviceSynchronize(), "vector_projection execution"))
        return 1;

    if (!cuda_ok(cudaMemcpyFromSymbol(Y, DEV_X, 10*sizeof(double)), "cudaMemcpyFromSymbol"))
        return 1;

    for (int i = 0; i < 10; i++)
      printf("%d: %f\n", i, Y[i]);
    return 0;
}
$ nvcc -arch=sm_35 -o t577 t577.cu
$ cuda-memcheck ./t577
========= CUDA-MEMCHECK
0: 1.000000
1: 0.000000
2: 3.000000
3: 0.000000
4: 5.000000
5: 0.000000
6: 7.000000
7: 0.000000
8: 9.000000
9: 0.000000
========= ERROR SUMMARY: 0 errors
$

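This is not the only way to fix it. For example, you could instead have the kernel refer to the `__device__` variable directly in device code, with something like: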

T *dx = DEV_X;

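In that case `DEV_X` is only ever used in device code, which is legal. Alternatively, you may want to look into Unified Memory.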

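As for the CUDA error you saw: kernel launches are asynchronous, so an error that occurs while the kernel is executing is not reported at the launch itself. It is reported by a later CUDA API call (here `cudaMemcpyFromSymbol`), even though that call is not the actual cause of the failure; this is normal behaviour for CUDA error reporting (see the CUDA documentation on error checking).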

CudaMemcpyFromSymbol on __device__ variable

More articles:

CudaMemcpyFromSymbol on device variable