CUDA numeric error
I just started experimenting with cuda with the next cude
#include "macro.hpp"
#include <algorithm>
#include <iostream>
#include <cstdlib>
//#define double float
//#define double int
int RandomNumber(){return static_cast<double>(rand() % 1000);}
__global__ void sum3(double const* a,
double const* b,
double const* c,
double * result,
unsigned const* n)
{
unsigned i = blockIdx.x;
while(i < (*n))
{
result[i] = (a[i] + b[i] + c[i]);
}
};
int main()
{
static unsigned size = 1e2;
srand(0);
double* a = new double[size];
double* b = new double[size];
double* c = new double[size];
double* result = new double[size];
std::generate(a, a+size, RandomNumber);
std::generate(b, b+size, RandomNumber);
std::generate(c, c+size, RandomNumber);
double* ad, *bd,* cd;
double* resultd;
unsigned * sized;
std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &bd, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &cd, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &resultd, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &sized, sizeof(unsigned)) << std::endl;
cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);
// print_array(a, size);
for(int i = 0; i < 1000; ++i)
{
cudaMemcpy(ad, a, size*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(bd, b, size*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(cd, c, size*sizeof(double), cudaMemcpyHostToDevice);
sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);
cudaMemcpy(result, resultd, size*sizeof(double), cudaMemcpyDeviceToHost);
}
#ifdef PRINT
for( int i = 0; i < size; ++i)
{
std::cout << a[i] << ", "<< b[i] <<"," << c[i] << "," << result[i]<< std::endl;
}
#endif
cudaFree(ad);
cudaFree(bd);
cudaFree(cd);
cudaFree(resultd);
delete[] a;
delete[] b;
delete[] c;
delete[] result;
return 0;
}
Compile this on mac book pro without any problems. However, when I try to run this, I get
930, 22,538,899
691, 832,205,23
415, 655,148,120
872, 876,481,985
761, 909,583,619
841, 104,466,917
610, 635,911,52
//more useless numbers
I compared my samples with one in the Cuda By Example, and I see no significant difference other than the type. Any pointer to this problem is appreciated.
while(i < (*n))
{
result[i] = (a[i] + b[i] + c[i]);
}
wrong (endlessly)
this is not true
cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);
&sized is the address of the pointer variable, not the value of the pointer
One number can be passed to the device on the stack, so use
unsigned size
check the return status of your cuda functions, http://www.drdobbs.com/high-performance-computing/207603131
:
double* a = new double[size];
, "a" ,
std::generate(a, a+size, RandomNumber);
std::generate(b, b+size, RandomNumber);
std::generate(c, c+size, RandomNumber);
,
std::generate(*a, (*a)+size, RandomNumber);
std::generate(*b, (*b)+size, RandomNumber);
std::generate(*c, (*c)+size, RandomNumber);
, , .
,
unsigned * sized;
std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;
unsigned * sized;
std::cout << cudaMalloc((void*) ad, size*sizeof(double)) << std::endl;
, .