#include #i...">

CUDA numeric error

I just started experimenting with cuda with the next cude

#include "macro.hpp"
#include <algorithm>
#include <iostream>
#include <cstdlib>

//#define double float
//#define double int

int RandomNumber(){return static_cast<double>(rand() % 1000);}

__global__ void sum3(double const* a,
             double const* b,
             double const* c,
             double * result, 
             unsigned const* n)
{    
   unsigned i = blockIdx.x;
   while(i < (*n))
{
  result[i] = (a[i] + b[i] + c[i]);
}
};


int main()
{

  static unsigned size = 1e2;
  srand(0);
  double* a = new double[size];
  double* b = new double[size];
  double* c = new double[size];
  double* result = new double[size];

  std::generate(a, a+size, RandomNumber);
  std::generate(b, b+size, RandomNumber);
  std::generate(c, c+size, RandomNumber);

  double* ad, *bd,* cd;
  double* resultd;

  unsigned * sized;
  std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &bd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &cd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &resultd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &sized, sizeof(unsigned)) << std::endl;

  cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

  //  print_array(a, size);
  for(int i = 0; i < 1000; ++i)
    {
      cudaMemcpy(ad, a, size*sizeof(double), cudaMemcpyHostToDevice);
      cudaMemcpy(bd, b, size*sizeof(double), cudaMemcpyHostToDevice);
      cudaMemcpy(cd, c, size*sizeof(double), cudaMemcpyHostToDevice);      
      sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);
      cudaMemcpy(result, resultd, size*sizeof(double), cudaMemcpyDeviceToHost);
    }

#ifdef PRINT
  for( int i = 0; i < size; ++i)
    {
      std::cout << a[i] << ", "<< b[i] <<"," << c[i] << "," << result[i]<< std::endl;
    }
#endif

  cudaFree(ad);
  cudaFree(bd);
  cudaFree(cd);
  cudaFree(resultd);

  delete[] a;
  delete[] b;
  delete[] c;
  delete[] result;

  return 0;
}

Compile this on mac book pro without any problems. However, when I try to run this, I get

930, 22,538,899
691, 832,205,23
415, 655,148,120
872, 876,481,985
761, 909,583,619
841, 104,466,917
610, 635,911,52
//more useless numbers

I compared my samples with one in the Cuda By Example, and I see no significant difference other than the type. Any pointer to this problem is appreciated.

+3
source share
2 answers
while(i < (*n))
{
  result[i] = (a[i] + b[i] + c[i]);
}

wrong (endlessly)

this is not true

cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

&sized is the address of the pointer variable, not the value of the pointer

One number can be passed to the device on the stack, so use

unsigned size

check the return status of your cuda functions, http://www.drdobbs.com/high-performance-computing/207603131

+1

:

double* a = new double[size];

, "a" ,

  std::generate(a, a+size, RandomNumber);
  std::generate(b, b+size, RandomNumber);
  std::generate(c, c+size, RandomNumber);

,

  std::generate(*a, (*a)+size, RandomNumber);
  std::generate(*b, (*b)+size, RandomNumber);
  std::generate(*c, (*c)+size, RandomNumber);

, , .

,

 unsigned * sized;
 std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;

 unsigned * sized;
 std::cout << cudaMalloc((void*) ad, size*sizeof(double)) << std::endl;

, .

-1

Source: https://habr.com/ru/post/1763272/


All Articles