Passing a Storable Vector to a C function that copies its data to the GPU

I have the following C code:

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>

double enorm(double* v1, int length){
    cublasHandle_t handle;
    double result = 0;
    double* vector;
    cudaMalloc((void**) &vector, length * sizeof(double));
    cublasSetVector(length, sizeof(double), v1, 1, vector,1);
    cublasCreate(&handle);
    cublasDnrm2(handle, length, vector, 1, &result);
    cudaFree(vector);
    return result;
}

double testnorm(double* v1, int len){
    double tmp = 0;
    for(int i = 0; i < len; i++){
        tmp += v1[i]*v1[i];
    }
    return sqrt(tmp);
}

/*
 * Demo driver: builds the vector (3, 4) on the heap and prints its norm as
 * computed by enorm and by testnorm, one result per line.
 *
 * Returns 0 on success, or EXIT_FAILURE if the buffer cannot be allocated.
 */
int main() {
    double* a = malloc(2 * sizeof(double));
    /* malloc may fail; do not dereference a NULL buffer. */
    if (a == NULL) {
        fprintf(stderr, "main: out of memory\n");
        return EXIT_FAILURE;
    }
    a[0] = 3;
    a[1] = 4;
    printf("%.f\n", enorm(a, 2));
    printf("%.f\n", testnorm(a,2));
    /* Release the buffer; the original never freed it. */
    free(a);
    return 0;
}

And here is the Haskell code that calls the C functions above through the FFI:

import qualified Foreign.Ptr as P
import System.IO.Unsafe
import Foreign.C.Types
import qualified Data.Vector.Storable as SV
import Foreign.C.Types

-- | FFI binding to the C function @enorm@: takes a pointer to the elements
-- and the element count. The import has a pure type (no 'IO' in the result),
-- so a call to it is an ordinary lazy expression that runs only when forced.
foreign import ccall "enorm" c_enorm :: P.Ptr CDouble -> CInt -> CDouble

-- | FFI binding to the C function @testnorm@: takes a pointer to the elements
-- and the element count. Like 'c_enorm', it is imported with a pure type, so
-- the call runs only when its result is forced.
foreign import ccall "testnorm" c_testnorm :: P.Ptr CDouble -> CInt -> CDouble

-- | Euclidean norm of a storable vector, computed by the C function @enorm@.
--
-- The foreign call is forced with '$!' while still inside 'SV.unsafeWith'.
-- A plain @return (c_enorm ptr len)@ only builds a thunk, so the C function
-- would run after 'SV.unsafeWith' has returned, when the pointer is no
-- longer guaranteed to refer to the vector's data.
enorm :: SV.Vector CDouble -> CDouble
enorm v1 = unsafePerformIO $
  SV.unsafeWith v1 $ \ptr -> return $! c_enorm ptr len
  where
    -- Element count converted to the C @int@ expected by the foreign function.
    len = fromIntegral $ SV.length v1

-- | Euclidean norm of a storable vector, computed by the C function
-- @testnorm@.
--
-- The foreign call is forced with '$!' while still inside 'SV.unsafeWith'.
-- A plain @return (c_testnorm ptr len)@ only builds a thunk, so the C
-- function would run after 'SV.unsafeWith' has returned, when the pointer is
-- no longer guaranteed to refer to the vector's data.
testnorm :: SV.Vector CDouble -> CDouble
testnorm v1 = unsafePerformIO $
  SV.unsafeWith v1 $ \ptr -> return $! c_testnorm ptr len
  where
    -- Element count converted to the C @int@ expected by the foreign function.
    len = fromIntegral $ SV.length v1

-- | Prints the norm of the vector (3, 4), first as computed by 'enorm' and
-- then as computed by 'testnorm', one result per line.
main :: IO ()
main = mapM_ (\norm -> print (norm sample)) [enorm, testnorm]
  where
    -- The fixed input shared by both norm computations.
    sample :: SV.Vector CDouble
    sample = SV.fromList [3,4]

For some reason, although the C program prints 5 and 5 as expected, the Haskell equivalent prints 0 and 5. This means that even though the array is passed to C successfully, something goes wrong when copying the data to the GPU. What is the reason for this behavior, and how can I get the expected result?

C compiled with:

gcc -o cmain blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm

Haskell:

gcc -c -fPIC -o testblas.o blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm
gcc -shared -o libtestblas.so  testblas.o
ghc -o main Main.hs -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm -L./ -ltestblas
export LD_LIBRARY_PATH=./; ./main 

EDIT: when called from Haskell, cudaMalloc returns cudaErrorMemoryAllocation (unable to allocate enough memory), whereas in the C program every call succeeds.

+4
source share

Source: https://habr.com/ru/post/1665469/


All Articles