: , API .
, , JIT. , ( ) JCUDA, , API-. ...
, :
#include <math.h>
__device__ float mathop(float &x, float &y, float &z)
{
float res = sin(x) + cos(y) + sqrt(z);
return res;
}
extern __device__ float mathop(float & x, float & y, float & z);
__global__ void kernel(float *xvals, float * yvals, float * zvals, float *res)
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
res[tid] = mathop(xvals[tid], yvals[tid], zvals[tid]);
}
PTX, :
$ nvcc -arch=sm_30 -ptx test_function.cu
$ nvcc -arch=sm_30 -ptx test_kernel.cu
$ head -14 test_kernel.ptx
.version 4.2
.target sm_30
.address_size 64
.extern .func (.param .b32 func_retval0) _Z6mathopRfS_S_
JIT, PTX , . , , . API- :
#include <cstdio>
#include <cuda.h>
#define drvErrChk(ans) { drvAssert(ans, __FILE__, __LINE__); }
inline void drvAssert(CUresult code, const char *file, int line, bool abort=true)
{
if (code != CUDA_SUCCESS) {
fprintf(stderr, "Driver API Error %04d at %s %d\n", int(code), file, line);
exit(-1);
}
}
int main()
{
cuInit(0);
CUdevice device;
drvErrChk( cuDeviceGet(&device, 0) );
CUcontext context;
drvErrChk( cuCtxCreate(&context, 0, device) );
CUlinkState state;
drvErrChk( cuLinkCreate(0, 0, 0, &state) );
drvErrChk( cuLinkAddFile(state, CU_JIT_INPUT_PTX, "test_function.ptx", 0, 0, 0) );
drvErrChk( cuLinkAddFile(state, CU_JIT_INPUT_PTX, "test_kernel.ptx" , 0, 0, 0) );
size_t sz;
char * image;
drvErrChk( cuLinkComplete(state, (void **)&image, &sz) );
CUmodule module;
drvErrChk( cuModuleLoadData(&module, image) );
drvErrChk( cuLinkDestroy(state) );
CUfunction function;
drvErrChk( cuModuleGetFunction(&function, module, "_Z6kernelPfS_S_S_") );
return 0;
}
. JCUDA, JIT.