I'm trying to use Alea to speed up the program I'm working on, but I need help.
I need to do a lot of bit matching and bitwise operations with values stored in two arrays.
For each element of my first array, I must perform bitwise and operation with each element of my second array, and then count the bit set to 1 from the result and.
If the result is greater than / equal to a certain value, I need to exit the inner one and move on to the next element of my first array.
The first array is usually large, with millions of elements, the second is usually less than 200,000 elements.
Trying to do all these operations in parallel, here is my code:
[GpuManaged]
private long[] Check(long[] arr1, long[] arr2, int limit)
{
Gpu.FreeAllImplicitMemory(true);
var gpu = Gpu.Default;
long[] result = new long[arr1.Length];
gpu.For(0, arr1.Length, i =>
{
bool found = false;
long b = arr1[i];
for (int i2 = 0; i2 < arr2.Length; i2++)
{
if (LibDevice.__nv_popcll(b & arr2[i2]) >= limit)
{
found = true;
break;
}
}
if (!found)
{
result[i] = b;
}
});
return result;
}
, , , , .
, , - , GPU.
, NVIDIA - GeForce GT 740M.
2 , . , .
private static int[] CheckWithKernel(Gpu gpu, int[] arr1, int[] arr2, int limit)
{
var lp = new LaunchParam(16, 256);
var result = new int[arr1.Length];
try
{
using (var dArr1 = gpu.AllocateDevice(arr1))
using (var dArr2 = gpu.AllocateDevice(arr2))
using (var dResult = gpu.AllocateDevice<int>(arr1.Length))
{
gpu.Launch(Kernel, lp, arr1.Length, arr2.Length, dArr1.Ptr, dArr2.Ptr, dResult.Ptr, limit);
Gpu.Copy(dResult, result);
return result;
}
}
finally
{
Gpu.Free(arr1);
Gpu.Free(arr2);
Gpu.Free(result);
}
}
private static void Kernel(int a1, int a2, deviceptr<int> arr1, deviceptr<int> arr2, deviceptr<int> arr3, int limit)
{
var iinit = blockIdx.x * blockDim.x + threadIdx.x;
var istep = gridDim.x * blockDim.x;
for (var i = iinit; i < a1; i += istep)
{
bool found = false;
int b = arr1[i];
for (var j = 0; j < a2; j++)
{
if (LibDevice.__nv_popcll(b & arr2[j]) >= limit)
{
found = true;
break;
}
}
if (!found)
{
arr3[i] = b;
}
}
}