I am trying to understand the basics but I find it hard. That it why I call for help. I have not found examples, especially with 2D arrays. I have tried to solve it with the following code but the problem remains:
gpu.Launch(512, 512, NeighborhoodKernel, gpuConstValue, gpuMemIn, gpuMemDistIn, gpuMemOut);
gpu.CopyFromDevice(gpuMemOut, resultArray);
[Cudafy]
public static void NeighborhoodKernel(GThread thread, int[] constValues, float[,] dataIn, int[] distInSqr, float[,] dataOut)
{
int nbRows = constValues[0];
int nbCols = constValues[1];
int thresHold = constValues[2];
int radiusNbCells = constValues[3];
int x = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
int y = thread.threadIdx.y + thread.blockIdx.y * thread.blockDim.y;
while (x < nbRows)
{
while (y < nbCols)
{
dataOut[x, y] = GetNeighborhoodValue(x, y, radiusNbCells, distInSqr, nbRows, nbCols, dataIn, thresHold);
thread.SyncThreads();
y += thread.blockDim.y * thread.gridDim.y;
}
x += thread.blockDim.x*thread.gridDim.x;
}
}
[Cudafy]
public static float GetNeighborhoodValue(int i, int j, int radiusNbCells, int[] distInSqr, int nbRows, int nbCols,
float[,] dataIn, int thresHold)
{
float tot = 0;
int maxRadiusNbCellsSqr = radiusNbCells * radiusNbCells;
int relRowNum = -1;
for (int row = i - radiusNbCells; row <= i + radiusNbCells; row++)
{
relRowNum++;
int distIndex = relRowNum * (2 * radiusNbCells + 1) - 1;
if (row >= nbRows)
break;
if (row < 0)
continue;
for (int col = j - radiusNbCells; col <= j + radiusNbCells; col++)
{
distIndex++;
if (col >= nbCols)
break;
if (col < 0)
continue;
if (distInSqr[distIndex] <= maxRadiusNbCellsSqr && dataIn[row,col] > 0.0f)
{
tot += dataIn[row,col];
}
}
}
// Reclassify
return (tot >= thresHold) ? 1.0f : 0.0f;
}