When running the GPU code below I get a black screen. Graphics card: Nvidia Quadro K620.
The code is not complete, but the critical stuff is included.
Problem description: A moving circular window scans over a GIS raster, so that for each cell in the raster a neighborhood is evaluated. This evaluation consists of summing the values for all cells in the neighborhood, if the center cell is larger than zero. If the sum exceeds a certain threshold value, the cell at the center of the window will be assigned value 1, otherwise 0. The result is later saved in a new two-dimensional array that is used to create a new raster.
The source raster in this case is a DotSpatial raster object. Raster size = 6784 rows, 3480 columns. The neighborhood radius is 15 cells long.
The CPU-implementation works as expected. The GPU version however gives a black screen at runtime expect for very small neighborhoods (1-2 cells radius). Could there be some error when trying to access indata in the neighborhood?
(Note: For some reason all plus-symbols in the code below are replaced with + when submittting this post, although it looks prefect in Preview mode)
CPU-implementation:
The code is not complete, but the critical stuff is included.
Problem description: A moving circular window scans over a GIS raster, so that for each cell in the raster a neighborhood is evaluated. This evaluation consists of summing the values for all cells in the neighborhood, if the center cell is larger than zero. If the sum exceeds a certain threshold value, the cell at the center of the window will be assigned value 1, otherwise 0. The result is later saved in a new two-dimensional array that is used to create a new raster.
The source raster in this case is a DotSpatial raster object. Raster size = 6784 rows, 3480 columns. The neighborhood radius is 15 cells long.
The CPU-implementation works as expected. The GPU version however gives a black screen at runtime expect for very small neighborhoods (1-2 cells radius). Could there be some error when trying to access indata in the neighborhood?
(Note: For some reason all plus-symbols in the code below are replaced with + when submittting this post, although it looks prefect in Preview mode)
CPU-implementation:
int threshold = 50;
int radiusNbCells = sourceRaster.CellWidth; // (= 20)
int radiusNbCellsSqr = radiusNbCells * radiusNbCells;
Parallel.For(0, lastRow+1, i =>
{
for (int j = 0; j <= lastCol; j++)
{
if (raster.Value[i, j] > 0)
{
var val = getNeighborhoodValue(i, j, radiusNbCells,
lastRow, lastCol, sourceRaster, thresHold);
resultRaster.Value[i, j] = val;
}
else
{
resultRaster.Value[i, j] = 0;
}
}
});
private double getNeighborhoodValue(int i, int j, int radiusNbCells,
int lastRow, int lastCol, IRaster raster, int thresHold)
{
double tot = 0.0;
int maxRadiusNbCellsSqr = radiusNbCells* radiusNbCells;
// Bounding box = cell row +/- radusNbCells, cell column +/- radusNbCells
for (int row = i - radiusNbCells; row <= i + radiusNbCells; row++)
{
if (row > lastRow || row < 0)
continue;
int y2 = (row - i) * (row - i);
for (int col = j - radiusNbCells; col <= j + radiusNbCells; col++)
{
if (col > lastCol || col < 0)
continue;
if (raster.Value[row, col] > 0.0)
{
var x2 = (col - j)*(col - j);
var distSqr = (x2 + y2);
if (distSqr <= maxRadiusNbCellsSqr)
tot += raster.Value[row, col];
}
}
}
return (tot >= thresHold) ? 1.0 : 0.0;
}
GPU-implementation:public class HabitatModeGpu
{
public IRaster Execute(IRaster raster, int thresHold)
{
var mb = new float[nbRows, nbCols];
var cpuMemIn = new float[nbRows, nbCols];
var resultArray = new float[nbRows, nbCols];
var resultRaster = new Raster(…)
…
int i2 = 0;
for (var i = 0; i <= lastRow; i++)
{
for (var j = 0; j <= lastCol; j++)
{
cpuMemIn[i, j] = (float) raster.Value[i, j];
}
}
var gpu = NewCuda();
var gpuMemIn = gpu.Allocate(mb);
var gpuMemOut = gpu.Allocate(resultArray);
..
gpu.CopyToDevice(cpuMemIn, gpuMemIn);
gpu.Launch(nbRows, 1, NeighborhoodKernel, gpuMemIn, gpuMemOut, radiusNbCells,
lastRow, lastCol, thresHold);
gpu.Synchronize(); // Required??
gpu.CopyFromDevice(gpuMemOut, resultArray);
// Copy to new raster
Parallel.For(0, lastRow+1, (i) =>
{
for (var j = firstCol; j <= lastCol; j++)
{
resultRaster.Value[i, j] = resultArray[i, j];
}
});
gpu.FreeAll();
return resultRaster;
}
public static GPGPU NewCuda()
{
GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
eArchitecture arch = gpu.GetArchitecture();
CudafyModule km = CudafyTranslator.Cudafy(arch);
gpu.LoadModule(km);
return gpu;
}
[Cudafy]
public static float GetNeighborhoodValue(int i, int j, int radiusNbCells, int lastRow,
int lastCol, float[,] dataIn, int thresHold)
{
float tot = 0;
int radiusNbCellsSqr = radiusNbCells * radiusNbCells;
for (int row = i - radiusNbCells; row <= i + radiusNbCells; row++)
{
if (row > lastRow || row < 0) continue;
int y2 = (row - i) * (row - i);
for (int col = j - radiusNbCells; col <= j + radiusNbCells; col++)
{
if (col > lastCol || col < 0) continue;
if (dataIn[row,col] > 0.0f)
{
int x2 = (col - j)*(col - j);
// Check within circle x2 + y2 = maxRadiusSqr expressed in terms of number
// of cells
int dist2 = (x2 + y2);
if (dist2 <= radiusNbCellsSqr) tot += dataIn[row, col];
}
}
}
// Reclassify
return (tot >= thresHold) ? 1.0f : 0.0f;
}
[Cudafy]
public static void NeighborhoodKernel(GThread gThread, float[,] dataIn, float[,]
dataOut, int radiusNbCells, int lastRow, int lastCol, int thresHold)
{
int x = gThread.blockIdx.x;
int nbCols = lastCol + 1;
for (int y = 0; y <= lastCol; y++)
{
dataOut[x, y] = GetNeighborhoodValue(x, y, radiusNbCells, lastRow,
lastCol, dataIn, thresHold);
}
}