Skip to main content
. Author manuscript; available in PMC: 2014 May 1.
Published in final edited form as: J Parallel Distrib Comput. 2013 Jan 16;73(5):10.1016/j.jpdc.2013.01.001. doi: 10.1016/j.jpdc.2013.01.001

Algorithm 2.

Pseudo code for parallel gridding operation on GPU with the compact binning.

00 __shared__ inElem sharedLocalBin[/*max size*/];
01 outputIdx index = computeOutputIndex(blockIdx, threadIdx);
02 outElem myOutElem = initOutElem(index);
03 int zLo = z0 - cutoff;
04 int zHi = z0 + blockDim.z + cutoff;
05 // compute yLo, yHi, xLo, xHi similarly
06 for z = [zLo:zHi]{
07  for y = [yLo:yHi]{
08   for x = [xLo:xHi]{
09    int start = binOffsetArray[z][y][x];
10    int end = binOffsetArray[z][y][x+1];
11    if(threadIdx < end-start){
12     sharedLocalBin[threadIdx] = globalBinArray[start+threadIdx];
13    }
14    __syncthreads();
15    for i=[0:end-start]{
16     if(|sharedLocalBin[i].coords - myOutElem.coords| < kernel-width){
17      /*compute the contribution of this input onto the output*/
18 } } } } }
19 globalOutputGrid[index] = myOutElem;