/*
 * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

/*
 * Masked box (mean) filter over a (2r+1) x (2r+1) window.
 *
 * Despite its name, this kernel applies no spatial or range weighting: every
 * in-bounds neighbour contributes with weight 1, except pixels whose value is
 * exactly 999999, which are treated as "invalid" and contribute weight 0.
 * The window is clipped at the image borders (no padding or mirroring).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output pixel; the grid
 * must cover at least w x h threads. Surplus threads exit immediately.
 * No shared memory is used and no barriers are executed.
 *
 * in   input image, row-major, w * h floats
 * out  output image, row-major, w * h floats (should not alias `in`, since
 *      other threads read neighbouring input pixels while this one writes)
 * w    image width in pixels
 * h    image height in pixels
 * r    window radius in pixels; r == 0 copies valid pixels unchanged
 *
 * When the window holds no valid sample (every in-bounds neighbour is the
 * sentinel, or r < 0 so the window is empty) the input pixel is passed
 * through unchanged instead of writing 0/0 = NaN.
 */
__global__ 
void d_bilateral_filter(
    float* in, 
    float *out,
    int w, 
    int h, 
    int r
    )
{
    // Value used by the input to mark pixels that must be ignored.
    const float kInvalid = 999999.0f;

    const unsigned int gx = (blockIdx.x * blockDim.x) + threadIdx.x;
    const unsigned int gy = (blockIdx.y * blockDim.y) + threadIdx.y;

    // Reject empty/negative image sizes first: casting a negative int to
    // unsigned would otherwise make the range test below pass.
    if (w <= 0 || h <= 0)
        return;
    if (gx >= (unsigned int)w || gy >= (unsigned int)h)
        return;

    // Safe: both coordinates are now known to be < w, h <= INT_MAX.
    const int x = (int)gx;
    const int y = (int)gy;

    // Clip the window to the image once instead of testing every tap.
    // For r < 0 the lower bound ends up above the upper bound, so the loops
    // below do not execute. (Assumes |r| is small enough that y +/- r and
    // x +/- r do not overflow int.)
    const int yLo = (y - r < 0) ? 0 : (y - r);
    const int yHi = (y + r > h - 1) ? (h - 1) : (y + r);
    const int xLo = (x - r < 0) ? 0 : (x - r);
    const int xHi = (x + r > w - 1) ? (w - 1) : (x + r);

    float weightSum = 0.0f;
    float vSum = 0.0f;

    for (int srcY = yLo; srcY <= yHi; srcY++)
    {
        // 64-bit row offset so large images do not wrap a 32-bit index.
        const size_t rowBase = (size_t)srcY * (size_t)w;

        for (int srcX = xLo; srcX <= xHi; srcX++)
        {
            const float v = in[rowBase + (size_t)srcX];
            const float weight = (v == kInvalid) ? 0.0f : 1.0f;
            vSum += weight * v;
            weightSum += weight;
        }
    }

    const size_t center = (size_t)y * (size_t)w + (size_t)x;

    // No valid sample in the window: keep the input pixel rather than
    // producing NaN from a division by zero.
    out[center] = (weightSum > 0.0f) ? (vSum / weightSum) : in[center];
}