/*
 * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

#ifndef NVPARTICLES_GRID_CUDA_H_INCLUDED
#define NVPARTICLES_GRID_CUDA_H_INCLUDED

#include "NvParticlesExports.h"
#include "math_utils.h"
#include "cuda_std_utils.h"
#include "std_utils.h"

// Compile-time configuration of the spatial grid.
//
// When NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST is defined, SpatialGridData
// and SpatialGridParameters (declared further down in this header) each carry
// an AdjacencyListData member.
#define NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST
// NOTE(review): not referenced in this header; presumably the interleave width
// of the adjacency-list storage -- confirm against the implementation.
#define NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE 32
// NOTE(review): not referenced in this header; presumably the maximum number of
// neighbours recorded per item -- confirm against the implementation.
#define NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_MAX_NEIBS 192
// NOTE(review): the two disabled switches below are spelled differently from
// the symbols tested by the accessor-macro #if in this header
// (NVPARTICLES_USE_SORT_CUDPP / NVPARTICLES_USE_SORT_THRUST) -- confirm which
// spelling the build actually uses before enabling either of them.
//#define NVPARTICLES_SPATIAL_GRID_USE_THRUST
//#define NVPARTICLES_SPATIAL_GRID_USE_CUDPP
// NOTE(review): not referenced in this header; presumably the allowed range for
// the number of cells along one axis -- confirm against the implementation.
#define NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION 3
#define NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION 256

namespace Easy
{
namespace NvParticles
{

class ParticleGrid;

namespace SpatialGrid
{

//------------------------------------------------------------------------------------------
#if !defined(NVPARTICLES_USE_SORT_CUDPP) && !defined(NVPARTICLES_USE_SORT_THRUST)

// Packed layout: itemCellIndices is viewed as an array of uint2 in which
// .x holds the cell index and .y holds the index that was passed when the
// slot was written.
//
// Every macro parameter and every expansion is parenthesized so arguments
// such as `*gridPtr` or `base + k` expand the way the caller wrote them, and
// so each macro can be used as a single (sub)expression.

// Store the pair (itemCellIndex, i) into slot i.
#define NVPARTICLES_SPATIAL_GRID_SET_ITEM_CELL_INDEX(spatialGridData, i, itemCellIndex) \
    (((uint2 *)(spatialGridData).itemCellIndices)[i] = make_uint2((itemCellIndex), (i)))

// Read the item index (.y) stored in slot i.
#define NVPARTICLES_SPATIAL_GRID_GET_SORTED_ITEM_INDEX(spatialGridData, i) \
    (((uint2 *)(spatialGridData).itemCellIndices)[i].y)

// Read the cell index (.x) stored in slot i.
#define NVPARTICLES_SPATIAL_GRID_GET_ITEM_CELL_INDEX(spatialGridData, i) \
    (((uint2 *)(spatialGridData).itemCellIndices)[i].x)

#else

// Split layout: itemCellIndices[i] holds the cell index and
// sortedItemCellIndices[i] holds the item index, in two separate arrays.
//
// Every macro parameter and every expansion is parenthesized so arguments
// such as `*gridPtr` or `base + k` expand the way the caller wrote them.

// Store itemCellIndex into itemCellIndices[i] and i into
// sortedItemCellIndices[i]. Written as ONE comma expression (not two
// statements) so that both stores stay under an unbraced `if`/`for`.
#define NVPARTICLES_SPATIAL_GRID_SET_ITEM_CELL_INDEX(spatialGridData, i, itemCellIndex) \
    ((spatialGridData).itemCellIndices[i] = (itemCellIndex), \
     (spatialGridData).sortedItemCellIndices[i] = (i))

// Read the item index stored in slot i.
#define NVPARTICLES_SPATIAL_GRID_GET_SORTED_ITEM_INDEX(spatialGridData, i) \
    ((spatialGridData).sortedItemCellIndices[i])

// Read the cell index stored in slot i.
#define NVPARTICLES_SPATIAL_GRID_GET_ITEM_CELL_INDEX(spatialGridData, i) \
    ((spatialGridData).itemCellIndices[i])

#endif

//------------------------------------------------------------------------------------------
// Plain-data descriptor of an adjacency-list buffer. It is embedded by value in
// SpatialGridData and SpatialGridParameters when
// NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST is defined.
// NOTE(review): nothing in this header reads or writes these fields; the
// per-field notes below are inferred from the names -- confirm against the
// implementation.
struct AdjacencyListData
{
	// presumably the item capacity the list was sized for -- confirm
	uint adjacencyListMaxItems;
	// presumably the number of items currently described -- confirm
	uint adjacencyListNumItems;
    uint adjacencyListPitch; // pitch, IN ELEMENTS, NOT BYTES
	// presumably the total size of the buffer; units not shown here -- confirm
	uint adjacencyListSize;
	// start of the list storage (memory space not shown here -- confirm)
	uint* adjacencyListPtr;
};

//------------------------------------------------------------------------------------------
// Bundle of raw buffers used by the spatial grid. It is passed by value to
// gridSort() and gridSortBuffer().
struct SpatialGridData
{
    // presumably, per cell, the first item slot belonging to it -- confirm
    uint *cellFirstItemIndices;
    // presumably, per cell, the last (or one-past-last) item slot -- confirm
    uint *cellLastItemIndices;
    // Accessed through the NVPARTICLES_SPATIAL_GRID_*_ITEM_CELL_INDEX macros:
    // either a plain uint array of cell indices, or (when neither
    // NVPARTICLES_USE_SORT_CUDPP nor NVPARTICLES_USE_SORT_THRUST is defined)
    // reinterpreted as uint2 pairs {cell index, item index}.
    uint *itemCellIndices;
    // Item index per slot; only touched by the accessor macros in the
    // non-packed (CUDPP/Thrust) configuration.
    uint *sortedItemCellIndices;
    // NOTE(review): not referenced in this header -- purpose to be confirmed.
    uint *itemUsedCellIndices;
#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)
	// Adjacency-list buffer descriptor (only present in this configuration).
	AdjacencyListData adjacencyData;
#endif
};

//------------------------------------------------------------------------------------------
// Grid description handed to UploadParams() by pointer.
// NOTE(review): the field notes below are inferred from the names and from the
// like-named arguments of PosCellFunctor / CellHashFunctor in this header --
// confirm against the code that fills this struct.
struct SpatialGridParameters
{
    // presumably the number of cells along x, y, z -- confirm
    uint3 bucketCount;
    // presumably the minimum corner of the grid bounds -- confirm
    vec4f low;
    // presumably the maximum corner of the grid bounds -- confirm
    vec4f high;
    // presumably cells per world unit, per axis (cf. PosCellFunctor) -- confirm
    vec4f countPerCell;
    // presumably the world-space extent of one cell, per axis -- confirm
    vec4f cellSize;
	// presumably grid-space -> world-space transform (cf. PosCellFunctor::getPos)
	mat44f xform;
	// presumably world-space -> grid-space transform (cf. PosCellFunctor::getCoord)
	mat44f xformInv;
	// NOTE(review): encoding not shown here; presumably selects clamp vs.
	// periodic handling of out-of-bounds cells -- confirm
	uint boundaryMode;
#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)
	// Adjacency-list buffer descriptor (only present in this configuration).
	AdjacencyListData adjacencyData;
#endif
};

//------------------------------------------------------------------------------------------
// Exported entry points with C linkage. Only the declarations live in this
// header; every function takes a trailing cudaStream_t that defaults to 0.
// NOTE(review): the one-line summaries below are inferred from the names and
// signatures only -- confirm against the definitions.

// presumably sorts the item/cell index buffers in `data` for items
// [start, start+count) -- confirm
extern "C" _NvParticlesExport void gridSort(int start, int count, int maxItems, int maxCells, SpatialGridData data, cudaStream_t stream=0);
// presumably copies *h_parameters (the h_ prefix suggests a host pointer) to
// wherever the kernels read the grid parameters from -- confirm
extern "C" _NvParticlesExport void UploadParams(const SpatialGridParameters* h_parameters, cudaStream_t stream=0);
// presumably fills out_cellsStartIndex / out_cellsEndIndex per cell from the
// sorted itemCellIndices -- confirm
extern "C" _NvParticlesExport void gridComputeCellRanges(int start, int count, uint* itemCellIndices, uint numCells, uint* out_cellsStartIndex, uint* out_cellsEndIndex, cudaStream_t stream=0);
// presumably as gridComputeCellRanges, but producing only the start index
// per cell -- confirm
extern "C" _NvParticlesExport void gridComputeCellStarts(int start, int count, uint* itemCellIndices, uint nCells, uint* out_cellsStartIndex, cudaStream_t stream=0);
// presumably reorders numElements records of elementBytes bytes from inData
// into outData following the sorted order in spatialGridData -- confirm
extern "C" _NvParticlesExport void gridSortBuffer(int numElements, int elementBytes, void* inData, void* outData, SpatialGrid::SpatialGridData spatialGridData, cudaStream_t stream=0);

//-------------------------------------------------------------------
/// Given a world-position, calculate the quantized cell-location.
/// This can return cell-locations that are out of bounds.
///
template <class T>
struct PosCellFunctor
{
    /// World position -> cell coordinate, through an inverse grid transform.
    ///
    /// `pos` (any type with x, y, z members) is transformed by `xformInv`.
    /// Each axis of the result is remapped from [-1, 1] to [0, 1], scaled by
    /// `cellCount` and floored. Nothing is clamped here, so a transformed
    /// point outside [-1, 1] yields a negative or >= cellCount coordinate.
    inline static NVPARTICLES_CUDA_EXPORT
    int3 getCoord(T pos, mat44f xformInv, uint3 cellCount)
    {
        vec3f gridPoint = xformInv.multiplyPoint(make_vec3f(pos.x, pos.y, pos.z));

        // floorf() rather than a bare int() cast: the cast truncates toward
        // zero, which would fold the whole (-1, 0) range into cell 0 instead
        // of reporting the out-of-bounds cell -1. This also matches the
        // low/countPerCell overload below.
        int3 coord;
        coord.x = int(floorf(((gridPoint.x + 1.0f) * 0.5f) * cellCount.x));
        coord.y = int(floorf(((gridPoint.y + 1.0f) * 0.5f) * cellCount.y));
        coord.z = int(floorf(((gridPoint.z + 1.0f) * 0.5f) * cellCount.z));
        return coord;
    }

    /// Cell coordinate -> world position, through the grid transform.
    ///
    /// Each axis of `coord` is divided by `cellCount`, remapped from [0, 1]
    /// to [-1, 1] and the point is transformed by `xform`. No half-cell
    /// offset is added. Only x, y and z of the returned T are assigned; any
    /// other member of T is left as default-initialized.
    inline static NVPARTICLES_CUDA_EXPORT
    T getPos(int3 coord, mat44f xform, uint3 cellCount)
    {
        vec3f gridPoint;
        gridPoint.x = (float(coord.x) / cellCount.x) * 2.0f - 1.0f;
        gridPoint.y = (float(coord.y) / cellCount.y) * 2.0f - 1.0f;
        gridPoint.z = (float(coord.z) / cellCount.z) * 2.0f - 1.0f;
        gridPoint = xform.multiplyPoint(gridPoint);

        T pos;
        pos.x = gridPoint.x;
        pos.y = gridPoint.y;
        pos.z = gridPoint.z;
        return pos;
    }

    /// World position -> cell coordinate for an axis-aligned grid.
    ///
    /// Computes floor((pos - low) * countPerCell) per axis. The result is
    /// not clamped and may be negative or beyond the grid.
    inline static NVPARTICLES_CUDA_EXPORT
    int3 getCoord(T pos, vec4f low, vec4f countPerCell)
    {
        int3 coord;
        coord.x = int(floorf((pos.x - low.x) * countPerCell.x));
        coord.y = int(floorf((pos.y - low.y) * countPerCell.y));
        coord.z = int(floorf((pos.z - low.z) * countPerCell.z));
        return coord;
    }

    /// World position -> cell coordinate for an axis-aligned grid given by
    /// its bounds.
    ///
    /// Derives countPerCell = cellCount / (high - low) per axis and forwards
    /// to getCoord(pos, low, countPerCell). A zero extent on an axis is not
    /// guarded against (the division is performed as-is).
    inline static NVPARTICLES_CUDA_EXPORT
    int3 getCoord(T pos, vec4f low, vec4f high, uint3 cellCount)
    {
        vec4f gridSize;
        gridSize.x = (high.x - low.x);
        gridSize.y = (high.y - low.y);
        gridSize.z = (high.z - low.z);

        vec4f countPerCell;
        countPerCell.x = (float)cellCount.x / gridSize.x;
        countPerCell.y = (float)cellCount.y / gridSize.y;
        countPerCell.z = (float)cellCount.z / gridSize.z;

        // Was `Coord(...)`, a name that does not exist, so this overload
        // failed to compile as soon as it was instantiated.
        return getCoord(pos, low, countPerCell);
    }

    /// Cell coordinate -> world position for an axis-aligned grid.
    ///
    /// Computes low + coord / countPerCell per axis. Only x, y and z of the
    /// returned T are assigned.
    inline static NVPARTICLES_CUDA_EXPORT
    T getPos(int3 coord, vec4f low, vec4f countPerCell)
    {
        T pos;
        pos.x = low.x + coord.x / countPerCell.x;
        pos.y = low.y + coord.y / countPerCell.y;
        pos.z = low.z + coord.z / countPerCell.z;
        return pos;
    }

    /// Cell coordinate -> world position for an axis-aligned grid given by
    /// its bounds.
    ///
    /// Derives countPerCell = cellCount / (high - low) per axis and forwards
    /// to getPos(coord, low, countPerCell). A zero extent on an axis is not
    /// guarded against.
    inline static NVPARTICLES_CUDA_EXPORT
    T getPos(int3 coord, uint3 cellCount, vec4f low, vec4f high)
    {
        vec4f gridSize;
        gridSize.x = (high.x - low.x);
        gridSize.y = (high.y - low.y);
        gridSize.z = (high.z - low.z);

        vec4f countPerCell;
        countPerCell.x = (float)cellCount.x / gridSize.x;
        countPerCell.y = (float)cellCount.y / gridSize.y;
        countPerCell.z = (float)cellCount.z / gridSize.z;

        return getPos(coord, low, countPerCell);
    }
};

//-------------------------------------------------------------------
/// Get the hash-id from cell-position.
/// This has the option of clamping or wrapping out-of-bounds cells.
/// It also has a faster version which requires a power-of-two grid resolution.
///
template <bool IS_PERIODIC, bool IS_POWER_OF_TWO>
struct CellHashFunctor
{
    /// Cell coordinate -> linear cell index (x fastest, then y, then z).
    ///
    /// Out-of-bounds coordinates are first brought into [0, cellCount-1] per
    /// axis: wrapped when IS_PERIODIC, otherwise clamped. IS_POWER_OF_TWO
    /// only affects the periodic path, where it replaces the modulus by a
    /// bit mask and therefore requires every cellCount component to be a
    /// power of two.
    inline static NVPARTICLES_CUDA_EXPORT
    uint getHash(int3 coord, uint3 cellCount)
    {
        const int gsx = int(cellCount.x);
        const int gsy = int(cellCount.y);
        const int gsz = int(cellCount.z);

        int gx, gy, gz;
        if (IS_PERIODIC)
        {
            if (IS_POWER_OF_TWO)
            {
                // Power-of-two wrapping. The mask (gs-1) is non-negative, so
                // the result is already in [0, gs-1] even for negative
                // coordinates.
                gx = coord.x & (gsx - 1);
                gy = coord.y & (gsy - 1);
                gz = coord.z & (gsz - 1);
            }
            else
            {
                // Slow modulus. `%` keeps the sign of the dividend, so the
                // result lies in (-gs, gs) and a single correction brings a
                // negative remainder back into range.
                gx = coord.x % gsx;
                gy = coord.y % gsy;
                gz = coord.z % gsz;
                if (gx < 0)
                    gx += gsx;
                if (gy < 0)
                    gy += gsy;
                if (gz < 0)
                    gz += gsz;
            }
        }
        else
        {
            // Clamp to the bounds. The lower bound must be applied to the
            // incoming coordinate; previously it was applied to the
            // still-uninitialized locals and then overwritten, so negative
            // coordinates were never clamped.
            gx = min(max(coord.x, 0), gsx - 1);
            gy = min(max(coord.y, 0), gsy - 1);
            gz = min(max(coord.z, 0), gsz - 1);
        }

        // calculate the hash from the coordinate.
        return ((gz * cellCount.y) * cellCount.x) + (gy * cellCount.x) + gx;
    }

    /// Linear cell index -> cell coordinate; inverse of getHash() for
    /// in-bounds cells.
    ///
    /// With IS_POWER_OF_TWO the x and y components are extracted with a bit
    /// mask, which requires cellCount.x and cellCount.y to be powers of two.
    /// z is not range-checked: a hash beyond the grid yields z >= cellCount.z.
    inline static NVPARTICLES_CUDA_EXPORT
    int3 getCoord(uint hash, uint3 cellCount)
    {
        if (IS_POWER_OF_TWO)
        {
            int x = hash & (cellCount.x - 1);
            int y = (hash / cellCount.x) & (cellCount.y - 1);
            int z = hash / (cellCount.x * cellCount.y);
            return make_int3(x, y, z);
        }
        else
        {
            int x = hash % cellCount.x;
            int y = (hash / cellCount.x) % cellCount.y;
            int z = hash / (cellCount.x * cellCount.y);
            return make_int3(x, y, z);
        }
    }
};

}
}
}

#endif // NVPARTICLES_GRID_CUDA_H_INCLUDED
