/*
 * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

#include "gl_utils.h"
#include "math_utils.h"
#include "cuda_utils.h"
#include "std_utils.h"

#include "NvParticlesGrid.h"
#include "NvParticlesParticleSolverImpl.h"
#include "NvParticlesProfiler.h"
#include <float.h>

namespace Easy
{
namespace NvParticles
{

/// Component-wise ceiling of a float4: x, y, z and w are each rounded up
/// independently with the global-namespace ::ceil.
inline float4 ceil(const float4 v)
{
    const float cx = ::ceil(v.x);
    const float cy = ::ceil(v.y);
    const float cz = ::ceil(v.z);
    const float cw = ::ceil(v.w);
    return make_float4(cx, cy, cz, cw);
}

//------------------------------------------------------------------------------------------
/// Default constructor: delegates member initialisation to _init().
ParticleGrid::ParticleGrid()
{
    this->_init();
}
//------------------------------------------------------------------------------------------
/// Put the grid bookkeeping into its initial, empty state.
///
/// Flags are cleared, all particle/cell counters are zeroed, the grid is set
/// to a single bucket of unit size, the adjacency-list descriptors are zeroed
/// and the grid transform is set to identity. No buffers are allocated here.
void ParticleGrid::_init()
{
    drawLabels = false;
    _isGridDataCopiedToHost = false;

    // grid variables
    maxParticles = 0;
    numCells = 0;
    numAllocatedCells = 0;
    num_occupied_cells = 0;
    bucketCount = make_uint3(1);
    bucketSize = make_vec4f(1.0f);

    // adjacency-list descriptors
    adjacencyListMaxItems = 0;
    adjacencyListNumItems = 0;
    adjacencyListPitch = 0;
#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)
    // deviceData() reports this value, so it must be defined even when
    // setup() has not run yet.
    adjacencyListSize = 0;
#endif

    xform = mat44f::identity();
}
//------------------------------------------------------------------------------------------
/// Allocate the per-particle buffers for a system of up to n particles.
///
/// Which buffers are created depends on the build configuration:
///  - neither CUDPP nor THRUST defined: one uint2-per-particle buffer pair
///    (CUDA + HOST) for the radix sort;
///  - otherwise: separate uint-per-particle buffers for cell indices, sorted
///    indices and used cells, each with a HOST counterpart of the same size;
///  - with NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST: an additional CUDA
///    buffer for the neighbor lists.
/// Every buffer is cleared with -1 after its allocation call.
///
/// @param n  maximum number of particles (stored in maxParticles).
/// @return   the accumulated result of all Allocate() calls.
bool ParticleGrid::setup(int n)
{
    bool ok = true;
    maxParticles = n;

#if !defined(NVPARTICLES_SPATIAL_GRID_USE_CUDPP) && !defined(NVPARTICLES_SPATIAL_GRID_USE_THRUST)
    // radixsort (KeyValuePair): one uint2 record per particle
    const size_t pairBytes = sizeof(uint2) * n;

    // the CUDA buffer is twice as long for the ping-pong radix computation
    ok &= d_particlesBucketIdMem.Allocate(Cu::Buffer::CUDA, 2 * pairBytes, 0, "d_itemCellIndicesWithSorted");
    d_particlesBucketIdMem.Clear(-1);

    ok &= h_particlesBucketIdMem.Allocate(Cu::Buffer::HOST, pairBytes, 0, "h_itemCellIndicesWithSorted");
    h_particlesBucketIdMem.Clear(-1);

#else
    // one uint per particle in each of the three CUDA buffers below
    const size_t indexBytes = sizeof(uint) * n;

    // cell index of every particle
    ok &= d_particlesBucketIdMem.Allocate(Cu::Buffer::CUDA, indexBytes, 0, "d_itemCellIndices");
    d_particlesBucketIdMem.Clear(-1);
    ok &= h_particlesBucketIdMem.Allocate(Cu::Buffer::HOST, d_particlesBucketIdMem.Size(), 0, "h_itemCellIndices");
    h_particlesBucketIdMem.Clear(-1);

    // sorted particle indices
    ok &= d_particlesSortedIndexMem.Allocate(Cu::Buffer::CUDA, indexBytes, 0, "d_sortedItemCellIndices");
    d_particlesSortedIndexMem.Clear(-1);
    ok &= h_particlesSortedIndexMem.Allocate(Cu::Buffer::HOST, d_particlesSortedIndexMem.Size(), 0, "h_sortedItemCellIndices");
    h_particlesSortedIndexMem.Clear(-1);

    // used cells
    ok &= d_itemUsedCells.Allocate(Cu::Buffer::CUDA, indexBytes, 0, "d_itemUsedCells");
    d_itemUsedCells.Clear(-1);
    ok &= h_itemUsedCells.Allocate(Cu::Buffer::HOST, d_itemUsedCells.Size(), 0, "h_itemUsedCells");
    h_itemUsedCells.Clear(-1);

#endif

#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)

#if defined(NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE)

    // interleaved neighbor lists: the per-item capacity is at least the
    // interleave factor and must be a multiple of it
    adjacencyListNumItems = maxParticles;
    adjacencyListMaxItems = NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_MAX_NEIBS;
    adjacencyListMaxItems = max(adjacencyListMaxItems, NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE);

    assert(adjacencyListMaxItems%NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE == 0);

    adjacencyListPitch = adjacencyListMaxItems * NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE;
    // item count divided by the interleave factor, plus one, times the pitch
    adjacencyListSize = adjacencyListMaxItems*(adjacencyListNumItems/NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE+1)*NVPARTICLES_SPATIAL_GRID_ADJACENCY_LIST_INTERLEAVE;

    ok &= d_neighborsBuffer.Allocate(Cu::Buffer::CUDA, sizeof(uint)*adjacencyListSize, 0, "d_neighbors");
    d_neighborsBuffer.Clear(-1);
#else
    // plain neighbor lists: one row of at most 64 entries per particle
    adjacencyListNumItems = maxParticles;
    adjacencyListMaxItems = ::min(maxParticles, 64);
    adjacencyListPitch = adjacencyListMaxItems;

    adjacencyListSize = adjacencyListNumItems*adjacencyListPitch;

    ok &= d_neighborsBuffer.Allocate(Cu::Buffer::CUDA, sizeof(uint)*adjacencyListSize, 0, "d_neighbors");
    d_neighborsBuffer.Clear(-1);
#endif

#endif

    return ok;
}

//------------------------------------------------------------------------------------------
/// Destructor: contains no explicit statements of its own.
ParticleGrid::~ParticleGrid()
{
}

//------------------------------------------------------------------------------------------
/// Set up a new cell volume from the particle bounds and a requested bucket size.
///
/// Two strategies exist, selected by the local constant `quantize`:
///  - quantize == true : the bounds are snapped outwards to whole multiples of
///    the bucket size (easier to read on the screen);
///  - quantize == false (current setting): the bounds are kept as given and the
///    bucket size is stretched so that a whole number of buckets spans them.
/// The per-axis bucket count is then clamped to
/// [NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION, NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION]
/// and the cell storage is (re)sized through _ensureCellVolume().
///
/// @param bbox     particle bounds; NOTE(review): assumed to have a positive,
///                 finite extent on x, y and z - this is not checked here.
/// @param s        requested bucket size; x, y and z must be > 0 (asserted).
/// @param nLevels  number of levels forwarded to _ensureCellVolume().
/// @return         the result of _ensureCellVolume().
bool ParticleGrid::setCellSize(boundingbox4f bbox, vec4f s, int nLevels)
{
    assert(s.x > 0 && s.y > 0 && s.z > 0);

    bucketSize = s;
    particles_bbox = bbox;
    vec4f bboxSize;

    const bool quantize = false;

    if (quantize)
    {
        // quantize to bucketSize...
        particles_bbox.low.x = floor(bbox.low.x/bucketSize.x) * bucketSize.x;
        particles_bbox.low.y = floor(bbox.low.y/bucketSize.y) * bucketSize.y;
        particles_bbox.low.z = floor(bbox.low.z/bucketSize.z) * bucketSize.z;
        particles_bbox.high = ceil(bbox.high/bucketSize)*bucketSize;
        bboxSize = particles_bbox.high - particles_bbox.low;

        bucketCount = make_uint3((uint)ceilf(bboxSize.x/bucketSize.x),
                                 (uint)ceilf(bboxSize.y/bucketSize.y),
                                 (uint)ceilf(bboxSize.z/bucketSize.z));
    }
    else
    {
        // expand bucketSize to fit the bounds precisely...
        bboxSize = particles_bbox.high - particles_bbox.low;

        bucketCount.x = (uint)floor(bboxSize.x/bucketSize.x);
        bucketCount.y = (uint)floor(bboxSize.y/bucketSize.y);
        bucketCount.z = (uint)floor(bboxSize.z/bucketSize.z);

        // An axis narrower than one requested bucket floors to a count of
        // zero. Apply the minimum resolution BEFORE deriving the bucket size:
        // dividing by a zero count would leave bucketSize as inf/NaN, and the
        // size must match the final count for the buckets to span the bounds
        // (getParameters() derives countPerCell from that same count).
        if (bucketCount.x < NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION)
            bucketCount.x = NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION;
        if (bucketCount.y < NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION)
            bucketCount.y = NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION;
        if (bucketCount.z < NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION)
            bucketCount.z = NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION;

        // keep the requested size on an axis whose count is still zero
        // (only possible if the minimum resolution itself is zero).
        if (bucketCount.x > 0)
            bucketSize.x = bboxSize.x / (bucketCount.x);
        if (bucketCount.y > 0)
            bucketSize.y = bboxSize.y / (bucketCount.y);
        if (bucketCount.z > 0)
            bucketSize.z = bboxSize.z / (bucketCount.z);
    }

    // ensure at least the minimum number of buckets in each dimension,
    // because we search adjacent cells.
    // (open question kept from the original author: with a single cell and
    // wrapped cell coordinates, a lookup might pick the wrong cell?)
    if (bucketCount.x < NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION)
        bucketCount.x = NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION;
    if (bucketCount.y < NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION)
        bucketCount.y = NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION;
    if (bucketCount.z < NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION)
        bucketCount.z = NVPARTICLES_SPATIAL_GRID_MIN_RESOLUTION;

    // clamp to max resolution; the bucket size is enlarged to compensate...
    if (bucketCount.x > NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION)
    {
        bucketCount.x = NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION;
        bucketSize.x = bboxSize.x / (bucketCount.x-1);
    }
    if (bucketCount.y > NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION)
    {
        bucketCount.y = NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION;
        bucketSize.y = bboxSize.y / (bucketCount.y-1);
    }
    if (bucketCount.z > NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION)
    {
        bucketCount.z = NVPARTICLES_SPATIAL_GRID_MAX_RESOLUTION;
        bucketSize.z = bboxSize.z / (bucketCount.z-1);
    }

    if (quantize)
    {
        // just in case we clamped the bucketCount, we have to quantize it again!
        particles_bbox.low.x = floor(bbox.low.x/bucketSize.x) * bucketSize.x;
        particles_bbox.low.y = floor(bbox.low.y/bucketSize.y) * bucketSize.y;
        particles_bbox.low.z = floor(bbox.low.z/bucketSize.z) * bucketSize.z;
        particles_bbox.high = ceil(bbox.high/bucketSize)*bucketSize;
    }

    return _ensureCellVolume(bucketCount, nLevels);
}

//------------------------------------------------------------------------------------------
/// Allocate memory for cell volume.
/// We allocate (sx*sy*sz)*(1<<level) cells for each level in [0, nLevels),
/// plus one extra cell.
///
/// Storage is only reallocated when the required cell count exceeds
/// numAllocatedCells. On success the start-index buffers are cleared with -1
/// (the value render()/dump() treat as "no particles in this cell") and
/// numAllocatedCells is updated. On failure numAllocatedCells stays 0 and the
/// host copy is marked invalid.
///
/// @param bucketCount  number of buckets per axis (stored in this->bucketCount).
/// @param nLevels      number of levels to account for.
/// @return true when a reallocation was attempted - even if it failed; check
///         numAllocatedCells for that - and false when the existing storage
///         was already large enough.
bool ParticleGrid::_ensureCellVolume(uint3 bucketCount, int nLevels)
{
    bool rc = true;
    this->bucketCount = bucketCount;

    numCells = 0;
    for(int l=0; l<nLevels; ++l)
        numCells += (bucketCount.x * bucketCount.y * bucketCount.z) * (1<<l);
    numCells++; // for extra cell.

    assert(numCells > 0);

    // only reallocate if we need to.
    if (numCells <= numAllocatedCells)
        return false;

    // reset this so we can use it to check for an allocation error.
    numAllocatedCells = 0;

    // stop at the first failing allocation.
    if(rc)
        rc &= h_cellsParticleStartMem.Allocate(Cu::Buffer::HOST, numCells * sizeof(uint));
    if(rc)
        rc &= h_cellsParticleEndMem.Allocate(Cu::Buffer::HOST, numCells * sizeof(uint));

    if(rc)
        rc &= d_cellsParticleStartMem.Allocate(Cu::Buffer::CUDA, numCells * sizeof(uint));
    if(rc)
        rc &= d_cellsParticleEndMem.Allocate(Cu::Buffer::CUDA, numCells * sizeof(uint));

    if(rc)
    {
        h_cellsParticleStartMem.Clear(-1);
        d_cellsParticleStartMem.Clear(-1);
        numAllocatedCells = numCells;
    }
    else
    {
        // numCells now describes a volume without complete backing storage.
        // render() and dump() are gated only by this flag, so clear it to
        // keep them from walking numCells entries of the host buffers.
        _isGridDataCopiedToHost = false;
    }

    return true;
}

//------------------------------------------------------------------------------------------
// Refresh the host-side copies of the grid buffers from their device
// counterparts. Does nothing when the host copy is already flagged as current
// or when no cells have been allocated. Afterwards num_occupied_cells is set
// to numCells and the host copy is flagged as current.
void ParticleGrid::copyGridDataToHost(long stream)
{
    const bool hostCopyIsCurrent = _isGridDataCopiedToHost;
    const bool haveCells = (numAllocatedCells>0);
    if (hostCopyIsCurrent || !haveCells)
        return;

    NVPARTICLES_SCOPED_TIMER("ParticleGrid: transfer grid data", stream);

    // every copy is issued with the caller's stream in its copy options
    h_particlesBucketIdMem.Copy(d_particlesBucketIdMem, 0, 0, -1, Cu::Buffer::CopyOptions().SetStream(stream));
    h_cellsParticleStartMem.Copy(d_cellsParticleStartMem, 0, 0, -1, Cu::Buffer::CopyOptions().SetStream(stream));
    h_cellsParticleEndMem.Copy(d_cellsParticleEndMem, 0, 0, -1, Cu::Buffer::CopyOptions().SetStream(stream));
    h_particlesSortedIndexMem.Copy(d_particlesSortedIndexMem, 0, 0, -1, Cu::Buffer::CopyOptions().SetStream(stream));

    num_occupied_cells = numCells;
    _isGridDataCopiedToHost = true;
}

//------------------------------------------------------------------------------------------
/// Build a SpatialGridData view whose pointers refer to the d_* buffers.
/// With NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST defined, the adjacency
/// descriptors and the neighbor-buffer pointer are filled in as well.
SpatialGrid::SpatialGridData ParticleGrid::deviceData()
{
    SpatialGrid::SpatialGridData view;

    // per-item and per-cell index arrays
    view.itemCellIndices       = (uint*)d_particlesBucketIdMem.Data();
    view.cellFirstItemIndices  = (uint*)d_cellsParticleStartMem.Data();
    view.cellLastItemIndices   = (uint*)d_cellsParticleEndMem.Data();
    view.sortedItemCellIndices = (uint*)d_particlesSortedIndexMem.Data();

#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)
    // neighbor-list layout and storage
    view.adjacencyData.adjacencyListMaxItems = adjacencyListMaxItems;
    view.adjacencyData.adjacencyListNumItems = adjacencyListNumItems;
    view.adjacencyData.adjacencyListPitch = adjacencyListPitch;
    view.adjacencyData.adjacencyListPtr = (uint*)d_neighborsBuffer.Data();
    view.adjacencyData.adjacencyListSize = adjacencyListSize;
#endif

    return view;
}

//------------------------------------------------------------------------------------------
/// Build a SpatialGridData view whose pointers refer to the h_* buffers.
///
/// copyGridDataToHost() is called first, so the host buffers are refreshed
/// when they are not flagged as current. With
/// NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST defined, the adjacency layout
/// descriptors mirror those reported by deviceData(), but the list pointer is
/// NULL: this file keeps no host-side neighbor buffer.
SpatialGrid::SpatialGridData ParticleGrid::hostData()
{
    copyGridDataToHost();
    SpatialGrid::SpatialGridData d;
    d.itemCellIndices = (uint*)h_particlesBucketIdMem.Data();
    d.cellFirstItemIndices = (uint*)h_cellsParticleStartMem.Data();
    d.cellLastItemIndices = (uint*)h_cellsParticleEndMem.Data();
    d.sortedItemCellIndices = (uint*)h_particlesSortedIndexMem.Data();
#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)
    d.adjacencyData.adjacencyListMaxItems = adjacencyListMaxItems;
    d.adjacencyData.adjacencyListNumItems = adjacencyListNumItems;
    d.adjacencyData.adjacencyListPitch = adjacencyListPitch;
    d.adjacencyData.adjacencyListPtr = NULL;
    // previously left unset here although deviceData() fills it in.
    d.adjacencyData.adjacencyListSize = adjacencyListSize;
#endif
    return d;
}

//------------------------------------------------------------------------------------------
/// Rebuild the cached SpatialGridParameters from the current grid state and
/// return a pointer to the cached copy (the `parameters` member).
///
/// countPerCell is bucketCount divided by the extent of the bounds on each
/// axis; xformInv is obtained from xform.inverseAffine(). With
/// NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST defined, the adjacency
/// descriptors are copied and the list pointer is set to NULL.
const SpatialGrid::SpatialGridParameters* ParticleGrid::getParameters()
{
    SpatialGrid::SpatialGridParameters fresh;

    // grid resolution and bounds
    fresh.bucketCount = bucketCount;
    fresh.low = particles_bbox.low;
    fresh.high = particles_bbox.high;
    fresh.cellSize = bucketSize;

    // buckets per unit length along each axis
    fresh.countPerCell.x = float(fresh.bucketCount.x) / (fresh.high.x-fresh.low.x);
    fresh.countPerCell.y = float(fresh.bucketCount.y) / (fresh.high.y-fresh.low.y);
    fresh.countPerCell.z = float(fresh.bucketCount.z) / (fresh.high.z-fresh.low.z);

    // grid transform, its inverse, and boundary handling
    fresh.xform = xform;
    fresh.xformInv = xform.inverseAffine();
    fresh.boundaryMode = boundaryMode;

#if defined(NVPARTICLES_SPATIAL_GRID_USE_ADJACENCY_LIST)
    fresh.adjacencyData.adjacencyListMaxItems = adjacencyListMaxItems;
    fresh.adjacencyData.adjacencyListNumItems = adjacencyListNumItems;
    fresh.adjacencyData.adjacencyListPitch = adjacencyListPitch;
    fresh.adjacencyData.adjacencyListPtr = NULL;
#endif

    parameters = fresh;
    return &parameters;
}

//------------------------------------------------------------------------------------------
void ParticleGrid::render(bool cells, bool bbox, bool bounds)
{
    ParticleGrid *grid = this;

    if (!_isGridDataCopiedToHost)
    {
        //printf("requires CopyGridDataToHost!");
        return;
    }

    uint *h_cells_particle_start = (uint *)h_cellsParticleStartMem.Data();
    uint *h_cells_particle_end = (uint *)h_cellsParticleEndMem.Data();
    uint *h_particlesBucketId = (uint *)h_particlesBucketIdMem.Data();
    uint *h_sortedIndices = (uint *)h_particlesSortedIndexMem.Data();

    // ----------------------------------
    // draw particle bounds
    if (bbox)
    {
        glColor3f(1, 0.2, 0.1);

        gl::drawWireBox((float*)&grid->particles_bbox.low, (float*)&grid->particles_bbox.high);

		glPushMatrix();
		glMultMatrixf((GLfloat*)&xform);
		gl::drawWireCube(1);
		glPopMatrix();
    }

    if (cells)
    {
        glEnable(GL_BLEND);
        glBlendFunc(GL_SRC_ALPHA,GL_ONE_MINUS_SRC_ALPHA);

        glColor4f(1,1,0,1);
        //glEnable(GL_LINE_STIPPLE);
        //glLineStipple(1, 0x0101);

        for (uint itemCellIndex=0; itemCellIndex<grid->numCells; ++itemCellIndex)
        {
            int firstItem = h_cells_particle_start[itemCellIndex];
            if (firstItem == -1)
				continue;

			int3 coord = SpatialGrid::CellHashFunctor<true,false>::getCoord(itemCellIndex, bucketCount);
            //float4 low = make_float4(coord.x, coord.y, coord.z, 0) * grid->bucketSize + grid->particles_bbox.low;
			vec4f low = SpatialGrid::PosCellFunctor<vec4f>::getPos(coord, xform, bucketCount);
            vec4f high = low + grid->bucketSize;

            gl::drawWireBox((float*)&low, (float*)&high);
        }

        //glDisable(GL_LINE_STIPPLE);
        glDisable(GL_BLEND);
    }
}

//------------------------------------------------------------------------------------------
/// Debug dump of the per-particle device buffers followed by the per-cell
/// host dump (see dump()).
///
/// The given stream is synchronized first. Which buffers are dumped, and
/// with which element type, follows the same build configuration as setup().
///
/// @param nItems  forwarded to DumpAs().
/// @param step    forwarded to DumpAs().
/// @param stream  CUDA stream handle, cast to cudaStream_t for the synchronization.
void ParticleGrid::dump(int nItems, int step, long stream)
{
    NVPARTICLES_CUDA_SAFE_CALL(cudaStreamSynchronize((cudaStream_t)stream));

#if !defined(NVPARTICLES_SPATIAL_GRID_USE_CUDPP) && !defined(NVPARTICLES_SPATIAL_GRID_USE_THRUST)
    // radix-sort layout: one uint2 per item
    d_particlesBucketIdMem.DumpAs<uint2>("uniformGrid.items[cell,oldIdx]",nItems, step);
#else
    // split layout: cell indices and sorted indices live in separate buffers
    d_particlesBucketIdMem.DumpAs<uint>("uniformGrid.items[cell]", nItems, step);
    d_particlesSortedIndexMem.DumpAs<uint>("uniformGrid.items[oldIdx]", nItems, step);
#endif

    dump();
}

//------------------------------------------------------------------------------------------
/// Print the occupied cells of the host-side grid copy to stderr.
///
/// Does nothing unless the grid data has been copied to the host. For every
/// cell whose start index is not -1, the cell index, start index, end index
/// and their difference are printed.
void ParticleGrid::dump()
{
    if (!_isGridDataCopiedToHost)
    {
        return;
    }

    uint *h_cells_particle_start = (uint *)h_cellsParticleStartMem.Data();
    uint *h_cells_particle_end = (uint *)h_cellsParticleEndMem.Data();

    fprintf(stderr,"---------------------------------------\n");
    fprintf(stderr,"ParticleGrid Dump:\n\n");

    STDERR(maxParticles);

    // the original format string lacked the closing parenthesis and printed
    // the cell count with a signed conversion.
    fprintf(stderr,"Occupied Cells (total=%u):\n",(unsigned int)numCells);
    for (uint i=0; i<numCells; ++i)
    {
        int cell_index = i;
        int p_start = h_cells_particle_start[cell_index];
        // a start index of -1 marks a cell without particles
        if (p_start != -1)
        {
            int p_end = h_cells_particle_end[cell_index];
            int count = p_end - p_start;
            STDERR4(cell_index,p_start,p_end,count);
        }
    }
    fprintf(stderr,"---------------------------------------\n");
}

//------------------------------------------------------------------------------------------
}
}


