/*
 * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

//-----------------------------------------------------------------------------------
/// Argument bundle for IntegrateKernel.
///
/// An instance is filled on the host and copied into a __constant__ symbol
/// before each launch, so the kernel itself takes no parameters.
///
struct IntegrateKernelParams
{
    /// Number of particles to process (threads at or beyond this index exit).
    uint numParticles;
    /// Particle buffers the kernel reads from.
    ParticleData inData;
    /// Particle buffers the kernel writes to.
    ParticleData outData;
    /// Spatial grid description handed over by the caller.
    SpatialGrid::SpatialGridData spatialGridData;

    /// Default constructor; intentionally empty and leaves every member
    /// untouched. It has to stay empty because this type is also used for a
    /// __constant__ variable, which cannot be dynamically initialized.
    IntegrateKernelParams() {}

    /// Build a fully populated argument bundle.
    IntegrateKernelParams(const uint _numParticles,
                          const ParticleData _inData,
                          const ParticleData _outData,
                          const SpatialGrid::SpatialGridData _spatialGridData)
        : numParticles(_numParticles)
        , inData(_inData)
        , outData(_outData)
        , spatialGridData(_spatialGridData)
    {}
};

// Device-side copy of the kernel arguments, held in constant memory.
// integratePass() refreshes it with cudaMemcpyToSymbolAsync before each launch.
__device__ __constant__	IntegrateKernelParams d_IntegrateKernelParams;
// Host-side copy of the kernel arguments; the source of the upload above.
static IntegrateKernelParams h_IntegrateKernelParams;

// NVPARTICLES_KERNEL_ARG(x) selects member x of the argument struct:
// the constant-memory copy when compiling device code (__CUDA_ARCH__ defined),
// the host copy otherwise. The macro must not leak in from another file,
// hence the guard; it is #undef'ed again at the end of this file.
#ifdef NVPARTICLES_KERNEL_ARG
#error NVPARTICLES_KERNEL_ARG already defined!
#endif
#ifdef __CUDA_ARCH__
#define NVPARTICLES_KERNEL_ARG(x) d_IntegrateKernelParams.x
#else
#define NVPARTICLES_KERNEL_ARG(x) h_IntegrateKernelParams.x
#endif

//-----------------------------------------------------------------------------------
/// Selects which particle attribute drives the color written by the
/// integration kernel (see makeColor()).
enum ColorSource
{
    COLOR_SOURCE_NONE     = 0, ///< no recoloring; the existing color is carried over
    COLOR_SOURCE_COLOR    = 1, ///< the particle's static color
    COLOR_SOURCE_ID       = 2, ///< particle id relative to maxParticles
    COLOR_SOURCE_VELOCITY = 3, ///< velocity components
    COLOR_SOURCE_AGE      = 4, ///< age relative to lifespan
    COLOR_SOURCE_FORCE    = 5, ///< force components
    COLOR_SOURCE_PRESSURE = 6, ///< pressure relative to restPressure
    COLOR_SOURCE_DENSITY  = 7, ///< density relative to restDensity
    COLOR_SOURCE_TAG      = 8, ///< surface / non-surface tag
    NUM_COLOR_SOURCES     = 9  ///< number of sources listed above
};

//-----------------------------------------------------------------------------------
/// Wrap a single boundary-space coordinate back towards the unit box.
///
/// While the coordinate lies beyond +NVPARTICLES_UNITBOXLEN (and wrapPositive
/// is set) or beyond -NVPARTICLES_UNITBOXLEN (and wrapNegative is set), it is
/// shifted by two box lengths, exactly as a periodic boundary requires.
///
/// The loops stop as soon as a shift no longer changes the value. That happens
/// for +/-inf and for magnitudes so large that two box lengths fall below the
/// float resolution; without the check such an input would spin forever and
/// hang the calling kernel. In that case the coordinate is left as it is.
///
/// \return true if the coordinate was shifted at least once.
inline NVPARTICLES_CUDA_EXPORT
bool wrapParticleCoordinate(float& coord, const bool wrapPositive, const bool wrapNegative)
{
    bool wrapped = false;

    if (wrapPositive)
    {
        while (coord > NVPARTICLES_UNITBOXLEN)
        {
            const float shifted = coord - NVPARTICLES_UNITBOXLEN*2;
            if (shifted == coord)
                break; // no progress possible (inf or too large): do not hang.
            coord = shifted;
            wrapped = true;
        }
    }

    if (wrapNegative)
    {
        while (coord < -NVPARTICLES_UNITBOXLEN)
        {
            const float shifted = coord + NVPARTICLES_UNITBOXLEN*2;
            if (shifted == coord)
                break; // no progress possible (-inf or too large): do not hang.
            coord = shifted;
            wrapped = true;
        }
    }

    return wrapped;
}

/// Apply the periodic boundary to a world-space position.
///
/// The point is transformed into boundary space with boundaryMatrixInv, each
/// axis is wrapped in the directions enabled by the boundaryMode system
/// parameter, and the result is transformed back with boundaryMatrix.
/// NaN coordinates compare false against both limits and pass through
/// unchanged; non-finite or extremely large coordinates are left unwrapped
/// instead of looping forever.
///
/// \param P  position, updated in place (assumes vec3f holds float x/y/z).
/// \return true if any coordinate was shifted.
inline NVPARTICLES_CUDA_EXPORT
bool wrapParticle(vec3f& P)
{
    bool wrapped = false;
    P = NVPARTICLES_SYSTEM_PARAM(boundaryMatrixInv).multiplyPoint(P);

    // Each axis is handled independently: positive side first, then negative.
    wrapped |= wrapParticleCoordinate(
                   P.x,
                   (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_WRAPXPOS(NVPARTICLES_SYSTEM_PARAM(boundaryMode))) != 0,
                   (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_WRAPXNEG(NVPARTICLES_SYSTEM_PARAM(boundaryMode))) != 0);
    wrapped |= wrapParticleCoordinate(
                   P.y,
                   (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_WRAPYPOS(NVPARTICLES_SYSTEM_PARAM(boundaryMode))) != 0,
                   (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_WRAPYNEG(NVPARTICLES_SYSTEM_PARAM(boundaryMode))) != 0);
    wrapped |= wrapParticleCoordinate(
                   P.z,
                   (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_WRAPZPOS(NVPARTICLES_SYSTEM_PARAM(boundaryMode))) != 0,
                   (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_WRAPZNEG(NVPARTICLES_SYSTEM_PARAM(boundaryMode))) != 0);

    P = NVPARTICLES_SYSTEM_PARAM(boundaryMatrix).multiplyPoint(P);
    return wrapped;
}

//-----------------------------------------------------------------------------------
/// Calculate a particle's display color from its per-particle attributes.
///
/// \param coloringGradient  gradient selector forwarded to computeColorGradient().
/// \param coloringSource    ColorSource value choosing which attribute drives the color.
/// \param colorScale        multiplier applied to the scalar (or to C4) before clamping.
/// \param P4                particle position (currently unused).
/// \param V4                particle velocity; x/y/z are used by COLOR_SOURCE_VELOCITY.
/// \param tag               tag bits; tested against NVPARTICLES_WCSPH_TAG_SURFACE.
/// \param C4                static color used by COLOR_SOURCE_COLOR.
/// \param id                particle id, normalized by the maxParticles system parameter.
/// \param birthTime         birth time; age is (time - birthTime) / lifespan.
/// \param pressure          pressure, taken relative to restPressure.
/// \param density           density, taken relative to restDensity.
/// \param force             force vector used by COLOR_SOURCE_FORCE.
/// \return the computed color; make_float4(1) when the source is
///         COLOR_SOURCE_NONE or not one of the handled values.
///
inline NVPARTICLES_CUDA_EXPORT
float4 makeColor(int coloringGradient, int coloringSource, float colorScale, float4 P4, float4 V4, uint tag, float4 C4, uint id, float birthTime, float pressure, float density, float3 force)
{
    float4 color = make_float4(1);
    switch(coloringSource)
    {
    case COLOR_SOURCE_TAG:
    {
        // Two fixed palette entries: one for surface particles, one for the rest.
        if(tag & NVPARTICLES_WCSPH_TAG_SURFACE)
            color = NVPARTICLES_PALETTE_COLOR(0);
        else
            color = NVPARTICLES_PALETTE_COLOR(2);
        color.w = 1;
    }
    break;

    case COLOR_SOURCE_ID:
    {
        // Grey level proportional to the id; no gradient lookup here.
        float colorScalar = float(id) / NVPARTICLES_SYSTEM_PARAM(maxParticles);
        colorScalar *= colorScale;
        colorScalar = clamp(colorScalar, 0.0f, 1.0f);
        color = make_float4(colorScalar);
        color.w = 1;
    }
    break;

    case COLOR_SOURCE_AGE:
    {
        float colorScalar = (NVPARTICLES_SYSTEM_PARAM(time)-(birthTime)) / NVPARTICLES_SYSTEM_PARAM(lifespan);
        colorScalar *= colorScale;
        colorScalar = clamp(colorScalar, 0.0f, 1.0f);
        color = computeColorGradient(coloringGradient, colorScalar);
    }
    break;

    case COLOR_SOURCE_PRESSURE:
    {
        float colorScalar = (pressure - NVPARTICLES_SYSTEM_PARAM(restPressure));
        colorScalar *= colorScale / (2000.0f);
        colorScalar = clamp(colorScalar, 0.0f, 1.0f);
        color = computeColorGradient(coloringGradient, colorScalar);
    }
    break;

    case COLOR_SOURCE_DENSITY:
    {
        // 0.5f (not 0.5) keeps the arithmetic in single precision; the value
        // is unchanged because scaling by one half is exact.
        float colorScalar = (density / NVPARTICLES_SYSTEM_PARAM(restDensity)) * 0.5f;
        colorScalar *= powf(colorScalar,3) * colorScale * 10;
        colorScalar = clamp(colorScalar, 0.0f, 1.0f);
        color = computeColorGradient(coloringGradient, colorScalar);
    }
    break;

    case COLOR_SOURCE_VELOCITY:
    {
        float colorScalar = (fabs(V4.x)+fabs(V4.y)+fabs(V4.z)) / 3.0f;
        colorScalar *= colorScale / (100*NVPARTICLES_SYSTEM_PARAM(smoothingLength));
        colorScalar = clamp(colorScalar, 0.0f, 1.0f);
        color = computeColorGradient(coloringGradient, colorScalar);
    }
    break;

    case COLOR_SOURCE_FORCE:
    {
        // Use component magnitudes, as the velocity source does. Summing the
        // signed components lets opposite directions cancel and maps any
        // net-negative force to the bottom of the gradient.
        float colorScalar = (fabs(force.x)+fabs(force.y)+fabs(force.z)) / 3.0f;
        colorScalar *= colorScale / 80.0f;
        colorScalar = clamp(colorScalar, 0.0f, 1.0f);
        color = computeColorGradient(coloringGradient, colorScalar);
    }
    break;

    case COLOR_SOURCE_COLOR:
    {
        float4 c = C4 * colorScale;
        // remove alpha for testing purposes; black with no alpha is hard to debug!
        c.w = 1.0f;
        // The scaled static color was previously computed and then dropped,
        // so this source always produced the default color.
        color = c;
    }
    break;

    default:
        // COLOR_SOURCE_NONE and unknown values keep the default color.
        break;
    }

    return color;
}

//-----------------------------------------------------------------------------------
/// Integration kernel (euler-leapfrog method): advances one particle per thread
/// by one time step.
///
/// Reads the particle state from NVPARTICLES_KERNEL_ARG(inData), accumulates the
/// acceleration (stored force, global forces, gravity, primitive collisions and,
/// unless NVPARTICLES_WCSPH_USE_WALL_WEIGHT is defined, a boundary-wall penalty),
/// integrates velocity and position (leapfrog when
/// NVPARTICLES_WCSPH_USE_LEAPFROG_EULER is defined, explicit Euler otherwise),
/// decides whether the particle dies, and writes position, velocity, veleval,
/// birthTime, id and color to NVPARTICLES_KERNEL_ARG(outData).
///
/// Template parameters:
///   colSource - ColorSource value selecting how the output color is produced.
///   colGrad   - gradient selector forwarded to makeColor().
///   useXsph   - when true (and NVPARTICLES_WCSPH_USE_XSPH is defined) the position
///               update adds xsphFactor * xsphVelocity to the velocity.
///
/// Launch layout: 1D grid with one thread per particle; threads whose index is
/// >= numParticles return immediately. The arguments are not passed at launch
/// but read from the constant-memory parameter struct through
/// NVPARTICLES_KERNEL_ARG, so the struct must be uploaded before launching.
///
template<int colSource, int colGrad, bool useXsph>
__global__
void IntegrateKernel()
{
    // Flat 1D thread index == particle slot; surplus tail threads exit.
    int index = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (index >= NVPARTICLES_KERNEL_ARG(numParticles))
        return;

    // Gather this particle's input state.
    float4 P4 = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), position, index);
    float4 V4 = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), velocity, index);
    uint id = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), id, index);
    float birthTime = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), birthTime, index);
    // NOTE: staticColor is looked up by particle id, not by the thread index used everywhere else.
    float4 staticC4 = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), staticColor, id);
    float density = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), density, index);
    float3 P = make_float3(P4.x, P4.y, P4.z);
    float3 V = make_float3(V4.x, V4.y, V4.z);
    float3 internalForce = make_float3(NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), force, index));
    uint tag = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), tag, index);
    // Veval is the velocity handed to the collision/boundary damping terms below.
#ifdef NVPARTICLES_WCSPH_USE_LEAPFROG_EULER
    float3 Veval = make_float3(NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), veleval, index));
#else
    float3 Veval = V;
#endif
#if defined(NVPARTICLES_WCSPH_USE_XSPH)
    float4 meanVelocity = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), xsphVelocity, index);
#endif

    const float dt = NVPARTICLES_SYSTEM_PARAM(deltaTime);
    // Per-particle random seed: global seed offset by the particle id.
    uint seed = NVPARTICLES_SYSTEM_PARAM(seed)+id;

    // Start from the force stored in the input buffers.
    float3 accel = internalForce;

    // update accel from global-force accelerations...
    DefaultForceIterator globalAccelerationsIter;
    globalAccelerationsIter.P = P;
    globalAccelerationsIter.V = V;
    iterateForces<DefaultForceIterator>(globalAccelerationsIter, index);
    accel += (globalAccelerationsIter.result);

	// add gravity force.
    accel.y += -NVPARTICLES_SYSTEM_PARAM(gravity);

    // Add the response force from the collision primitives.
    DefaultCollisionIterator<1> collisionIt;
    collisionIt.P = make_float4(P.x, P.y, P.z, P4.w);
    collisionIt.V = Veval;
    iteratePrimitives< DefaultCollisionIterator<1> >(collisionIt, index);

    accel += collisionIt.outForce;

#if !defined(NVPARTICLES_WCSPH_USE_WALL_WEIGHT)
	// if either solid or periodic
	if ((NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_TYPE(NVPARTICLES_SYSTEM_PARAM(boundaryMode)) & 3))
	{
		// ensure we do not go through the wall.
		vec3f outContact;
		vec3f outNormal;
		float penetration = computeWallPenetration(
								make_vec3f(P.x,P.y,P.z),
								NVPARTICLES_SYSTEM_PARAM(particleRestDistance)/(2*NVPARTICLES_SYSTEM_PARAM(invScale)),
								outContact, outNormal
								);

		if (penetration >= NVPARTICLES_EPSILON)
		{
			// Penalty force along the wall normal: stiffness * scaled penetration
			// minus damping * normal component of Veval.
			float3 N = vec3fToFloat3(outNormal);
			float3 force = (NVPARTICLES_SYSTEM_PARAM(boundaryStiffness) * (penetration * NVPARTICLES_SYSTEM_PARAM(invScale)) - NVPARTICLES_SYSTEM_PARAM(boundaryDamping) * dot(Veval, N)) * N;
            //float3 force = (NVPARTICLES_SYSTEM_PARAM(boundaryStiffness) * (penetration * NVPARTICLES_SYSTEM_PARAM(invScale))) * N;
			accel += force;
		}
	}
#endif

    // limit acceleration...
    // NOTE(review): the squared acceleration is compared against
    // cache.velocityLimitPow2 and rescaled to velocityLimit, i.e. the
    // velocity-limit parameters are applied to the acceleration - confirm intended.
    float forceLenPow2 = dot(accel, accel);
    if (forceLenPow2 > NVPARTICLES_SYSTEM_PARAM(cache.velocityLimitPow2) )
        accel *= NVPARTICLES_SYSTEM_PARAM(velocityLimit) / sqrtf(forceLenPow2);

    // integrate velocity...

#if defined(NVPARTICLES_WCSPH_USE_LEAPFROG_EULER)
    // V(t+1/2) = V(t-1/2) + A(t) dt
    // V = v(t-1/2)
    float3 Vnext = V + accel * dt;
    // V(t+1) = [V(t-1/2) + V(t+1/2)] * 0.5
    Veval = (V + Vnext) * 0.5f;
    V = Vnext;
#else // use euler explicit...
    V += accel * dt;
#endif

    // integrate position...
    // The displacement is divided by invScale before being added to P.

#if defined(NVPARTICLES_WCSPH_USE_XSPH)
    if (useXsph)
    {
		P.x += ((V.x + NVPARTICLES_SYSTEM_PARAM(xsphFactor) * meanVelocity.x) * dt) / NVPARTICLES_SYSTEM_PARAM(invScale);
		P.y += ((V.y + NVPARTICLES_SYSTEM_PARAM(xsphFactor) * meanVelocity.y) * dt) / NVPARTICLES_SYSTEM_PARAM(invScale);
		P.z += ((V.z + NVPARTICLES_SYSTEM_PARAM(xsphFactor) * meanVelocity.z) * dt) / NVPARTICLES_SYSTEM_PARAM(invScale);
    }
    else
#endif
    {
		P.x += (V.x * dt) / NVPARTICLES_SYSTEM_PARAM(invScale);
		P.y += (V.y * dt) / NVPARTICLES_SYSTEM_PARAM(invScale);
		P.z += (V.z * dt) / NVPARTICLES_SYSTEM_PARAM(invScale);
    }

    // position.w of the input; 0 is written for killed particles further down.
    float mass = P4.w;

    /// HACK:
    // this hacks the mass to make it a density scaling factor for the sphere shader.
    // this is then stored in P.w for the shader to access.
    if (mass != 0)
    {
        mass = (density - NVPARTICLES_SYSTEM_PARAM(densityThreshold)) / (NVPARTICLES_SYSTEM_PARAM(restDensity) - NVPARTICLES_SYSTEM_PARAM(densityThreshold));
        mass = min(1.f, mass);
        mass = max(0.f, mass);
        // NOTE(review): when the clamped value is 0 this division yields +inf,
        // which the `mass <= 0.0f` kill test below does not catch - confirm intended.
        mass = 1 / mass;
    }

	int kill = 0;

    if (1)
    {

        // increment the particle's lifetime.
        const float lifeTime = NVPARTICLES_SYSTEM_PARAM(time) - birthTime;
	    const float lifespan = NVPARTICLES_SYSTEM_PARAM(lifespan) + random(seed) * NVPARTICLES_SYSTEM_PARAM(lifespanRandom);

        // Expire the particle once it outlives its (randomized) lifespan,
        // but only when lifespanMode is non-zero.
        if (NVPARTICLES_SYSTEM_PARAM(lifespanMode) != 0 && lifeTime > lifespan)
		{
			kill = 1;
		}

		// Also kill on invalid state. Only the x components are tested for NaN.
		if (isnan(P.x) || isnan(V.x) || mass <= 0.0f || density < NVPARTICLES_SYSTEM_PARAM(densityThreshold))
		{
            kill = 1;
		}
    }

	if (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_TYPE(NVPARTICLES_SYSTEM_PARAM(boundaryMode)) == 2)
	{
		// wrap the particles...

        vec3f Pw = make_vec3f(P.x, P.y, P.z);
        bool wrapped = wrapParticle(Pw);
		P.x = Pw.x;
		P.y = Pw.y;
		P.z = Pw.z;
        /*
        if (NVPARTICLES_KERNEL_ARG(outData).field)
        {
            float minSurfaceHeight = 0;
            int3 cell = ComputeHeightFieldIterator::posToCell(make_float4(P.x, P.y, P.z ,mass));
            cell.x = clamp(cell.x, 0, (int)NVPARTICLES_SPATIAL_GRID_PARAM(bucketCount).x-1);
            cell.z = clamp(cell.z, 0, (int)NVPARTICLES_SPATIAL_GRID_PARAM(bucketCount).z-1);
            minSurfaceHeight = NVPARTICLES_KERNEL_ARG(outData).field[cell.x+cell.z*NVPARTICLES_SPATIAL_GRID_PARAM(bucketCount).x];

            if (wrapped)
            {
                // check if this particle is part of a splash, or if it is the base ocean...
                if ((P.y - minSurfaceHeight)*NVPARTICLES_SYSTEM_PARAM(invScale) > NVPARTICLES_SYSTEM_PARAM(smoothingLength))
                    kill = 1;
            }
        }
        */
        if (wrapped)
        {
            if ((tag&NVPARTICLES_WCSPH_TAG_SURFACE))
            {
                // a bit crude, but much simpler.
                // this method kills all surface particles that cross the border.
                // we MUST replenish the amount of particles that die, or the water level will decrease.
                kill = 1;
            }
        }
	}

    // A killed particle is written out as all zeros (position, mass, velocities).
    if (kill)
    {
        P = make_float3(0);
        mass = 0;
        V = make_float3(0);
        Veval = make_float3(0);
        /*
        if (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_TYPE(NVPARTICLES_SYSTEM_PARAM(boundaryMode)) == 2)
	    {
            /// HACK:
            // move it to a not-too-unstable location at a jittered place on the bottom of the boundary!
            vec3f Pb = make_vec3f(random(seed)*2-1, -1, random(seed)*2-1);
            vec3f Pw = NVPARTICLES_SYSTEM_PARAM(boundaryMatrix).multiplyPoint(Pb);
            P = make_float3(Pw.x, Pw.y, Pw.z);
            mass = 3;
        }*/
    }

    if(1)
    {
        // Pack the results: position.w carries the (hacked) mass, velocity.w is passed through.
        float4 outP4 = make_float4(P.x, P.y, P.z, mass);
        float4 outV4 = make_float4(V.x, V.y, V.z, V4.w);

		// Output goes to the same slot the input was read from.
		int outIndex = index;//NVPARTICLES_KERNEL_ARG(spatialGridData).sortedItemCellIndices[index];

        NVPARTICLES_KERNEL_ARG(outData).position[outIndex]	= outP4;
        NVPARTICLES_KERNEL_ARG(outData).velocity[outIndex]	= outV4;
#if defined(NVPARTICLES_WCSPH_USE_LEAPFROG_EULER)
        NVPARTICLES_KERNEL_ARG(outData).veleval[outIndex]	= make_float4(Veval);
#else
        NVPARTICLES_KERNEL_ARG(outData).veleval[outIndex]	= make_float4(V);
#endif
        NVPARTICLES_KERNEL_ARG(outData).birthTime[outIndex]	= birthTime;

        NVPARTICLES_KERNEL_ARG(outData).id[outIndex] = id;

        if (1)
        {
			// colSource is a template parameter, so this branch is fixed per instantiation.
			if (colSource != COLOR_SOURCE_NONE)
			{
	            // update the dynamic color.

                // Pressure is only obtained when it is actually the color source.
                float pressure = 0.0f;
                if (colSource == COLOR_SOURCE_PRESSURE)
                {
#if defined(NVPARTICLES_WCSPH_STORE_PRESSURE)
                    pressure = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), pressure, index);
#else
                    pressure = computePressure(density);
#endif
                }

                // The final (limited) acceleration is passed as makeColor's force argument.
		        NVPARTICLES_KERNEL_ARG(outData).color[outIndex] = makeColor(colGrad, colSource, NVPARTICLES_SYSTEM_PARAM(colorScale), outP4, outV4, tag, staticC4, id, birthTime, pressure, density, accel);
			}
			else
			{
				// No recoloring requested: carry the input color over unchanged.
				NVPARTICLES_KERNEL_ARG(outData).color[outIndex] = NVPARTICLES_SYSTEM_FETCH_NOTEX(NVPARTICLES_KERNEL_ARG(inData), color, index);
			}
        }
    }
}

//------------------------------------------------------------------------------------------
/// Upload the kernel arguments and launch IntegrateKernel for numParticles particles.
///
/// The arguments are stored in the file-level host struct, copied to the
/// constant-memory symbol on `stream`, and the kernel is then launched on the
/// same stream so it observes the uploaded values.
///
/// \param useXsph          selects the IntegrateKernel instantiation (XSPH on/off).
/// \param start            unused; kept for interface compatibility.
/// \param numParticles     number of particles to integrate; nothing is done when <= 0.
/// \param inData           particle buffers to read.
/// \param outData          particle buffers to write.
/// \param spatialGridData  spatial grid description forwarded to the kernel arguments.
/// \param parameters       unused; kept for interface compatibility.
/// \param stream           CUDA stream used for both the upload and the launch.
///
/// NOTE: every call overwrites the same host struct and the same constant
/// symbol, so concurrent calls on different streams or threads would
/// interfere with each other.
template<int colSource, int colGrad>
void integratePass(bool useXsph, int start, int numParticles, ParticleData inData, ParticleData outData, SpatialGrid::SpatialGridData spatialGridData, const InternalParameters* parameters, cudaStream_t stream=0)
{
    (void)start;
    (void)parameters;

    // Reject empty AND negative counts: a negative value would otherwise be
    // converted to a huge unsigned particle count for the grid and the kernel.
    if (numParticles <= 0)
        return;

    // 128 threads per block (the Fermi and non-Fermi paths used the same value).
    uint numThreads, numBlocks;
    computeGridSize(numParticles, 128, numBlocks, numThreads);

    h_IntegrateKernelParams = IntegrateKernelParams(numParticles, inData, outData, spatialGridData);
    NVPARTICLES_CUDA_SAFE_CALL( cudaMemcpyToSymbolAsync(d_IntegrateKernelParams, &h_IntegrateKernelParams, sizeof(IntegrateKernelParams), 0, cudaMemcpyHostToDevice, stream));

    // useXsph is a runtime flag here but a compile-time parameter of the kernel.
    if (useXsph)
    {
        IntegrateKernel<colSource, colGrad, true> <<<numBlocks, numThreads, 0, stream>>>();
    }
    else
    {
        IntegrateKernel<colSource, colGrad, false> <<<numBlocks, numThreads, 0, stream>>>();
    }
    NVPARTICLES_CUDA_CHECK_ERROR("Kernel execution failed: IntegrateKernel");
}

//-----------------------------------------------------------------------------------
/// Wrap kernel: validates newly emitted particles and applies the periodic
/// boundary, updating the particle buffers in place.
///
/// One thread per particle on a 1D grid; threads with index >= nParticles exit.
/// For each particle:
///   - position.w == 3 marks a new particle. Its w is reset to 1 and it is
///     tested against the collision primitives; a particle that starts inside
///     a primitive is killed.
///   - when the boundary mode type is 2 (periodic) the position is wrapped;
///     a surface-tagged particle that wrapped is killed.
///   - a killed particle gets position, velocity and veleval all set to zero.
///
/// \param nParticles  number of particles in `data`.
/// \param data        particle buffers; position, velocity and veleval are
///                    rewritten, tag is only read.
///
__global__
void computeWrapKernel(const int nParticles,
                                ParticleData data
                               )
{
    const int index = (blockIdx.x * blockDim.x) + threadIdx.x;
    if (index >= nParticles)
        return;

    const int outIndex = index;

    float4 position = NVPARTICLES_SYSTEM_FETCH_NOTEX(data, position, index);
    uint tag = NVPARTICLES_SYSTEM_FETCH_NOTEX(data, tag, index);
    float4 velocity = NVPARTICLES_SYSTEM_FETCH_NOTEX(data, velocity, index);
    float4 veleval = NVPARTICLES_SYSTEM_FETCH_NOTEX(data, veleval, index);
    //uint id = NVPARTICLES_SYSTEM_FETCH_NOTEX(data, id, index);

    int kill = 0;

    if (position.w == 3)
    {
        // This is a new particle...
        position.w = 1;

        // Ensure it did not start inside a collision primitive.
        CollisionTestIterator collisionIt;
        collisionIt.P = position;
        iteratePrimitives< CollisionTestIterator >(collisionIt, index);
        if (collisionIt.hits)
        {
            // Route the rejection through the common kill path below so the
            // velocity buffers are cleared together with the position.
            // (Previously only the position was zeroed here, leaving a dead
            // particle with stale velocity and veleval.)
            kill = 1;
        }
    }

    // A particle that is already rejected does not need wrapping.
    if (!kill && NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_TYPE(NVPARTICLES_SYSTEM_PARAM(boundaryMode)) == 2)
    {
        vec3f Pw = make_vec3f(position.x, position.y, position.z);
        bool wrapped = wrapParticle(Pw);
        position.x = Pw.x;
        position.y = Pw.y;
        position.z = Pw.z;

        /*
        if (data.field)
        {
            float minSurfaceHeight = 0;
            int3 cell = ComputeHeightFieldIterator::posToCell(make_float4(position.x, position.y, position.z, position.w));
            cell.x = clamp(cell.x, 0, (int)NVPARTICLES_SPATIAL_GRID_PARAM(bucketCount).x-1);
            cell.z = clamp(cell.z, 0, (int)NVPARTICLES_SPATIAL_GRID_PARAM(bucketCount).z-1);
            minSurfaceHeight = data.field[cell.x+cell.z*NVPARTICLES_SPATIAL_GRID_PARAM(bucketCount).x];

            if (wrapped)
            {
                // check if this particle is part of a splash, or if it is the base ocean...
                if ((Pw.y - minSurfaceHeight)*NVPARTICLES_SYSTEM_PARAM(invScale) > NVPARTICLES_SYSTEM_PARAM(smoothingLength))
                {
                    kill = 1;
                }
            }
        }*/

        // Surface particles that cross the periodic border are removed.
        if (wrapped && (tag&NVPARTICLES_WCSPH_TAG_SURFACE))
        {
            kill = 1;
        }
    }

    if (kill)
    {
        // Dead particle: everything zero, including position.w.
        position = make_float4(0);
        velocity = make_float4(0);
        veleval = make_float4(0);
        /*
        if (NVPARTICLES_SPATIAL_GRID_BOUNDARYMODE_TYPE(NVPARTICLES_SYSTEM_PARAM(boundaryMode)) == 2)
        {
            /// HACK:
            // move it to a not-too-unstable location at a jittered place on the bottom of the boundary!
            uint seed = NVPARTICLES_SYSTEM_PARAM(seed) + id;
            vec3f Pb = make_vec3f(random(seed)*2-1, -1, random(seed)*2-1);
            vec3f Pw = NVPARTICLES_SYSTEM_PARAM(boundaryMatrix).multiplyPoint(Pb);
            position = make_float4(Pw.x, Pw.y, Pw.z, 3);
        }*/
    }

    data.position[outIndex] = position;
    data.velocity[outIndex] = velocity;
    data.veleval[outIndex] = veleval;
}

//------------------------------------------------------------------------------------------
/// Launch computeWrapKernel over nParticles particles on the given stream.
///
/// \param nParticles  number of particles in `data`; nothing is done when <= 0.
/// \param data        particle buffers, updated in place by the kernel.
/// \param stream      CUDA stream to launch on.
void computeWrap(int nParticles, ParticleData data, cudaStream_t stream=0)
{
    // Reject empty AND negative counts: a negative value would otherwise be
    // converted to a huge unsigned count when the grid size is computed.
    if (nParticles <= 0)
        return;

    // Block size depends on the target architecture.
    uint nThreads, nBlocks;
#if defined(NVPARTICLES_HAS_FERMI)
    computeGridSize(nParticles, 256, nBlocks, nThreads);
#else
    computeGridSize(nParticles, 128, nBlocks, nThreads);
#endif

    computeWrapKernel <<<nBlocks, nThreads, 0, stream>>>(
        nParticles,
        data
    );
    NVPARTICLES_CUDA_CHECK_ERROR("Kernel execution failed: computeWrapKernel");
}

//------------------------------------------------------------------------------------------
// NVPARTICLES_KERNEL_ARG is private to the kernels above; remove it again so
// the "already defined" guard at its definition does not fire for later code.
#undef NVPARTICLES_KERNEL_ARG
