/* ---------------------------------------------------------------------------
 * This software is in the public domain, furnished "as is", without technical
 * support, and with no warranty, express or implied, as to its usefulness for
 * any purpose.

 * Author: Wil Braithwaite.
 *
 */

#include "CuBuffer.h"
#include "gl_utils.h"
#include "cuda_utils.h"
#include <cuda_gl_interop.h>


#define USE_CUDA3 1

namespace Easy
{
namespace Cu
{
//------------------------------------------------------------------------------------------
/// Make this buffer an OpenGL buffer object of `s` bytes that is registered with CUDA.
///
/// @param s       size in bytes; passed to glBufferData when a new buffer object is
///                created, and recorded in data.size on success.
/// @param target  OpenGL buffer binding target (asserted non-zero).
/// @param vboId   0 to generate a new buffer object here; otherwise the id of a
///                pre-created buffer object, which is registered but not deleted by us.
/// @param flags   cudaGraphicsRegisterFlags* bits handed to cudaGraphicsGLRegisterBuffer.
/// @param _name   optional label copied into `name` (at most 63 characters are kept).
/// @return true when the buffer is registered (or data.size already equals `s`);
///         false when registration with CUDA failed.
bool Buffer::AllocateVBO(size_t s, unsigned int target, unsigned int vboId, int flags, const char* _name)
{
    assert(!data.mapped_from);

    // Don't reallocate if the size is unchanged.
    // NOTE(review): only the size is compared here -- a same-sized buffer of another
    // type/target/vboId is also treated as "already allocated"; confirm this is intended.
    if (data.size == s)
        return true;
    Free();

    assert(target);
    data.vboTarget = target;

    if(!vboId)
    {
        // We create the buffer object ourselves, so FreeVBO is allowed to delete it.
        data.isExternal = false;
        glGenBuffers(1, &data.vboId);

        glBindBuffer(data.vboTarget, data.vboId);
        glBufferData(data.vboTarget, s, 0, GL_DYNAMIC_DRAW);
        glBindBuffer(data.vboTarget, 0);
    }
    else
    {
        data.isExternal = true;
        // we are using a pre-created vbo (this must be in a shareable context!)
        data.vboId = vboId;
    }

#if USE_CUDA3 // > cuda3.0
    cudaGraphicsResource* graphicsResource = (cudaGraphicsResource*)data.graphicsResource;
    if(! NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsGLRegisterBuffer(&graphicsResource, data.vboId, flags)) )
    {
        // %u / %llu match the unsigned id and the full-width size (no truncation to int).
        printf("Buffer -- Failed to register BufferObject[%u] (%llu) with CUDA\n", (unsigned int)data.vboId, (unsigned long long)s);
        if(!data.isExternal)
            glDeleteBuffers(1, &data.vboId);
        data.vboId = 0;
        return false;
    }

    /// because there is a bug that means the register flags don't come through as
    /// map flags, translate them and set the map flags explicitly.
    int mapFlags = cudaGraphicsMapFlagsNone;
    if (flags & cudaGraphicsRegisterFlagsWriteDiscard)
        mapFlags |= cudaGraphicsMapFlagsWriteDiscard;
    if (flags & cudaGraphicsRegisterFlagsReadOnly)
        mapFlags |= cudaGraphicsMapFlagsReadOnly;

    NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsResourceSetMapFlags(graphicsResource, mapFlags));

    data.graphicsResource = (void*)graphicsResource;

#else
    if(! NVPARTICLES_CUDA_SAFE_CALL(cudaGLRegisterBufferObject(data.vboId)))
    {
        printf("Buffer -- Failed to register BufferObject[%u] (%llu) with CUDA\n", (unsigned int)data.vboId, (unsigned long long)s);
        if(!data.isExternal)
            glDeleteBuffers(1, &data.vboId);
        data.vboId = 0;
        return false;
    }
    //NVPARTICLES_CUDA_SAFE_CALL(cudaGLSetBufferObjectMapFlags(data.vboId, cudaGLMapFlagsWriteDiscard));    // CUDA writes, GL consumes
#endif
    data.type = VBO;
    data.size = s;
    owner = true;

    if(_name)
    {
        // strncpy does not terminate when the source fills the whole span, so copy at
        // most 63 characters and terminate explicitly (same 64 bytes are written).
        strncpy(name, _name, 63);
        name[63] = '\0';
    }

    return true;
}
//------------------------------------------------------------------------------------------
/// Unregister the buffer object from CUDA and, unless it was supplied externally,
/// delete it. Does nothing when no buffer object is held (data.vboId == 0).
void Buffer::FreeVBO()
{
    if (!data.vboId)
        return;

#if USE_CUDA3 // > cuda3.0
    NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsUnregisterResource((cudaGraphicsResource*)data.graphicsResource));
    // The handle is no longer valid once unregistered; don't keep it around.
    data.graphicsResource = 0;
#else
    NVPARTICLES_CUDA_SAFE_CALL(cudaGLUnregisterBufferObject(data.vboId));
#endif
    // Only delete buffer objects that AllocateVBO generated itself.
    if(!data.isExternal)
        glDeleteBuffers(1, &data.vboId);

    // Call the GL error check outside assert() so it still runs when NDEBUG
    // compiles the assert away.
    if (!glCheckErrors())
    {
        assert(!"Buffer::FreeVBO - OpenGL error while releasing the buffer object");
    }
    data.vboId = 0;
}
//------------------------------------------------------------------------------------------
/// Map this buffer object's storage into `d`, as a host pointer (d.data.type == HOST,
/// via glMapBuffer) or as a CUDA device pointer (d.data.type == CUDA, via the CUDA
/// graphics-interop map call).
///
/// @param d        destination buffer; on success its data.buffer, data.mapped_from,
///                 data.size, owner and data.mapOptions fields are filled in.
/// @param options  options.access selects the GL access mode for host maps;
///                 options.cuStream is the stream for CUDA maps, where -1 means
///                 "use stream 0 and synchronize it before returning".
/// @return true when a non-null mapping was obtained; false otherwise (including
///         when d.data.type is neither HOST nor CUDA).
bool Buffer::CreateMapFromVBOTo(Buffer &d, const MapOptions& options) const
{
    // Map into a local first so a failed call can never leave a stale pointer in `d`
    // that would be mistaken for a successful mapping.
    void* mappedPtr = 0;

    if (d.data.type == HOST)
    {
        glBindBufferARB(data.vboTarget, data.vboId);

        int glAccess = GL_READ_WRITE;
        if(options.access==MAP_ACCESS_READ_ONLY)
            glAccess = GL_READ_ONLY;
        else if(options.access==MAP_ACCESS_WRITE_ONLY)
            glAccess = GL_WRITE_ONLY;

        mappedPtr = glMapBuffer(data.vboTarget, glAccess);
        glBindBufferARB(data.vboTarget, 0);

        // Checked outside assert() so the check is not compiled out under NDEBUG.
        if (!glCheckErrors())
        {
            assert(!"Buffer::CreateMapFromVBOTo - OpenGL error while mapping the buffer object");
        }
    }
    else if (d.data.type == CUDA)
    {
        cudaStream_t stream = (cudaStream_t)options.cuStream;
        if(options.cuStream == -1)
            stream = (cudaStream_t)0;

#if USE_CUDA3 // > cuda3.0
        cudaGraphicsResource* graphicsResource = (cudaGraphicsResource*)(data.graphicsResource);
        if (NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsMapResources(1, &graphicsResource, stream)))
        {
            data.graphicsResource = (void*)graphicsResource;

            // Receive the size in a real size_t rather than writing through a cast of
            // d.data.size; the recorded size is taken from data.size below anyway.
            size_t mappedSize = 0;
            if (!NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsResourceGetMappedPointer(&mappedPtr, &mappedSize, graphicsResource)))
                mappedPtr = 0;

            // Don't leave the resource mapped if we could not obtain a pointer to it.
            if (!mappedPtr)
                NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsUnmapResources(1, &graphicsResource, stream));
        }
#else
        if (!NVPARTICLES_CUDA_SAFE_CALL(cudaGLMapBufferObjectAsync(&mappedPtr, data.vboId, stream)))
            mappedPtr = 0;
#endif

        if(options.cuStream == -1)
            NVPARTICLES_CUDA_SAFE_CALL(cudaStreamSynchronize(stream));
    }
    else
    {
        // Unsupported destination type: leave `d` untouched.
        return false;
    }

    d.data.buffer = mappedPtr;
    if (!mappedPtr)
        return false;

    ///data.mapped_to = &d;
    d.data.mapped_from = (Buffer*)this;
    d.data.size = data.size;
    d.owner = true;
    d.data.mapOptions = options;
    return true;
}
//------------------------------------------------------------------------------------------
/// Undo a mapping created by CreateMapFromVBOTo: unmap the source buffer object
/// recorded in d.data.mapped_from (via GL for HOST maps, via CUDA for CUDA maps) and
/// clear d's buffer pointer, mapped_from, size and owner fields.
///
/// For CUDA maps the stream stored in d.data.mapOptions.cuStream is used; -1 means
/// "use stream 0 and synchronize it before returning".
/// Aborts the process if the source buffer object cannot be bound for a HOST unmap.
void Buffer::DeleteMapFromVBOTo(Buffer &d)
{
    const Buffer* mapSrc = d.data.mapped_from;

    if (d.data.type == HOST)
    {
        glBindBufferARB(mapSrc->data.vboTarget, mapSrc->data.vboId);
        if(!glCheckErrors())
        {
            std::cout << "Cu::Buffer - ERROR - Failed to bind vbo[" << mapSrc->data.vboId << "] to " << mapSrc->data.vboTarget << std::endl;
            abort();
        }
        glUnmapBuffer(mapSrc->data.vboTarget);
        glBindBufferARB(mapSrc->data.vboTarget, 0);

        // Checked outside assert() so the check is not compiled out under NDEBUG.
        if (!glCheckErrors())
        {
            assert(!"Buffer::DeleteMapFromVBOTo - OpenGL error while unmapping the buffer object");
        }
    }
    else if (d.data.type == CUDA)
    {
        cudaStream_t stream = (cudaStream_t)d.data.mapOptions.cuStream;
        if(d.data.mapOptions.cuStream == -1)
            stream = (cudaStream_t)0;

#if USE_CUDA3 // > cuda3.0
        cudaGraphicsResource* graphicsResource = (cudaGraphicsResource*)(mapSrc->data.graphicsResource);
        // The unmap itself must run in every build. It used to live inside assert(),
        // which NDEBUG removes entirely, leaving the resource mapped in release builds.
        if (!NVPARTICLES_CUDA_SAFE_CALL(cudaGraphicsUnmapResources(1, &graphicsResource, stream)))
        {
            assert(!"Buffer::DeleteMapFromVBOTo - cudaGraphicsUnmapResources failed");
        }
        mapSrc->data.graphicsResource = (void*)graphicsResource;
#else
        NVPARTICLES_CUDA_SAFE_CALL(cudaGLUnmapBufferObjectAsync(mapSrc->data.vboId, stream));
#endif

        if(d.data.mapOptions.cuStream == -1)
            NVPARTICLES_CUDA_SAFE_CALL(cudaStreamSynchronize(stream));
    }
    ///data.mapped_to = NULL;

    // `d` no longer refers to any storage.
    d.data.buffer = 0;
    d.data.mapped_from = 0;
    d.data.size = 0;
    d.owner = false;
}
//------------------------------------------------------------------------------------------
/// Copy `count` bytes from `source` into this buffer object.
///
/// HOST sources are copied with memcpy through a GL_WRITE_ONLY mapping of the buffer
/// object. CUDA sources are first copied into a temporary host buffer and then copied
/// from there. Any other source type is rejected.
///
/// @param source    buffer to read from.
/// @param d_offset  byte offset into this buffer object.
/// @param s_offset  byte offset into `source`.
/// @param count     number of bytes to copy.
/// @param options   forwarded to CopyToHostFrom for the CUDA staging copy.
/// @return true when the data was copied and the buffer object unmapped cleanly;
///         false on an unsupported source type, an out-of-range destination, a null
///         source pointer, or a failed map/unmap/staging copy.
bool Buffer::CopyToVBOFrom(const Buffer &source, size_t d_offset, size_t s_offset, size_t count, const CopyOptions& options)
{
    // what are we copying from?
    switch (source.data.type)
    {
    case HOST:
    {
        // Refuse copies that would write past the end of the mapped store
        // (written so that d_offset + count cannot overflow).
        const size_t capacity = (size_t)Size();
        if (d_offset > capacity || count > capacity - d_offset)
            return false;
        if (count != 0 && !source.data.buffer)
            return false;

        glBindBuffer(data.vboTarget, data.vboId);

        if(count == capacity)
        {
            // we are replacing all of the data, so re-specify the store first
            // instead of waiting on the old contents
            glBufferDataARB(data.vboTarget, count, NULL, data.vboUsage);
        }

        bool copied = false;
        void *ptr = glMapBuffer(data.vboTarget, GL_WRITE_ONLY);
        if (ptr)
        {
            memcpy((char *)ptr+d_offset, (char *)source.data.buffer+s_offset, count);
            // glUnmapBuffer reports failure when the store's contents were lost while
            // mapped; don't claim success in that case.
            if (glUnmapBuffer(data.vboTarget))
                copied = true;
        }

        // Leave no buffer bound, matching the other VBO functions in this file.
        glBindBuffer(data.vboTarget, 0);
        return copied;
    }
    case CUDA:
    {
        // copy it to the host first, then to the VBO
        Buffer hmem;
        hmem.AllocateHost(count);
        if (!hmem.CopyToHostFrom(source, 0, s_offset, count, options))
            return false;
        return CopyToVBOFrom(hmem, d_offset, 0, count, options);
    }
    default:
        return false;
    }
}
//------------------------------------------------------------------------------------------
}
}
