/* ---------------------------------------------------------------------------
 * This software is in the public domain, furnished "as is", without technical
 * support, and with no warranty, express or implied, as to its usefulness for
 * any purpose.

 * Author: Wil Braithwaite.
 *
 */

#include "gl_utils.h"
#include "CuBuffer.h"
#include "cuda_utils.h"
#include <sstream>
#include <cuda_gl_interop.h>

namespace Easy
{
namespace Cu
{

// NOTE(review): judging by the name this is a page-count limit; it is not
// referenced anywhere in this file - confirm its meaning against the users.
const int Buffer::MAX_PAGES = 3;
// Diagnostic verbosity. Any non-zero value enables the "Buffer -- ..." trace
// messages emitted by Allocate/Free/Copy/Map/Unmap in this file; values above
// 1 additionally trace the arguments of CopyToHostFrom().
int Buffer::debugging = 0;

///------------------------------------------------------------------------------------------
/// options
///------------------------------------------------------------------------------------------
// Shared, default-constructed option objects. Their constructors leave the
// stream at -1 and, for MapOptions, the access mode at MAP_ACCESS_READ_WRITE.
const Buffer::CopyOptions Buffer::DefaultCopyOptions;
const Buffer::MapOptions Buffer::DefaultMapOptions;
const Buffer::ClearOptions Buffer::DefaultClearOptions;

//------------------------------------------------------------------------------------------
/// Creates clear options with the stream set to -1, the value Buffer::Clear()
/// tests for to choose the non-async cudaMemset call.
Buffer::ClearOptions::ClearOptions()
{
    const long noStream = -1;
    cuStream = noStream;
}
//------------------------------------------------------------------------------------------
/// Stores the stream handle to use for the clear and returns *this so that
/// calls can be chained.
/// @param v stream handle as an integer; -1 selects the non-async path in Clear().
Buffer::ClearOptions& Buffer::ClearOptions::SetStream(long v)
{
    this->cuStream = v;
    return *this;
}
//------------------------------------------------------------------------------------------
/// Writes the clear options to a stream as "stream:<value> ".
std::ostream& operator<<(std::ostream& s, const Buffer::ClearOptions& v)
{
    return s << "stream:" << v.cuStream << " ";
}
//------------------------------------------------------------------------------------------
/// Creates copy options with the stream set to -1, the value the copy
/// routines test for to choose the non-async cudaMemcpy call.
Buffer::CopyOptions::CopyOptions()
{
    const long noStream = -1;
    cuStream = noStream;
}
//------------------------------------------------------------------------------------------
/// Stores the stream handle to use for the copy and returns *this so that
/// calls can be chained.
/// @param v stream handle as an integer; -1 selects the non-async copy calls.
Buffer::CopyOptions& Buffer::CopyOptions::SetStream(long v)
{
    this->cuStream = v;
    return *this;
}
//------------------------------------------------------------------------------------------
/// Writes the copy options to a stream as "stream:<value> ".
std::ostream& operator<<(std::ostream& s, const Buffer::CopyOptions& v)
{
    return s << "stream:" << v.cuStream << " ";
}
//------------------------------------------------------------------------------------------
/// Creates map options with the stream set to -1 and read/write access.
Buffer::MapOptions::MapOptions()
{
    const long noStream = -1;
    cuStream = noStream;

    const int defaultAccess = MAP_ACCESS_READ_WRITE;
    access = defaultAccess;
}
//------------------------------------------------------------------------------------------
/// Stores the requested access mode and returns *this so that calls can be
/// chained.
/// @param v one of the MAP_ACCESS_* values (e.g. MAP_ACCESS_READ_WRITE).
Buffer::MapOptions& Buffer::MapOptions::SetAccess(int v)
{
    this->access = v;
    return *this;
}
//------------------------------------------------------------------------------------------
/// Stores the stream handle to use for the mapping and returns *this so that
/// calls can be chained.
/// @param v stream handle as an integer; the default constructor uses -1.
Buffer::MapOptions& Buffer::MapOptions::SetStream(long v)
{
    this->cuStream = v;
    return *this;
}
//------------------------------------------------------------------------------------------
/// Writes the map options to a stream as "stream:<value> access:<value> ".
std::ostream& operator<<(std::ostream& s, const Buffer::MapOptions& v)
{
    return s << "stream:" << v.cuStream << " "
             << "access:" << v.access << " ";
}
///------------------------------------------------------------------------------------------
///
///------------------------------------------------------------------------------------------

// Printable labels indexed by data.type (used by ShortName() and operator<<).
// NOTE(review): the order presumably mirrors the MemType enum declared in
// CuBuffer.h - confirm the two stay in sync.
const char* Buffer::typeNames[] = {"NONE", "HOST","VBO","CUDA"};
//------------------------------------------------------------------------------------------
/// Resets the buffer description to the "empty, unowned, unmapped" state and
/// clears the name. Nothing is released here, and data.type is left as it is.
void Buffer::Init()
{
    // Ownership and mapping state.
    owner = false;
    data.mapped_from = 0; // not mapped from any other buffer
    data.isExternal = false;

    // Storage description.
    data.buffer = 0;
    data.size = 0;
    data.pinned = false;

    // OpenGL / graphics-interop handles.
    data.vboId = 0;
    data.vboTarget = 0;
    data.vboUsage = 0;
    data.graphicsResource = 0;

    // Debug label.
    memset(name, 0, 64);
}
//------------------------------------------------------------------------------------------
/// Copy constructor: duplicates the source's buffer description, owner flag
/// and name. No memory is allocated or copied; both objects refer to the same
/// storage afterwards.
///
/// NOTE(review): because the owner flag is copied and release() does not clear
/// it on the source, copying an owning buffer leaves two objects whose
/// destructors will both call Free() on the same storage. Confirm that callers
/// only copy non-owning buffers (or rely on copy elision).
Buffer::Buffer(const Buffer &s)
    : owner(s.owner), data(s.release())
{
    // The name used to be left uninitialised here, so ShortName() and
    // operator<< read indeterminate bytes. Copy it and force termination.
    memset(name, 0, 64);
    strncpy(name, s.name, 63);
}
//------------------------------------------------------------------------------------------
/// Returns a copy of this buffer's description. The ownership hand-off that
/// the name suggests is disabled: the owner flag of this object is untouched.
Buffer::BufferData Buffer::release() const
{
    const BufferData snapshot = data;
    return snapshot;
}
//------------------------------------------------------------------------------------------
/// Two descriptions are considered equal when they have the same size, the
/// same buffer pointer and the same VBO id; no other field is compared.
bool Buffer::BufferData::operator==(const Buffer::BufferData& s) const
{
    return size == s.size
        && buffer == s.buffer
        && vboId == s.vboId;
}
//------------------------------------------------------------------------------------------
/// Makes this buffer refer to the same storage as `s`.
/// If the two descriptions differ, the current storage is released with Free()
/// first. The owner flag of `s` is not copied (that hand-off is disabled), so
/// the assigned-to buffer keeps whatever owner value it has after Free().
Buffer& Buffer::operator=(const Buffer &s)
{
    if (this == &s)
        return *this;

    const bool sameStorage = (data == s.data);
    if (!sameStorage)
        Free();

    data = s.release();
    return *this;
}
//------------------------------------------------------------------------------------------
/// Constructs a buffer of the given memory type.
/// @param _type   memory type recorded in data.type.
/// @param _size   size in bytes; when no `_buffer` is given and this is
///                non-zero, storage is allocated through Allocate().
/// @param _buffer optional existing storage to wrap (then `_size` must be > 0).
/// @param _owner  whether a wrapped `_buffer` is owned by this object.
Buffer::Buffer(MemType _type, size_t _size, void* _buffer, bool _owner)
{
    Init();
    data.type = _type;

    if (!_buffer)
    {
        // Nothing to wrap: allocate when a size was requested, else stay empty.
        if (_size > 0)
            Allocate(data.type, _size);
        return;
    }

    // Wrap caller-supplied storage.
    assert(_size > 0);
    owner = _owner;
    data.size = _size;
    data.buffer = _buffer;
}
//------------------------------------------------------------------------------------------
/// Destructor: tears down any mapping first, then releases the storage.
/// The order matters because Free() does nothing while mapped_from is set.
Buffer::~Buffer()
{
    static_cast<void>(Unmap());
    static_cast<void>(Free());
}
//------------------------------------------------------------------------------------------
/// Returns the OpenGL buffer id for VBO buffers and 0 for any other type.
unsigned int Buffer::Vbo() const
{
    if (data.type != VBO)
        return 0;
    return data.vboId;
}
//------------------------------------------------------------------------------------------
/// Returns the raw buffer pointer for non-VBO buffers and 0 for VBO buffers.
void* Buffer::Data() const
{
    if (data.type == VBO)
        return 0;
    return data.buffer;
}
//------------------------------------------------------------------------------------------
/// Fills the whole buffer with the byte value `v`.
///
/// HOST buffers use memset, CUDA buffers use cudaMemset (or cudaMemsetAsync
/// when options.cuStream is not -1), and VBO buffers are temporarily mapped to
/// a CUDA buffer which is then cleared.
///
/// @param v       value passed to memset/cudaMemset.
/// @param options stream selection for the CUDA paths.
/// @return false for an empty buffer, an unknown memory type, or (VBO only)
///         when the mapping or the clear of the mapped buffer fails.
bool Buffer::Clear(int v, const ClearOptions& options)
{
    if (data.size == 0)
        return false;

    bool cleared = true;
    switch (data.type)
    {
    case HOST:
        memset(data.buffer,v,data.size);
        break;
    case CUDA:
        if(options.cuStream != -1)
            NVPARTICLES_CUDA_SAFE_CALL(::cudaMemsetAsync(data.buffer, v, data.size, (cudaStream_t)options.cuStream));
        else
            NVPARTICLES_CUDA_SAFE_CALL(::cudaMemset(data.buffer, v, data.size));
        break;
    case VBO:
    {
        Buffer d_buf(CUDA);
        // Previously a failed mapping was ignored and success was reported
        // even though nothing had been cleared.
        if (!CreateMapFromVBOTo(d_buf, MapOptions().SetStream(options.cuStream)))
            return false;
        cleared = d_buf.Clear(v, options);
        DeleteMapFromVBOTo(d_buf);
    }
    break;
    default:
        cleared = false;
        break;
    }
    return cleared;
}
//------------------------------------------------------------------------------------------
/// Finishes one row of the hex dump: pads the hex area out to 32 columns and
/// prints the ASCII rendering of the `count` bytes shown on that row, which
/// are bytes[rowStart], bytes[rowStart + step], ...
static void DumpRowTail(const char* bytes, size_t rowStart, size_t step, int count)
{
    for (int pad = count; pad < 32; ++pad)
        fprintf(stderr,"   ");
    fprintf(stderr, "| ");
    for (int k = 0; k < count; ++k)
    {
        char c = bytes[rowStart + size_t(k) * step];
        // Anything below '0' (48) is rendered as a dot.
        if(c < 48)
            c = '.';
        fprintf(stderr,"%c",c);
    }
    fprintf(stderr,"\n");
}

/// Writes a hex + ASCII dump of the buffer contents to stderr.
///
/// HOST buffers are dumped directly; any other type is first copied into a
/// temporary HOST buffer. A new row starts whenever the byte index is a
/// non-zero multiple of 32.
///
/// @param title    optional heading printed once before the dump (may be null).
/// @param maxCount number of leading bytes to consider; size_t(-1) means the
///                 whole buffer. Values larger than Size() are clamped.
/// @param step     stride between dumped bytes; 0 is treated as 1.
/// @return true when the dump was written, false when the contents could not
///         be copied to host memory.
bool Buffer::Dump(const char *title, size_t maxCount, size_t step) const
{
    if (title)
        fprintf(stderr,"%s\n",title);

    // Never read past the end of the buffer.
    if(maxCount == size_t(-1) || maxCount > Size())
        maxCount = Size();

    // A zero stride would never advance the loop below.
    if (step == 0)
        step = 1;

    if (data.type != HOST)
    {
        // Stage the contents in host memory and dump that instead. The title
        // has already been printed, so it is not forwarded; the stride is.
        Buffer h_buf(HOST, size_t(maxCount));
        if (h_buf.Copy(*this) == size_t(-1))
            return false;
        return h_buf.Dump(0, size_t(maxCount), step);
    }

    std::cerr << *this << std::endl;

    const unsigned char* hexBytes = static_cast<const unsigned char*>(data.buffer);
    const char* asciiBytes = static_cast<const char*>(data.buffer);

    int cursorX = 0;      // entries printed on the current row
    size_t rowStart = 0;  // index of the first byte shown on the current row
    for (size_t i = 0; i < maxCount; i += step)
    {
        if (i != 0 && (i & 31) == 0)
        {
            DumpRowTail(asciiBytes, rowStart, step, cursorX);
            cursorX = 0;
            rowStart = i;
        }
        fprintf(stderr,"%02x ",hexBytes[i]);
        cursorX++;
    }
    // Flush the last (possibly partial or empty) row.
    DumpRowTail(asciiBytes, rowStart, step, cursorX);
    return true;
}

//------------------------------------------------------------------------------------------
/// Allocates `bytes` of storage of memory type `t` and optionally names the
/// buffer.
///
/// @param t     HOST, CUDA or VBO; any other value fails.
/// @param bytes number of bytes to allocate.
/// @param flags forwarded to the HOST and VBO allocators.
/// @param _name optional label; at most 63 characters are kept.
/// @return true when the type-specific allocator succeeded.
bool Buffer::Allocate(MemType t, size_t bytes, int flags, const char* _name)
{
    bool rc= false;
    switch (t)
    {
    case HOST:
        rc = AllocateHost(bytes, flags);
        break;
    case CUDA:
        rc = AllocateCUDA(bytes);
        break;
    case VBO:
        // NOTE(review): this passes the member `name`, not the `_name`
        // argument - confirm against AllocateVBO's signature whether that is
        // intended.
        rc = AllocateVBO(bytes, GL_ARRAY_BUFFER, 0, flags, name);
        break;
    default:
        return false;
    }

    if(_name)
    {
        // strncpy does not terminate when the source fills the destination;
        // copy one byte less and terminate explicitly so that later
        // `stream << name` cannot run off the end.
        strncpy(name, _name, 63);
        name[63] = 0;
    }

    if(rc)
    {
        if(debugging)
            std::cerr << "Buffer -- Allocated: " << *this << std::endl;
    }
    return rc;
}

//------------------------------------------------------------------------------------------
/// Releases the storage (when owned) and resets the buffer via Init().
/// @return false while the buffer is mapped from another one, or when an
///         owned buffer has an unknown memory type; true otherwise.
bool Buffer::Free()
{
    // A mapped buffer must be unmapped, not freed.
    if (data.mapped_from)
        return false;

    if (owner)
    {
        if(debugging)
            std::cerr << "Buffer --   Freeing: " << *this << std::endl;

        switch (data.type)
        {
        case HOST:
            FreeHost();
            break;
        case CUDA:
            FreeDevice();
            break;
        case VBO:
            FreeVBO();
            break;
        default:
            return false;
        }
    }

    // Owned or not, the description is reset.
    Init();
    return true;
}

//------------------------------------------------------------------------------------------
/// Copies bytes from `source` into this buffer, dispatching on this buffer's
/// memory type.
///
/// @param source   buffer to read from.
/// @param d_offset byte offset into this (destination) buffer.
/// @param s_offset byte offset into `source`.
/// @param count    number of bytes; size_t(-1) means the smaller of the two
///                 buffer sizes. The count is clamped (with a message on
///                 stderr) so that neither buffer is overrun.
/// @param options  stream selection for the CUDA paths.
/// @return the number of bytes copied, 0 when there is nothing to copy or both
///         buffers describe the same storage, and size_t(-1) on failure.
size_t Buffer::Copy(const Buffer &source, size_t d_offset, size_t s_offset, size_t count, const CopyOptions& options)
{
    if (count == (size_t)-1)
        count = std::min(data.size, source.data.size);

    if(count == 0)
        return 0;

    if(source.data == data)
        return 0; // no need to copy!

    // Bytes available from each offset to the end of its buffer. Comparing
    // against these avoids the overflow of `offset + count`, and clamping to
    // them replaces the old `count - offset`, which was the wrong quantity
    // and could wrap around.
    const size_t d_avail = (d_offset < data.size) ? (data.size - d_offset) : 0;
    if (count > d_avail)
    {
        std::cerr << *this << " Outside of Dest Buffer range (size: " << data.size
                  << ", n: " << count << ", d_offset: " << d_offset << ")" << std::endl;
        count = d_avail;
    }

    const size_t s_avail = (s_offset < source.data.size) ? (source.data.size - s_offset) : 0;
    if (count > s_avail)
    {
        std::cerr << *this << " Outside of Source Buffer range (size: " << source.data.size
                  << ", n: " << count << ", s_offset: " << s_offset << ")" << std::endl;
        count = s_avail;
    }

    // Clamping may have left nothing to copy.
    if (count == 0)
        return 0;

    if(debugging)
        printf("Buffer --   Copying: %s to %s (count=%d)\n", source.ShortName().c_str(), ShortName().c_str(), int(count));

    switch (data.type)
    {
    case HOST:
        if(!CopyToHostFrom(source,d_offset,s_offset,count,options))
            return -1;
        break;
    case CUDA:
        if(!CopyToDeviceFrom(source,d_offset,s_offset,count,options))
            return -1;
        break;
    case VBO:
        if(!CopyToVBOFrom(source,d_offset,s_offset,count,options))
            return -1;
        break;
    default:
        return -1;
    }
    return count;
}

//------------------------------------------------------------------------------------------
/// Builds a compact label of the form "[name: ]TYPE[id-or-pointer]", using the
/// VBO id when Vbo() is non-zero and the buffer pointer otherwise.
std::string Buffer::ShortName() const
{
    std::ostringstream label;

    if (name[0] != 0)
        label << name << ": ";

    label << typeNames[data.type] << '[';
    if (Vbo() != 0)
        label << data.vboId;
    else
        label << data.buffer;
    label << "]";

    return label.str();
}

//------------------------------------------------------------------------------------------
/// Copies `count` bytes from `source` into this HOST buffer.
///
/// @param source   buffer to read from (HOST, CUDA or VBO).
/// @param d_offset byte offset into this buffer.
/// @param s_offset byte offset into `source`.
/// @param count    number of bytes to copy; the caller must have range-checked it.
/// @param options  a cuStream other than -1 selects cudaMemcpyAsync for CUDA sources.
/// @return true when the copy was issued; false when both buffers share the
///         same pointer, the source type is unknown, or a VBO source could not
///         be mapped.
bool Buffer::CopyToHostFrom(const Buffer &source, size_t d_offset, size_t s_offset, size_t count, const CopyOptions& options)
{
    if(source.data.buffer == data.buffer)
        return false;

    if(debugging > 1)
    {
        std::cerr << "CopyToHostFrom: To: " << *this << std::endl;
        std::cerr << "From: " << source << std::endl;
        std::cerr << d_offset << ", " << s_offset << ", " << count << ", " << options << std::endl;
    }

    // what are we copying from?
    switch (source.data.type)
    {
    case HOST:
        memcpy((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count);
        return true;

    case CUDA:
        if(options.cuStream != -1)
            NVPARTICLES_CUDA_SAFE_CALL(cudaMemcpyAsync((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count, cudaMemcpyDeviceToHost, (cudaStream_t)options.cuStream));
        else
            NVPARTICLES_CUDA_SAFE_CALL(cudaMemcpy((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count, cudaMemcpyDeviceToHost));
        return true;

    case VBO:
    {
        // Note: the VBO is left bound to its target afterwards.
        glBindBuffer(source.data.vboTarget, source.data.vboId);
        void *ptr = glMapBuffer(source.data.vboTarget, GL_READ_ONLY);
        // A failed mapping used to be reported as success although nothing
        // had been copied.
        if (!ptr)
            return false;
        memcpy((char *)data.buffer+d_offset, (char *)ptr+s_offset, count);
        glUnmapBuffer(source.data.vboTarget);
        return true;
    }
    default:
        return false;
    }
    return false;
}

//------------------------------------------------------------------------------------------
/// Copies `count` bytes from `source` into this CUDA buffer.
///
/// @param source   buffer to read from (CUDA, HOST or VBO).
/// @param d_offset byte offset into this buffer.
/// @param s_offset byte offset into `source`.
/// @param count    number of bytes to copy; the caller must have range-checked it.
/// @param options  a cuStream other than -1 selects cudaMemcpyAsync.
/// @return true when the copy was issued; false when both buffers share the
///         same pointer, the source type is unknown, or a VBO source could not
///         be mapped.
bool Buffer::CopyToDeviceFrom(const Buffer &source, size_t d_offset, size_t s_offset, size_t count, const CopyOptions& options)
{
    if(source.data.buffer == data.buffer)
        return false;

    // what are we copying from?
    switch (source.data.type)
    {
    case CUDA:
        if(options.cuStream != -1)
            NVPARTICLES_CUDA_SAFE_CALL(cudaMemcpyAsync((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count, cudaMemcpyDeviceToDevice, (cudaStream_t)options.cuStream));
        else
            NVPARTICLES_CUDA_SAFE_CALL(cudaMemcpy((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count, cudaMemcpyDeviceToDevice));
        return true;

    case HOST:
        if(options.cuStream != -1)
            NVPARTICLES_CUDA_SAFE_CALL(cudaMemcpyAsync((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count, cudaMemcpyHostToDevice, (cudaStream_t)options.cuStream));
        else
            NVPARTICLES_CUDA_SAFE_CALL(cudaMemcpy((char *)data.buffer+d_offset, (char *)source.data.buffer+s_offset, count, cudaMemcpyHostToDevice));
        return true;

    case VBO:
        {
            // Map the VBO into CUDA address space and copy from the mapping.
            // NOTE(review): mapBuf is unmapped when it goes out of scope; with
            // an async stream the copy may still be in flight - confirm.
            Cu::Buffer mapBuf(CUDA);
            mapBuf.Map(source);
            if (mapBuf.data.buffer == 0)
                return false; // mapping failed, nothing to copy from

            // This path used to fall through to `return false`, so a
            // successful VBO copy was reported as a failure.
            return CopyToDeviceFrom(mapBuf, d_offset, s_offset, count, options);
        }

    default:
        return false;
    }
    return false;
}

//------------------------------------------------------------------------------------------
/// Allocates `s` bytes of host memory, releasing any current storage first.
///
/// With HOST_PINNED the memory comes from cudaHostAlloc (always portable,
/// optionally write-combined and/or mapped); otherwise it is a 4096-byte
/// aligned heap allocation.
///
/// @param s     number of bytes (must be > 0).
/// @param flags combination of the HOST_* flags.
/// @return true when the allocation succeeded and this buffer now owns it.
bool Buffer::AllocateHost(size_t s, int flags)
{
    // don't reallocate if we don't need to
    //if (owner && size == s)
    //	return;

    assert(!data.mapped_from);
    assert(s > 0);
    Free();

    if (flags&HOST_PINNED)
    {
        int device;
        cudaDeviceProp prop;
        cudaGetDevice(&device);
        cudaGetDeviceProperties(&prop, device);
        assert(prop.canMapHostMemory);

        // cudaHostAllocPortable is always requested, so HOST_PORTABLE has no
        // additional effect here.
        int cuflags = cudaHostAllocPortable;
        if (flags&HOST_WRITE_COMBINED)
            cuflags |= cudaHostAllocWriteCombined;

        if (flags&HOST_MAPPABLE)
        {
            //NVPARTICLES_CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceMapHost));
            cuflags |= cudaHostAllocMapped;
        }

        NVPARTICLES_CUDA_SAFE_CALL(cudaHostAlloc(&data.buffer, s, cuflags));
        if (data.buffer)
        {
            data.type = HOST;
            data.pinned = true;
            data.size = s;
            owner = true;
            //STDERR5(type, buffer, size, pinned, flags);
            return true;
        }
    }
    else
    {
#if defined(_WIN32)
        data.buffer = _aligned_malloc(s, 4096);
#else
        // The call used to live inside assert(), so builds with NDEBUG never
        // allocated anything. Call it unconditionally and check the result.
        void* aligned = 0;
        if (posix_memalign(&aligned, 4096, s) != 0)
            aligned = 0;
        data.buffer = aligned;
#endif
        if (data.buffer)
        {
            data.type = HOST;
            data.size = s;
            owner = true;
            return true;
        }
    }

    return false;
}
//------------------------------------------------------------------------------------------
/// Releases host storage with the deallocator matching how it was obtained:
/// cudaFreeHost for pinned memory, otherwise the aligned-heap free. Does
/// nothing when there is no buffer.
void Buffer::FreeHost()
{
    if (!data.buffer)
        return;

    if (data.pinned)
    {
        NVPARTICLES_CUDA_SAFE_CALL(cudaFreeHost(data.buffer));
    }
    else
    {
#if defined(_WIN32)
        _aligned_free(data.buffer);
#else
        free(data.buffer);
#endif
    }

    // Clear the pointer as FreeDevice() does, so that a second call cannot
    // free the same block again.
    data.buffer = 0;
}
//------------------------------------------------------------------------------------------
/// Allocates `s` bytes of CUDA device memory.
/// When the buffer already has exactly that size nothing is done; otherwise
/// any current storage is released before cudaMalloc is called.
/// @return true when the buffer has the requested size on return.
bool Buffer::AllocateCUDA(size_t s)
{
    assert(!data.mapped_from);
    assert(data.type == CUDA || data.type == NONE);
    assert(s > 0);

    if (data.size != s)
    {
        Free();
        NVPARTICLES_CUDA_SAFE_CALL(cudaMalloc(&data.buffer, s));
        if (!data.buffer)
            return false;

        owner = true;
        data.size = s;
        data.type = CUDA;
    }
    return true;
}
//------------------------------------------------------------------------------------------
/// Releases device storage with cudaFree and clears the pointer. A failed
/// free is reported through STDERR. Does nothing when there is no buffer.
void Buffer::FreeDevice()
{
    if (!data.buffer)
        return;

    if(!NVPARTICLES_CUDA_SAFE_CALL(cudaFree(data.buffer)))
        STDERR(*this);
    data.buffer = 0;
}
//------------------------------------------------------------------------------------------
/// Maps this CUDA buffer into `d`, depending on the type `d` was created with:
///  - CUDA: `d` becomes an alias of this buffer;
///  - VBO:  not supported, an error is printed;
///  - HOST: there is no real mapping, so host memory is allocated and the
///          contents are copied into it.
/// @return true when `d` now refers to (a copy of) this buffer's contents.
bool Buffer::CreateMapFromDeviceTo(Buffer &d, const MapOptions& /*options*/) const
{
    Buffer* const self = const_cast<Buffer*>(this);

    switch (d.data.type)
    {
    case CUDA:
        d = *this;
        d.data.mapped_from = self;
        // restore ownership!
        // d.owner = owner;
        // owner = false;
        return true;

    case VBO:
        //if(debugging)
        printf("Buffer --   ERROR -- Unable to map from CUDA memory to VBO. Try other way around!\n");
        return false;

    case HOST:
        if(debugging)
            printf("Buffer -- WARNING -- Unable to map from CUDA memory to HOST. Copying instead!\n");
        d.AllocateHost(Size(), 0);
        d.CopyToHostFrom(*this,0,0,Size(),DefaultCopyOptions);
        d.data.mapped_from = self;
        return true;

    default:
        return false;
    }
}

//------------------------------------------------------------------------------------------
/// Maps this HOST buffer into `d`.
/// For a CUDA destination the device pointer of the host memory is looked up;
/// when that fails the memory is registered with cudaHostRegister and the
/// lookup is retried. Any other destination simply becomes an alias of this
/// buffer.
/// @return false only when the CUDA device pointer could not be obtained.
bool Buffer::CreateMapFromHostTo(Buffer &d, const MapOptions& /*options*/) const
{
    if (d.data.type != CUDA)
    {
        d = *this;
        return true;
    }

    const unsigned int flags = 0;
    cudaError_t err = cudaHostGetDevicePointer(&d.data.buffer, data.buffer, flags);
    if (err != cudaSuccess)
    {
        printf("Buffer -- WARNING -- Unable to map memory. Pinning it and retrying...\n");
        err = cudaHostRegister(data.buffer, data.size, cudaHostRegisterPortable );
        if (err != cudaSuccess)
        {
            printf("Buffer --   ERROR -- Unable to pin memory!\n");
            return false;
        }

        err = cudaHostGetDevicePointer(&d.data.buffer, data.buffer, flags);
        if (err != cudaSuccess)
        {
            printf("Buffer --   ERROR -- Unable to map memory!\n");
            return false;
        }
    }

    d.owner = true;
    d.data.size = data.size;
    d.data.mapped_from = const_cast<Buffer*>(this);
    //if(debugging)
    //    printf("Buffer --   Mapping HOST[%p]  to  CUDA[%d]\n", d.data.buffer, d.data.size);

    return true;
}

//------------------------------------------------------------------------------------------
/// Undoes a mapping created from a HOST buffer. No type-specific teardown is
/// performed (not even for a CUDA destination); `d` is just reset via Init().
void Buffer::DeleteMapFromHostTo(Buffer &d)
{
    d.Init();
}

//------------------------------------------------------------------------------------------
/// Undoes a mapping created from a CUDA buffer.
/// A HOST destination holds a copy of the data: unless the mapping was
/// read-only, that copy is written back to the source buffer, and the host
/// memory is then released. Finally `d` is reset via Init().
void Buffer::DeleteMapFromCudaTo(Buffer &d)
{
    Buffer* const origin = d.data.mapped_from;

    if (d.data.type == HOST)
    {
        const bool mayHaveBeenWritten = (d.data.mapOptions.access != MAP_ACCESS_READ_ONLY);
        /// TO-DO: only if data written to it!
        if (mayHaveBeenWritten)
            origin->Copy(d);

        d.FreeHost();
    }

    d.Init();
}

//------------------------------------------------------------------------------------------
/// Maps the buffer `s` into this (unallocated, unmapped) buffer, dispatching
/// on the memory type of `s`. The outcome is only reported through the debug
/// trace; the function always returns *this.
Buffer &Buffer::Map(const Buffer &s, const MapOptions& options)
{
    // This buffer must be neither mapped nor allocated.
    assert(!data.mapped_from);
    assert(data.size == 0);
    // assert the source is not mapped to somewhere else
    ///assert(!s.data.mapped_to);

    bool mapped = false;
    if (s.data.type == HOST)
        mapped = s.CreateMapFromHostTo(*this,options);
    else if (s.data.type == CUDA)
        mapped = s.CreateMapFromDeviceTo(*this,options);
    else if (s.data.type == VBO)
        mapped = s.CreateMapFromVBOTo(*this,options);

    if(debugging)
    {
        if(mapped)
            printf("Buffer --   Mapping: %s to %s\n", s.ShortName().c_str(), ShortName().c_str());
        else
            printf("Buffer --   Failed to map: %s to %s\n", s.ShortName().c_str(), ShortName().c_str());
    }

    return (*this);
}

//------------------------------------------------------------------------------------------
bool Buffer::Unmap()
{
    if (!data.mapped_from)
        return false;

    //STDERR(mapped_from);

    if(debugging)
        std::cerr << "Buffer -- Unmapping: " << data.mapped_from->ShortName() << std::endl;

    if (owner)
    {
        switch (data.mapped_from->data.type)
        {
        case HOST:
            DeleteMapFromHostTo(*this);
            break;
        case CUDA:
            DeleteMapFromCudaTo(*this);
            break;
        case VBO:
            DeleteMapFromVBOTo(*this);
            break;

    default:
        return false;
        }
    }

    data.buffer = 0;
    data.mapped_from = 0;
    data.size = 0;
    owner = false;
    return true;
}

//------------------------------------------------------------------------------------------
/// Writes a one-line description of the buffer:
/// "[name: ]TYPE[id-or-pointer[, pinned]] x<size> (owned,mapped)", where the
/// words inside the parentheses appear only when the matching state is set.
std::ostream& operator<<(std::ostream& s, const Buffer& v)
{
    if (v.Name()[0] != 0)
        s << v.Name() << ": ";

    s << Buffer::typeNames[v.data.type] << "[";
    if (v.Vbo() != 0)
    {
        s << v.data.vboId;
    }
    else
    {
        s << v.data.buffer;
        if (v.data.pinned)
            s << ", pinned";
    }
    s << "] ";

    s << "x" << v.data.size;

    const char* ownedTag = v.owner ? "owned" : "";
    const char* mappedTag = v.data.mapped_from ? ",mapped" : "";
    s << " (" << ownedTag << mappedTag << ")";

    return s;
}

//------------------------------------------------------------------------------------------
}
}
