#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pcixmem.h>
#include "memmap.h"
#include "xhibutil.h"

/*
 * Optional locking.  When THREAD_SAFE_XHIBLIB is defined, mutexlock()
 * and mutexunlock() lock and unlock xhibmutex; otherwise they do
 * nothing and return 0.
 */
#ifdef THREAD_SAFE_XHIBLIB
#include  <pthread.h>
pthread_mutex_t xhibmutex = PTHREAD_MUTEX_INITIALIZER;
#endif /* THREAD_SAFE_XHIBLIB */
static int mutexlock(void);
static int mutexunlock(void);

/* XHIB local function(s) */
/* NOTE(review): testrun() has no definition or caller in the part of the file reviewed -- confirm it is still needed */
static void testrun(int devid);

/* XHIB local variable(s); every array below is indexed by device ID (devid) */
static UINT64 *dmarbufp[NPCIXMEM]; /* DMA read buffer */
static unsigned long dmarbufpa[NPCIXMEM]; /* its physical address */
static UINT64 *dmawbufp[NPCIXMEM]; /* DMA write buffer */
static unsigned long dmawbufpa[NPCIXMEM]; /* its physical address */

static UINT32 *xhib[NPCIXMEM]; /* value returned by TBopen() for each device */
static UINT64 *dblbuf[NPCIXMEM]; /* handed to TBopen(); xhib_piowMC() stores outgoing words here */

static int nclusters = 1; /* number of devices in use */
static int firstcluster = 0; /* lowest device ID in use; the single-cluster wrappers target it */
static void (*SendMCfunc[NPCIXMEM])(int devid, int size, UINT64 *buf); /* routine called by xhib_sendMC() */
static int (*RecvMCfunc[NPCIXMEM])(int devid, int size, UINT64 *buf); /* routine called by xhib_recvMC() */

/*
 * Select the XHIB devices to use: IDs xhib0 through xhib0 + nxhibs - 1.
 *
 * xhib0  : ID of the first device (must be >= 0)
 * nxhibs : number of consecutive devices (must be > 0)
 *
 * The range is stored in the file-scope firstcluster / nclusters and
 * reported on stderr.  An invalid argument, or a range that does not
 * fit into NPCIXMEM devices, prints a message and calls exit(1).
 */
void
xhib_alloc(int xhib0, int nxhibs)
{
    int i;
    int last;

    if (nxhibs <= 0) {
	fprintf(stderr, "invalid # of XHIB (%d). abort.\n",
		nxhibs);
	exit(1);
    }
    if (xhib0 < 0) {
	fprintf(stderr, "invalid XHIB ID (%d). abort.\n",
		xhib0);
	exit(1);
    }
    /*
     * Same test as xhib0 + nxhibs > NPCIXMEM, but the sum is never
     * formed, so two large positive arguments cannot overflow.
     */
    if (xhib0 >= NPCIXMEM || nxhibs > NPCIXMEM - xhib0) {
	fprintf(stderr,
		"too large XHIB ID range (first=%d, count=%d). abort.\n",
		xhib0, nxhibs);
	exit(1);
    }
    nclusters = nxhibs;
    firstcluster = xhib0;

    /* safe now: the range was just shown to end below NPCIXMEM */
    last = xhib0 + nxhibs - 1;
    fprintf(stderr, "# of clusters: %d  (allocated xhib[", nclusters);
    for (i = xhib0; i <= last; i++) {
	fprintf(stderr, "%d", i);
	if (i < last) {
	    fprintf(stderr, ", ");
	}
    }
    fprintf(stderr, "])\n");
}

/*
 * Set the number of clusters (devices) in use.
 *
 * n : requested count; accepted when 0 < n <= NPCIXMEM.
 *
 * An accepted value is stored in nclusters and reported on stderr.
 * Any other value resets nclusters to 1 and prints a warning; the
 * process is not terminated.  firstcluster is not consulted here.
 */
void
xhib_set_nclusters(int n)
{
    if (0 < n && n <= NPCIXMEM)
    {
	nclusters = n;
	fprintf(stderr, "# of clusters: %d\n", nclusters);
    }
    else
    {
	nclusters = 1;
	/* terminate the line so later stderr output starts on its own line */
	fprintf(stderr,
		"inappropriate # of clusters (=%d)\nuse default (=1).\n",
		n);
    }
}

/* Report the number of clusters currently configured (file-scope nclusters). */
int
xhib_get_nclusters(void)
{
    const int current = nclusters;

    return current;
}

/* interfaces for single cluster */

/*
 * Open the first allocated XHIB (device ID firstcluster).
 * All arguments are forwarded unchanged to xhib_openMC().
 */
void
xhib_open(UINT64 **dmar, UINT64 **dmaw, UINT64 **backend)
{
    const int devid = firstcluster;

    xhib_openMC(devid, dmar, dmaw, backend);
}

/* Close the first allocated XHIB (device ID firstcluster) via xhib_closeMC(). */
void
xhib_close(void)
{
    const int devid = firstcluster;

    xhib_closeMC(devid);
}

/*
 * Receive from XHIB `devid` using the receive routine installed for
 * that device (xhib_openMC() installs xhib_dmawMC).
 *
 * devid : device ID, 0 <= devid < NPCIXMEM
 * size  : passed through to the receive routine
 * buf   : passed through to the receive routine
 *
 * Returns whatever the installed routine returns.
 * An out-of-range devid, or a device for which no routine has been
 * installed yet, prints a message and calls exit(1) rather than
 * indexing outside RecvMCfunc[] or calling through a null pointer.
 */
int
xhib_recvMC(int devid, int size, UINT64 *buf)
{
    if (devid < 0 || devid >= NPCIXMEM) {
	fprintf(stderr, "xhib_recvMC(): invalid devid(= %d).\n", devid);
	exit(1);
    }
    if (RecvMCfunc[devid] == NULL) {
	fprintf(stderr, "xhib_recvMC(): xhib%d has not been opened.\n",
		devid);
	exit(1);
    }
    return RecvMCfunc[devid](devid, size, buf);
}

/*
 * Single-cluster DMA read: forwards size and buf to xhib_dmarMC() for
 * the first allocated XHIB (device ID firstcluster).
 */
void
xhib_dmar(int size, UINT64 *buf)
{
    const int devid = firstcluster;

    xhib_dmarMC(devid, size, buf);
}

/*
 * Single-cluster DMA write: forwards size and buf to xhib_dmawMC() for
 * the first allocated XHIB (device ID firstcluster).  The status
 * returned by xhib_dmawMC() is discarded.
 */
void
xhib_dmaw(int size, UINT64 *buf)
{
    const int devid = firstcluster;

    (void)xhib_dmawMC(devid, size, buf);
}

/*
 * open XHIB
 * returns the DMA buffers to dmar and dmaw
 *
 * devid   : device ID; must satisfy
 *           firstcluster <= devid < firstcluster + nclusters
 *           and devid < NPCIXMEM, otherwise a message is printed and
 *           exit(1) is called.
 * dmar    : out; receives the virtual address of the DMA read buffer
 * dmaw    : out; receives the virtual address of the DMA write buffer
 * backend : passed through to TBopen()
 *
 * TBopen() is retried once per second for as long as it returns -1.
 * The DMA buffer information is fetched once per device, the DMA
 * start-address registers are programmed, the test mode is set to
 * TESTMODE_NONE, and xhib_dmarMC / xhib_dmawMC are installed as the
 * send / receive routines of the device.
 */
void
xhib_openMC(int devid, UINT64 **dmar, UINT64 **dmaw, UINT64 **backend)
{
    /*
     * dmainfo_done[d] becomes nonzero once TBgetDmaInfo() has been
     * called for device d.  The flag is per device: a single shared
     * flag would leave every device after the first with null buffers.
     */
    static int dmainfo_done[NPCIXMEM];

    if (nclusters+firstcluster <= devid || NPCIXMEM <= devid) {
	fprintf(stderr,
		"xhib_openMC(): too large devid(= %d).\n",
		devid);
	exit(1);
    }
    if (firstcluster > devid) {
	fprintf(stderr,
		"xhib_openMC(): too small devid(= %d).\n",
		devid);
	exit(1);
    }

    /* NOTE(review): the whole dblbuf[] array is passed, not &dblbuf[devid] -- confirm against TBopen() */
    while (-1 == (long int)(xhib[devid] = TBopen(devid, backend, dblbuf))) {
	sleep(1);
    }
    mutexlock();
    if (!dmainfo_done[devid]) {
	dmainfo_done[devid] = 1;
	/*
	 * dmarbufp[devid]: virtual address of DMA read buffer
	 * dmarbufpa[devid]: its physical address
	 *
	 * dmawbufp[devid]: virtual address of DMA write buffer
	 * dmawbufpa[devid]: its physical address
	 */
	TBgetDmaInfo(devid,
		     &(dmarbufpa[devid]), &(dmarbufp[devid]),
		     &(dmawbufpa[devid]), &(dmawbufp[devid]));
    }
    mutexunlock();
    /* %lx takes unsigned long, so the pointers are converted explicitly */
    fprintf(stderr, "xhib%d: dmarbufp: 0x%016lx dmarbufpa %016lx "
	    "dmawbufp: 0x%016lx dmawbufpa %016lx\n",
	    devid, (unsigned long)dmarbufp[devid], dmarbufpa[devid],
	    (unsigned long)dmawbufp[devid], dmawbufpa[devid]);

    TBmemWrite(devid, DMA0LSB, dmarbufpa[devid]); /* DMA0 start address LSB in PCI space */
    TBmemWrite(devid, DMA1LSB, dmawbufpa[devid]); /* DMA1 start address LSB in PCI space */
    *dmar = dmarbufp[devid];
    *dmaw = dmawbufp[devid];

    /*
     * initialize local registers
     */

    /* initialize DMA0MSB, DMA1MSB  (start address bit 63:32) */
    TBmemWrite(devid, DMA0MSB, 0x0);
    TBmemWrite(devid, DMA1MSB, 0x0);
    xhib_set_test_modeMC(devid, TESTMODE_NONE);
    SendMCfunc[devid] = xhib_dmarMC;
    RecvMCfunc[devid] = xhib_dmawMC;
}

/*
 * Close XHIB `devid`.  The only action taken is a call to
 * TBterm(devid); no state kept in this file is reset.
 */
void
xhib_closeMC(int devid)
{
    const int target = devid;

    TBterm(target);
}

/* Set the test mode of XHIB `devid` by writing `mode` to its MAILBOX register. */
void
xhib_set_test_modeMC(int devid, int mode)
{
    const int mailbox_value = mode;

    TBmemWrite(devid, MAILBOX, mailbox_value);
}

void
xhib_set_sendfuncMC(int devid, int func)
{
    switch (func) {
      case SENDFUNC_PIOW:
	  SendMCfunc[devid] = xhib_piowMC;
	  break;
      case SENDFUNC_DMAR:
	  SendMCfunc[devid] = xhib_dmarMC;
	  break;
      default:
	  break;
    }
}

/*
 * Send to XHIB `devid` using the send routine installed for that
 * device (xhib_openMC() installs xhib_dmarMC; xhib_set_sendfuncMC()
 * can replace it).
 *
 * devid : device ID, 0 <= devid < NPCIXMEM
 * size  : transfer size in 8-byte words, passed through
 * buf   : passed through
 *
 * An out-of-range devid, or a device for which no routine has been
 * installed yet, prints a message and calls exit(1) rather than
 * indexing outside SendMCfunc[] or calling through a null pointer.
 */
void
xhib_sendMC(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
    if (devid < 0 || devid >= NPCIXMEM) {
	fprintf(stderr, "xhib_sendMC(): invalid devid(= %d).\n", devid);
	exit(1);
    }
    if (SendMCfunc[devid] == NULL) {
	fprintf(stderr, "xhib_sendMC(): xhib%d has not been opened.\n",
		devid);
	exit(1);
    }
    SendMCfunc[devid](devid, size, buf);
}

/*
 * PIO write (host -> pcixsys) through dblbuf[devid].
 *
 * buf[0..size-1] is sent in chunks of at most PIOWBUFSIZE-1 words.
 * For each chunk the words are stored into dblbuf[devid][0..s-1] and
 * DMAMISC is then written twice: first with the chunk length shifted
 * left by 12, then with the same value plus bit 27.
 * Nothing is done when size <= 0.
 *
 * NOTE(review): going by the comment at the end of the loop, bit 27 is
 * presumably 'swap_sram' and the shifted length 'sram_wcnt' -- confirm
 * against the XHIB register map.  No memory fence is issued between
 * the stores and the register writes (all fences are commented out or
 * under #if 0).
 */
void
xhib_piowMC(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
    int i, j;
    int s = PIOWBUFSIZE-1; /* words per chunk; reduced below for a final, shorter chunk */

    //    fprintf(stderr, "==========> xhib_piowMC size: %d\n", size);

    for (i = 0; i < size; i += s) {
      /* last chunk: send only the words that remain */
      if (i + s > size) {
	s = size - i;
      }
      /* stage this chunk, one 64-bit word at a time */
      for (j = 0; j < s; j++) {
	dblbuf[devid][j] = buf[i + j];
      }

      //      _mm_mfence();
      //      usleep(100000);
      /* first write: chunk length only, bit 27 clear */
      TBmemWrite(devid, DMAMISC, (s<<12));
      //      usleep(100000);
      //      _mm_mfence();
      //      usleep(100000);
      /* second write: same length with bit 27 set */
      TBmemWrite(devid, DMAMISC, (1<<27) | (s<<12));
      //      _mm_mfence();
      //      usleep(100000);

      /* disabled alternative register sequence, kept for reference */
#if 0
      _mm_mfence();
      TBmemWrite(devid, DMAMISC, (1<<27) | (s<<12));
      _mm_mfence();

      _mm_mfence();
      TBmemWrite(devid, DMAMISC, (s<<12));
      _mm_mfence();
      TBmemWrite(devid, DMAMISC, (1<<27));
      _mm_mfence();
#endif
      // write '1' to XHIB local register 'swap_sram'
      // write data size to 'sram_wcnt'
    }
}

/*
 * DMA read (host -> pcixsys) using DMA0 channel
 *
 * devid : device ID
 * size  : transfer size in 8-byte words, 0 <= size <= XHIBRAMWORDS;
 *         anything else prints a message and calls exit(2)
 * buf   : only touched when a short transfer is padded (see below);
 *         the DMA start address programmed is dmarbufpa[devid]
 *
 * Blocks, polling DMAMISC, until bit 28 (dmadone0) is set.
 */
void
xhib_dmarMC(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
    unsigned long misc;

    if (size < 0) {
	/* a negative count would make the padding loop write before buf[0] */
	fprintf(stderr, "xhib_dmarMC(): negative size.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }
    if (size > XHIBRAMWORDS) {
	fprintf(stderr, "xhib_dmarMC(): size too large.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }

    /*
     * DMA read fails on new core (ver 7.0.5) for unknown reason, if
     * data size is smaller than 16 bytes. That's why the below is
     * necessary.
     * NOTE(review): 16 bytes is 2 words, yet the transfer is padded to
     * 8 words here (xhib_dmarMC0 pads to 2) -- kept as is; buf must
     * have room for 8 words.
     */
    // !!!
    if (size < 2) {
	int ii;
	for (ii = size; ii < 8; ii++) {
	    buf[ii] = 0; // fill buf with dummy data
	}
	size = 8;
    }

    TBmemWrite(devid, DMA0LSB, dmarbufpa[devid]);
    TBmemWrite(devid, DMA0SIZE, size*8); /* size in byte */
    TBmemWrite(devid, DMA0CMD, 0x00e4);
    /* command (mem read block, no scatter-gather, 64-bit, low priority)
     *
     * 7-4: PCI transfer command
     *   3: scatter-gather
     *   2: 64-bit mode
     *   1: priority
     *   0: reserved
     */
    //    __mm_memfence();

    /*
     * wait till the DMA transfer completes
     * dmadone(1:0) is mapped to DMADONE(1:0)
     */
    do {
	misc = TBmemRead(devid, DMAMISC);
    } while ((misc & 0x10000000) == 0); /* wait until dmadone0 is asserted */
}

/*
 * DMA read (host -> pcixsys) using DMA0 channel
 * Warning:
 * this is just for test on ref_design. very inefficient.
 *
 * devid : device ID
 * size  : transfer size in 8-byte words, 0 <= size <= XHIBRAMWORDS;
 *         anything else prints a message and calls exit(2)
 * buf   : only touched when a transfer shorter than 2 words is padded
 *
 * Completion is not polled; the function simply sleeps for 3 seconds
 * after issuing the command.
 */
void
xhib_dmarMC0(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
    if (size < 0) {
	/* a negative count would make the padding loop write before buf[0] */
	fprintf(stderr, "xhib_dmarMC0(): negative size.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }
    if (size > XHIBRAMWORDS) {
	fprintf(stderr, "xhib_dmarMC0(): size too large.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }

    /*
     * DMA read fails on new core (ver 7.0.5) for unknown reason, if
     * data size is smaller than 16 bytes. That's why the below is
     * necessary.
     */
    if (size < 2) {
	int ii;
	for (ii = size; ii < 2; ii++) {
	    buf[ii] = 0; // fill buf with dummy data
	}
	size = 2;
    }

    TBmemWrite(devid, DMA0LSB, dmarbufpa[devid]);
    TBmemWrite(devid, DMA0SIZE, size*8); /* size in byte */
    TBmemWrite(devid, DMA0CMD, 0x00e4);
    /* command (mem read block, no scatter-gather, 64-bit, low priority)
     *
     * 7-4: PCI transfer command
     *   3: scatter-gather
     *   2: 64-bit mode
     *   1: priority
     *   0: reserved
     */
    //    __mm_memfence();

    sleep(3);
}

/*
 * DMA write (host <- pcixsys) using DMA1 channel
 *
 * devid : device ID
 * size  : transfer size in 8-byte words, 0 <= size <= XHIBRAMWORDS;
 *         anything else prints a message and calls exit(2)
 * buf   : not used here; the DMA start address programmed is
 *         dmawbufpa[devid]
 *
 * Blocks, polling DMAMISC, until bit 29 (dmadone1) is set.
 * Always returns 0.
 */
int
xhib_dmawMC(int devid, int size, UINT64 *buf)
{
    int ret = 0; /* -1 on error */
    unsigned long misc;

    if (size < 0) {
	/* never program the DMA engine with a negative byte count */
	fprintf(stderr, "xhib_dmawMC(): negative size.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }
    if (size > XHIBRAMWORDS) {
	fprintf(stderr, "xhib_dmawMC(): size too large.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }

    TBmemWrite(devid, DMA1LSB, dmawbufpa[devid]);
    TBmemWrite(devid, DMA1SIZE, size*8); /* size in byte */
    TBmemWrite(devid, DMA1CMD, 0x00f4);
    /* command (mem write block, no scatter-gather, 64-bit, low priority)
     *
     * 7-4: PCI transfer command
     *   3: scatter-gather
     *   2: 64-bit mode
     *   1: priority
     *   0: reserved
     */

    /*
     * wait till the DMA transfer completes
     * dmadone(1:0) is mapped to DMADONE(1:0)
     */
    do {
	misc = TBmemRead(devid, DMAMISC);
    } while ((misc & 0x20000000) == 0); /* wait until dmadone1 is asserted */

    return ret;
}


/*
 * DMA write (host <- pcixsys) using DMA1 channel
 * Warning:
 * this is just for test on ref_design. very inefficient.
 *
 * devid : device ID
 * size  : transfer size in 8-byte words, 0 <= size <= XHIBRAMWORDS;
 *         anything else prints a message and calls exit(2)
 * buf   : not used here; the DMA start address programmed is
 *         dmawbufpa[devid]
 *
 * Completion is not polled; the function simply sleeps for 1 second
 * after issuing the command.  Always returns 0.
 */
int
xhib_dmawMC0(int devid, int size, UINT64 *buf)
{
    int ret = 0; /* -1 on error */

    if (size < 0) {
	/* never program the DMA engine with a negative byte count */
	fprintf(stderr, "xhib_dmawMC0(): negative size.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }
    if (size > XHIBRAMWORDS) {
	fprintf(stderr, "xhib_dmawMC0(): size too large.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }

    TBmemWrite(devid, DMA1LSB, dmawbufpa[devid]);
    TBmemWrite(devid, DMA1SIZE, size*8); /* size in byte */
    TBmemWrite(devid, DMA1CMD, 0x00f4);
    /* command (mem write block, no scatter-gather, 64-bit, low priority)
     *
     * 7-4: PCI transfer command
     *   3: scatter-gather
     *   2: 64-bit mode
     *   1: priority
     *   0: reserved
     */

    sleep(1);
    return ret;
}

/*
 * start DMA write (host <- pcixsys) using DMA1 channel
 *
 * devid : device ID
 * size  : transfer size in 8-byte words, 0 <= size <= XHIBRAMWORDS;
 *         anything else prints a message and calls exit(2)
 * buf   : not used here; the DMA start address programmed is
 *         dmawbufpa[devid]
 *
 * Only issues the command; use xhib_finish_dmawMC() to wait for the
 * transfer to complete.  Always returns 0.
 */
int
xhib_start_dmawMC(int devid, int size, UINT64 *buf)
{
    unsigned val, valread;

    if (size < 0) {
	/* never program the DMA engine with a negative byte count */
	fprintf(stderr, "xhib_start_dmawMC(): negative size.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }
    if (size > XHIBRAMWORDS) {
	fprintf(stderr, "xhib_start_dmawMC(): size too large.\n");
	fprintf(stderr, "size: %d\n", size);
	exit(2);
    }

    /* start address LSB in PCI space (value narrowed to unsigned) */
    val = dmawbufpa[devid];
    TBmemWrite(devid, DMA1LSB, val);

    /*
     * Read the register back and rewrite it until the value sticks.
     * The mismatch is reported once, before the retries start.
     */
    valread = TBmemRead(devid, DMA1LSB);
    if (val != valread) {
	fprintf(stderr, "xhib_start_dmawMC val: %x valread: %x\n", val, valread);
	while (val != valread) {
	    TBmemWrite(devid, DMA1LSB, val);
	    valread = TBmemRead(devid, DMA1LSB);
	}
    }

    TBmemWrite(devid, DMA1MSB, 0x0); /* start address MSB in local space */
    TBmemWrite(devid, DMA1SIZE, size*8); /* size in byte */
    TBmemWrite(devid, DMA1CMD, 0x00f4);
    /* command (mem write block, no scatter-gather, 64-bit, low priority)
     *
     * 7-4: PCI transfer command
     *   3: scatter-gather
     *   2: 64-bit mode
     *   1: priority
     *   0: reserved
     */

    return 0;
}


/*
 * Block until the DMA write (host <- pcixsys) started by
 * xhib_start_dmawMC() has finished.
 *
 * DMA1SIZE and DMA1CMD are read, in that order, over and over until
 * bits 19:16 of DMA1CMD are all zero and DMA1SIZE is no longer
 * positive.  There is no timeout.  Always returns 0.
 */
int
xhib_finish_dmawMC(int devid)
{
    int remaining;
    UINT32 status;

    for (;;) {
	remaining = TBmemRead(devid, DMA1SIZE);
	status = TBmemRead(devid, DMA1CMD);
	/* m_dma0_status(7:4) is mapped to DMA1CMD(19:16) */
	if ((status & 0xf0000) == 0 && remaining <= 0) {
	    break;
	}
    }

    return 0;
}

/*
 * Lock xhibmutex when the library is built with THREAD_SAFE_XHIBLIB
 * and return the result of pthread_mutex_lock(); otherwise do nothing
 * and return 0.
 */
static int
mutexlock(void)
{
#ifdef THREAD_SAFE_XHIBLIB
    return pthread_mutex_lock(&xhibmutex);
#else
    return 0;
#endif /* THREAD_SAFE_XHIBLIB */
}

/*
 * Unlock xhibmutex when the library is built with THREAD_SAFE_XHIBLIB
 * and return the result of pthread_mutex_unlock(); otherwise do
 * nothing and return 0.
 */
static int
mutexunlock(void)
{
#ifdef THREAD_SAFE_XHIBLIB
    return pthread_mutex_unlock(&xhibmutex);
#else
    return 0;
#endif /* THREAD_SAFE_XHIBLIB */
}
