#ifndef G5NBAPI_C
#define G5NBAPI_C

// local function prototypes:
static void         read_neighbor_listMC(int devid);
static int          decode_neighbor_listMC(int devid);

// Work space indexed by [device][i-particle]. read_neighbor_listMC() fills Nblist
// with the raw values taken from the hardware buffer (0 means an empty slot);
// decode_neighbor_listMC() then converts them in place to j-indices.
static int Nblist[NHIBMAX][NFOMAX][NBMEMSIZE*NCHIPMAX]; // list of jindex (after decoding). value -1 indicates an empty slot.
static int Nblength[NHIBMAX][NFOMAX]; // number of valid entries; stored negated (-len) when the list has overflowed.
static UINT32 Nboverflow[NHIBMAX][NFOMAX]; // overflow flag bits; non-zero means the list has overflowed.

/*
 * Returns the sum of g5_get_nbmemsizeMC() over every device whose
 * g5_cards[] entry is non-zero.
 */
int
g5_get_nbmemsize(void)
{
    int total = 0;
    int devid = 0;

    while (devid < hib_ndevice()) {
        if (g5_cards[devid] != 0) {
            total += g5_get_nbmemsizeMC(devid);
        }
        devid++;
    }

    return total;
}

/*
 * Returns the neighbor memory size of device 'devid', i.e.
 * Nbmemsize[devid] * g5_nchip[devid]. If no size has been set yet,
 * the maximum (NBMEMSIZE * g5_nchip[devid]) is set first.
 */
int
g5_get_nbmemsizeMC(int devid)
{
    if (Nbmemsize_is_set_by_user[devid]) {
        /* size already established; nothing to initialize. */
    }
    else {
        g5_set_nbmemsizeMC(devid, NBMEMSIZE * g5_nchip[devid]);
    }

    return Nbmemsize[devid] * g5_nchip[devid];
}

/*
 * Applies g5_set_nbmemsizeMC(devid, size) to every device whose
 * g5_cards[] entry is non-zero and returns the sum of the sizes
 * actually set.
 */
int
g5_set_nbmemsize(int size)
{
    int total = 0;
    int devid = 0;

    while (devid < hib_ndevice()) {
        if (g5_cards[devid] != 0) {
            total += g5_set_nbmemsizeMC(devid, size);
        }
        devid++;
    }

    return total;
}

/*
 * Set the upper limit of neighbors to be stored for device 'devid'.
 * It is set to the maximum number (= NBMEMSIZE * g5_nchip[devid]) by
 * default. You can make it smaller so that the amount of data
 * transfer is reduced.
 *
 * 'size' is the total over all chips of the device. A value larger
 * than the maximum, or a negative value, is replaced by the maximum
 * and a warning is issued.
 *
 * The per-chip limit Nbmemsize[devid] is size / g5_nchip[devid]
 * (integer division), so the size actually set -- which is the return
 * value -- is 'size' rounded down to a multiple of g5_nchip[devid].
 *
 * Side effects: marks the size of this device as set, recalculates the
 * I/O buffer attributes and rewrites the registers at Ipregaddr[devid]
 * and Foregaddr[devid].
 */
int
g5_set_nbmemsizeMC(int devid, int size)
{
    Nbmemsize_is_set_by_user[devid] = 1;

    if (NBMEMSIZE * g5_nchip[devid] < size) {
        size = NBMEMSIZE * g5_nchip[devid];
        /* note the trailing blank: the two literals are concatenated. */
        WARN(2, "Warning: too large size given to g5_set_nbmemsizeMC(). "
             "set neighbor memory size to the maximum possible: %d\n", size);
    }
    else if (size < 0) {
        size = NBMEMSIZE * g5_nchip[devid];
        WARN(2, "Warning: negative size given to g5_set_nbmemsizeMC(). "
             "set neighbor memory size to the maximum possible: %d\n", size);
    }

    Nbmemsize[devid] = size / g5_nchip[devid];

    /* buffer sizes depend on Nbmemsize; refresh them and tell the hardware. */
    recalculate_iobuf_attributes(devid);
    set_reg(devid, Ipregaddr[devid], Ipsize[devid]<<16 | g5_npipes[devid]);
    set_reg(devid, Foregaddr[devid], Fosize[devid]<<16 | g5_npipes[devid]);

    return Nbmemsize[devid] * g5_nchip[devid];
}

/*
 * Reads the neighbor lists of every device in use (non-zero
 * g5_cards[] entry) into the internal work space of the host
 * computer. The per-device results of g5_read_neighbor_listMC() are
 * OR-ed together: returns 1 if any list has overflown, 0 otherwise.
 */
int
g5_read_neighbor_list(void)
{
    int any_overflow = 0;
    int devid = 0;

    while (devid < hib_ndevice()) {
        if (g5_cards[devid] != 0) {
            any_overflow |= g5_read_neighbor_listMC(devid);
        }
        devid++;
    }

    return any_overflow;
}

/*
 * Reads the neighbor lists of device 'devid' into the internal work
 * space and decodes them. Returns 1 if any list has overflown,
 * 0 otherwise.
 *
 * If the device has no neighbor search function (g5_nbsearch[devid]
 * == 0) nothing is read and 0 (no overflow) is returned; a warning is
 * issued on the first such call only.
 */
int
g5_read_neighbor_listMC(int devid)
{
    static int firstcall = 1;

    if (g5_nbsearch[devid] == 0) {
        if (firstcall == 1) {
            firstcall = 0;
            WARN(2, "Warning: neighbor search function is not implemented in this revision "
                 "of G5 pipeline. g5_read_neighbor_listMC() has no effect.\n");
        }
        /* the function returns int and callers OR the result into an
           overflow flag, so report "no overflow" explicitly. */
        return 0;
    }

    read_neighbor_listMC(devid);
    return decode_neighbor_listMC(devid);
}


/*
 * Set index of the neighbor particles of the 'ip'-th particle into
 * 'list', concatenating the lists of all devices in use. Indices
 * obtained from a device are shifted by the sum of Nbodies[] of the
 * devices in use that precede it.
 *
 * Returns length of 'list' if it is not overflown. Returns -len if
 * the list of any device is overflown, where len is the number of
 * valid neighbors stored in 'list'.
 */
int
g5_get_neighbor_list(int ip, int *list)
{
    int total = 0;      /* number of entries stored in 'list' so far. */
    int jbase = 0;      /* index shift for the device being handled. */
    int overflown = 0;
    int devid;

    for (devid = 0; devid < hib_ndevice(); devid++) {
        int *seg;
        int n, k;

        if (g5_cards[devid] == 0) {
            continue;
        }

        /* this device appends its entries right after the previous ones. */
        seg = list + total;
        n = g5_get_neighbor_listMC(devid, ip, seg);
        if (n < 0) {
            /* negative length signals overflow on this device. */
            overflown = 1;
            n = -n;
        }

        for (k = 0; k < n; k++) {
            if (seg[k] >= 0) {
                seg[k] += jbase;
            }
            else {
                seg[k] -= jbase;
            }
        }

        total += n;
        jbase += Nbodies[devid];
    }

    return overflown ? -total : total;
}

/*
 * Copy the neighbor indices of the 'ip'-th particle of device 'devid'
 * from the internal work space into 'list', skipping empty slots.
 *
 * Returns the number of entries written to 'list', negated if the
 * overflow flag of that particle is set.
 *
 * Aborts (exit(1)) if 'ip' is negative or not smaller than
 * g5_get_number_of_pipelinesMC(devid), or if the number of entries
 * found disagrees with the length recorded in Nblength[devid][ip].
 */
int
g5_get_neighbor_listMC(int devid, int ip, int *list)
{
    int k, len = 0;
    int nbmemsize = g5_get_nbmemsizeMC(devid);

    /* 'ip' indexes Nblist/Nboverflow/Nblength below; reject both ends. */
    if (ip < 0) {
        fprintf(stderr, "g5_get_neighbor_listMC: negative ip (%d). abort.\n", ip);
        exit(1);
    }
    if (ip >= g5_get_number_of_pipelinesMC(devid)) {
        fprintf(stderr, "g5_get_neighbor_listMC: too large ip (%d). abort.\n", ip);
        exit(1);
    }

    for (k = 0; k < nbmemsize; k++) {
        if (Nblist[devid][ip][k] == -1) continue;  /* value '-1' is used to indicate Nblist is empty.
                                                      note that '0' is now used as index for the first j-particle. */
        list[len] = Nblist[devid][ip][k];
        len++;
    }

    if (Nboverflow[devid][ip]) {
        len = -len;
    }

    /* consistency check against the length recorded when the list was decoded. */
    if (len != Nblength[devid][ip]) {
        fprintf(stderr,
                "g5_get_neighbor_listMC: list length mismatch.\n"
                "len:%d  Nblength[%d][%d]:%d\n",
                len, devid, ip, Nblength[devid][ip]);
        exit(1);
    }

    return len;
}

/*
 * Passes the first 'ni' elements of 'h' to g5_set_hMC() for every
 * device whose g5_cards[] entry is non-zero.
 */
void
g5_set_h(int ni, double *h)
{
    int devid = 0;

    while (devid < hib_ndevice()) {
        if (g5_cards[devid] != 0) {
            g5_set_hMC(devid, ni, h);
        }
        devid++;
    }
}


/*
 * Calls g5_set_h_to_allMC(devid, h) for every device whose
 * g5_cards[] entry is non-zero.
 */
void
g5_set_h_to_all(double h)
{
    int devid = 0;

    while (devid < hib_ndevice()) {
        if (g5_cards[devid] != 0) {
            g5_set_h_to_allMC(devid, h);
        }
        devid++;
    }
}

/*
 * Set the same value 'h' for all Nipmax[devid] entries of Ih[devid].
 * The value stored is convert_double_to_grape_log(h * Xscale[devid]).
 *
 * If the device has no neighbor search function (g5_nbsearch[devid]
 * == 0) nothing is done; a warning is issued on the first such call
 * only. If no neighbor memory size has been set yet, the maximum is
 * set first.
 */
void
g5_set_h_to_allMC(int devid, double h)
{
    static int firstcall = 1; /* explicit type: implicit int is not valid since C99. */

    if (g5_nbsearch[devid] == 0) {
        if (firstcall == 1) {
            firstcall = 0;
            WARN(2, "Warning: neighbor search function is not implemented in this revision "
                 "of G5 pipeline. g5_set_h_to_allMC() has no effect.\n");
        }
        return;
    }

    if (!Nbmemsize_is_set_by_user[devid]) {
        g5_set_nbmemsizeMC(devid, NBMEMSIZE * g5_nchip[devid]);
    }

    int i;
    int ih = convert_double_to_grape_log(h * Xscale[devid]); /* converted once, shared by all entries. */

    for (i = 0; i < Nipmax[devid]; i++) {
        Ih[devid][i] = ih;
    }
}

/*
 * Set per-particle values: for i in [0, ni), Ih[devid][i] is set to
 * convert_double_to_grape_log(h[i] * Xscale[devid]).
 *
 * Aborts (exit(1)) if 'ni' exceeds Nipmax[devid].
 *
 * If the device has no neighbor search function (g5_nbsearch[devid]
 * == 0) nothing is done; a warning is issued on the first such call
 * only. If no neighbor memory size has been set yet, the maximum is
 * set first.
 */
void
g5_set_hMC(int devid, int ni, double *h)
{
    static int firstcall = 1; /* explicit type: implicit int is not valid since C99. */

    if (g5_nbsearch[devid] == 0) {
        if (firstcall == 1) {
            firstcall = 0;
            WARN(2, "Warning: neighbor search function is not implemented in this revision "
                 "of G5 pipeline. g5_set_hMC() has no effect.\n");
        }
        return;
    }

    if (!Nbmemsize_is_set_by_user[devid]) {
        g5_set_nbmemsizeMC(devid, NBMEMSIZE * g5_nchip[devid]);
    }

    int i;
    double xs = Xscale[devid];

    if (ni > Nipmax[devid]) {
        fprintf(stderr, "g5_set_hMC: too large ni (%d). abort.\n", ni);
        exit(1);
    }

    for (i = 0; i < ni; i++) {
        Ih[devid][i] = convert_double_to_grape_log(h[i] * xs);
    }
}



/*
 *
 * local functions
 *
 */


/*
 * retrieve neighbor lists from Wbuf into Nblist / Nboverflow.
 *
 * For each of the Nretrieved[devid] particles the buffer holds the
 * overflow flags in the lowest bits of the first 64-bit word, followed
 * by 12-bit neighbor fields. A field never straddles a word boundary.
 * The layout below is the one for the models with 6 overflow flag bits
 * (g5_model 2, 3, 6); g5_model 1 and 8 have a single flag bit.
 *
 *    address 0x0003 [05:00] overflow flags
 *                   [17:06] nb00 chip0
 *                   [29:18] nb00 chip1
 *                   [41:30] nb00 chip2
 *                   [53:42] nb00 chip3
 *    address 0x0004 [11:00] nb00 chip4
 *                   [23:12] nb00 chip5
 *                   [35:24] nb01 chip0
 *                   [47:36] nb01 chip1
 *                   [59:48] nb01 chip2
 *    address 0x0005 [11:00] nb01 chip3
 *                   ....
 *    address 0x001f [11:00] nb23 chip1
 *                   [23:12] nb23 chip2
 *                   [35:24] nb23 chip3
 *                   [47:36] nb23 chip4
 *                   [59:48] nb23 chip5
 *
 * The 12-bit values are stored in Nblist as they are (0 means an empty
 * slot); decode_neighbor_listMC() converts them to j-indices.
 *
 * Aborts (exit(1)) if g5_model[devid] is not one of the models handled
 * here: without this the field offsets would be uninitialized and the
 * read loop below would never terminate.
 */

static void
read_neighbor_listMC(int devid)
{
    int i;
    int ni = Nretrieved[devid];
    int nbmemsize = g5_get_nbmemsizeMC(devid);
    int novflwbit0, novflwbit; // width of overflow flags. nblist follows this flags.
    int ovflwmask;

    switch (g5_model[devid]) {
      case 1:
      case 8:
        ovflwmask = 0x1;
        novflwbit0 = 1;
        break;
      case 2:
      case 3:
      case 6:
        ovflwmask = 0x3f;
        novflwbit0 = 6;
        break;
      default:
        fprintf(stderr, "read_neighbor_listMC: unsupported g5_model (%d). abort.\n",
                (int)g5_model[devid]);
        exit(1);
    }

    // retrieve overflow flags
    UINT64 *nbpointer = (UINT64 *)(Wbuf[devid] + 0 + FORCEFOSIZE * 2); // address of overflow flags.
    UINT64 *nbpointer0 = nbpointer; // referenced only by the disabled debug output below.
    for (i = 0; i < ni; i++) {

        novflwbit = novflwbit0; // bit offset of the next field; the first one follows the flags.
        Nboverflow[devid][i] = ovflwmask & nbpointer[0];
#if 0
        fprintf(stderr, "Noverflow[%d][%d] : 0x%08x\n\n", devid, i, Nboverflow[devid][i]);
#endif
        int addr = 0;      // index of the 64-bit word being read.
        int nbufread = 0;  // number of fields visited (stored or discarded).
        int nbufwrote = 0; // number of fields stored into Nblist.
        int isread;        // referenced only by the disabled debug output below.
        int nbuf_to_be_written = nbmemsize;
        if (Nbmemsize[devid] < NBMEMSIZE) {
            nbuf_to_be_written += g5_nchip[devid]; // user set nbmemsize smaller than NBMEMSIZE.
                                                   // need to retrieve (nbmemsize+1) NBs to check overflow.
        }

        while (nbufwrote < nbuf_to_be_written) {
            isread = 0;
            switch (g5_model[devid]) {
              case 1:
              case 6:
              case 8:
                // every field is stored.
                Nblist[devid][i][nbufwrote++] = 0xfff & (nbpointer[addr] >> novflwbit);
                isread = 1;
                break;
              case 2:
                // only the first three fields of each group of NCHIPMAX are stored.
                if (nbufread % NCHIPMAX < 3) {
                    Nblist[devid][i][nbufwrote++] = 0xfff & (nbpointer[addr] >> novflwbit);
                    isread = 1;
                }
                break;
              case 3:
                // only the fields from the fourth on of each group of NCHIPMAX are stored.
                if (nbufread % NCHIPMAX >= 3) {
                    Nblist[devid][i][nbufwrote++] = 0xfff & (nbpointer[addr] >> novflwbit);
                    isread = 1;
                }
                break;
            }

#if 0
            if (isread) {
                fprintf(stderr, "Nblist[%d][%d][%2d] := nbpointer[%d] >> %2d    : % 5d    nbufread:%d chipid:%d    ",
                        devid, i, nbufwrote-1, addr, novflwbit,
                        Nblist[devid][i][nbufwrote-1], nbufread, nbufread%NCHIPMAX+1);
            }
            else {
                fprintf(stderr, "discard                                              nbufread:%d chipid:%d    ",
                        nbufread, nbufread%NCHIPMAX+1);
            }
            fprintf(stderr, "nbpointer0[%d][%d] >> %2d    : % 5d\n",
                    devid, ((ulong)(nbpointer+addr) - (ulong)nbpointer0) / 8, novflwbit,
                    0xfff & (nbpointer[((ulong)(nbpointer+addr) - (ulong)nbpointer0) / 8] >> novflwbit));
#endif
            nbufread++;
            novflwbit += 12;
            // move on to bit 0 of the next word when the next 12-bit field
            // would reach or pass the end of the current word.
            if (novflwbit + 12 >= 64) {
                addr++;
                novflwbit = 0;
            }
        }

        // entries beyond the user-set limit: a non-empty one means more
        // neighbors existed than were requested, so flag an overflow.
        int tail = nbmemsize;
        while (tail < nbuf_to_be_written) {
            if (Nblist[devid][i][tail] != 0) {
#if 0
                int ofsave = Nboverflow[devid][i];
                Nboverflow[devid][i] |= 1 << (tail - nbmemsize);
                if (ofsave != Nboverflow[devid][i]) {
                    fprintf(stderr, "Noverflow[%d][%d] : 0x%08x bit %d changed\n\n",
                            devid, i, Nboverflow[devid][i], tail - nbmemsize);
                }
#else
                Nboverflow[devid][i] |= 1 << (tail - nbmemsize);
#endif
            }
            tail++;
        }

        nbpointer += Fosize[devid]; // address of nb buffer for (i+1)-th particle.
    }
}

/*
 * Convert the raw values stored in Nblist by read_neighbor_listMC()
 * into j-indices, in place, for each of the Nretrieved[devid]
 * particles.
 *
 * A raw value v found in slot ii is decremented by 1; the result -1
 * (raw value 0) marks an empty slot and is left as -1. Otherwise the
 * slot is set to (v - 1) * g5_nchip[devid] + (ii % g5_nchip[devid]).
 *
 * Nblength[devid][i] is set to the number of non-empty slots, negated
 * if Nboverflow[devid][i] is non-zero.
 *
 * Returns 1 if the overflow flag of any particle is set, 0 otherwise.
 */
static int
decode_neighbor_listMC(int devid)
{
    int i, ii;
    int overflow = 0;
    int ni = Nretrieved[devid];
    int nbmemsize = g5_get_nbmemsizeMC(devid);

    // decode jindex
    for (i = 0; i < ni; i++) {
        int len = 0;

        for (ii = 0; ii < nbmemsize; ii++) {
            Nblist[devid][i][ii]--; // unoffset by 1
            if (Nblist[devid][i][ii] == -1) continue; // buffer is empty.
            len++;
            Nblist[devid][i][ii]  = Nblist[devid][i][ii] * g5_nchip[devid] + (ii % g5_nchip[devid]);
#if 0
            fprintf(stderr, "Nblist[%d][%d][%d]:%d\n",
                    devid, i, ii, Nblist[devid][i][ii], g5_nchip[devid]);
#endif
        }
        Nblength[devid][i] = Nboverflow[devid][i] ? -len : len;
        overflow |= Nboverflow[devid][i];
    }
#if 0
    for (i = 0; i < ni; i++) {
        fprintf(stderr, "Nblength[%d][%3d]:%d  Nboverflow[%d][%3d]:%d\n",
                devid, i, Nblength[devid][i], devid, i, Nboverflow[devid][i]);
    }
#endif

    return (overflow ? 1 : 0);
}

#endif // G5NBAPI_C
