// Values below should match the ones given in ./pg2g6/g6.pg2 or ./pg2g6nb/g6nb.pg2.
//
// scaling for xi, xj, force, potential, and jerk.
// XSHIFT is handed to set_xunitMC() by g6_open(). FSHIFT, PSHIFT and JSHIFT
// are the binary exponents get_force0() applies (as 2^-SHIFT) to the raw
// acceleration, potential and jerk retrieved from the device.
#define XSHIFT  (51)
#define FSHIFT  (48)
#define PSHIFT  (48)
#define JSHIFT  (32)

// neighbour list attributes.
// NBMEMSIZE is the innermost dimension of Nblist[][][].
#define NBMEMSIZE   (64) // maximum number of neighbours per pipe.

// max # of FOUTs obtained by a single run on a single device.
// g6_npipes() should return this or smaller.
// it dimensions the per-device result buffers (Acc, Phi, Jerk, ...).
#define NFOMAX  (80)


// i-particle input buffers of a single device.
// the arrays are (re)allocated by realloc_ipin() and filled, one entry per
// call, by g6_set_i_particle(); g6_set_njp() hands them to the backend.
typedef struct {
    double (*x)[3]; // position given to g6_set_i_particle().
    double (*v)[3]; // velocity given to g6_set_i_particle().
    double *eps2;   // eps2 given to g6_set_i_particle().
    double *h2;     // h2 given to g6_set_i_particle(). passed only to the G6nb backend.
    int *index;     // user index + 1 (offset by one; see g6_set_i_particle()).
} Ipin_t;

// per-device coefficients handed to the backend's set_coeffMC() by g6_set_njp().
typedef struct {
    double t;      // time set by g6_set_ti().
    double etainv; // 1.0 / eta, set by g6_set_eta(). passed only to the G6nb backend.
} Cin_t;

// state shared by all API functions. arrays are indexed by device id.
static int Backend = 0;           // should set to BACKEND_G6 or _G6nb. assigned by g6_open().
static int Nip[NHIBMAX];          // # of i-particles set by g6_set_ni()
static int NipAllocated[NHIBMAX]; // # of i-particles Ipin[] is allocated for.
static Ipin_t Ipin[NHIBMAX];
static Cin_t Cin[NHIBMAX];

// the latest fout retrieved.
// filled by get_fout(); Phi and the neighbour-related arrays are written
// only when the backend is G6nb.
static double Acc[NHIBMAX][NFOMAX][3];
static double Phi[NHIBMAX][NFOMAX];
static double Jerk[NHIBMAX][NFOMAX][3];
static int    Nbovflw[NHIBMAX][NFOMAX];
static int    Nblist[NHIBMAX][NFOMAX][NBMEMSIZE];
static int    Nnb[NHIBMAX][NFOMAX];
static double Nnbr2[NHIBMAX][NFOMAX];
static int    FoutRead[NHIBMAX] = {0};            // the latest fout are retrieved or not.
static int    NbSortMode = 1;                     // neighbour list sort mode. nonzero: g6_get_neighbour_list() sorts its output.



// print a diagnostic to stderr if its level 'lv' does not exceed warn_level.
// 'lv' is parenthesized so that an expression argument (e.g. 'c ? 0 : 5')
// is compared as a whole against warn_level.
// NOTE: the expansion is a bare 'if' statement that already ends with ';'.
// always use it inside braces; it cannot be followed by an 'else'.
#define WARN(lv, fmt, args...) if ((lv) <= warn_level) fprintf(stderr, fmt, ## args);
static int warn_level = 2;

// forward declarations of the file-local helpers.

// i-particle buffer management and one-time initialization.
static void realloc_ipin(int devid, Ipin_t *ip, int req_n);
static void init_vars(void);
// retrieval of the calculation results.
static int get_force_etc0(int devid, double acc[][3], double jerk[][3], double phi[], int nnbindex[], double rad[]);
static void get_fout(int devid);
static int get_force0(int devid, double acc[][3], double jerk[][3], double phi[], int nnbindex[], double rad[]);

// scaling setup. called from g6_open().
static void set_xunitMC(int devid, int newxunit);

// misc helpers.
static void die_not_implemented_in_g6pipe(char *apiname);
static int nbindex_a_lt_b(const void *arg0, const void *arg1);

/*
 * grow the i-particle buffers of type Ipin_t so that entry 'req_n' exists.
 *   devid : device the buffers belong to.
 *   ip    : a pointer to the buf.
 *   req_n : requested size (in # of particles); the largest index to be used.
 *
 * nothing is done when the buffers are already large enough. in particular
 * they are never shrunk, so entries stored earlier are preserved.
 * otherwise the size is rounded up to a multiple of 4096 particles and
 * the size actually allocated is updated to NipAllocated[devid]
 * (always larger than req_n).
 *
 * the process exits with status 1 if memory cannot be allocated.
 */

static void
realloc_ipin(int devid, Ipin_t *ip, int req_n)
{
    const int page = 4096;
    int n;

    if (req_n < NipAllocated[devid]) {
        return; // large enough already.
    }

    n = (req_n / page + 1) * page;

    ip->x      = realloc(ip->x,      (size_t)n * sizeof(*ip->x));
    ip->v      = realloc(ip->v,      (size_t)n * sizeof(*ip->v));
    ip->eps2   = realloc(ip->eps2,   (size_t)n * sizeof(*ip->eps2));
    ip->h2     = realloc(ip->h2,     (size_t)n * sizeof(*ip->h2));
    ip->index  = realloc(ip->index,  (size_t)n * sizeof(*ip->index));

    if (!ip->x || !ip->v || !ip->eps2 || !ip->h2 || !ip->index) {
        WARN(0, "realloc_ipin(): failed to allocate buffers for %d i-particles.\n", n);
        exit(1);
    }
    NipAllocated[devid] = n;
}

/*
 * one-time initialization of the file-scope state.
 * fetches the warning level from the backend selected in 'Backend' and
 * resets the i-particle buffers of all devices.
 * calls after the first one return immediately.
 */
static void
init_vars(void)
{
    static int initialized = 0;
    int devid;

    if (initialized) {
        return;
    }
    initialized = 1;

    warn_level = (Backend == BACKEND_G6) ? pg2g6_get_warn_level()
                                         : pg2g6nb_get_warn_level();

    for (devid = 0; devid < NHIBMAX; devid++) {
        NipAllocated[devid] = 0;
        memset(&Ipin[devid], 0, sizeof(Ipin_t));
    }
}

void
g6_open(int devid)
{
    static int firstcall = 1;
    static int setup_done[NHIBMAX];

    if (firstcall) {
        int i;
        firstcall = 0;
        for (i = 0; i < NHIBMAX; i++) {
            setup_done[i] = 0;
        }
    }

    if (setup_done[devid]) { 
        if (Backend == BACKEND_G6) {
            pg2g6_openMC(devid);
        }
        else {
            pg2g6nb_openMC(devid);
        }
    }
    else {
        // open for the 1st time. 
        // at this point, we don't know whether the backend is G6 or G6nb.
        unsigned int binfo, productid, backendid, revision, npipe;
        pg2g6_device_info_t devinfo;
        int compatible = 0;

        setup_done[devid] = 1;
        pg2g6_openMC(devid);    // for the 1st time, open as a G6PIPE anyway.
        pg2g6_device_infoMC(devid, &devinfo);
        binfo = devinfo.boardinfo;
        productid = (binfo >> 28) & 0xf;
        backendid = (binfo >> 20) & 0xf;
        revision  = (binfo >> 16) & 0xf;
        npipe     = binfo & 0xff;

        switch (productid) {
          case PRODUCT_G9C:
            WARN(2,"GRAPE-9 ");
            break;
          default:
            WARN(0, "\ndevice[%d], productid:%d, is not a GRAPE-9.\n",
                 devid, productid);
            exit(2);
        }

        WARN(2, "pipeline logic of device[%d]:", devid);
        switch (backendid) {
          case BACKEND_G6:
            WARN(2, "G6PIPE\n");
            Backend = backendid;
            compatible = 1;
            break;
          case BACKEND_G6nb:
            WARN(2, "G6nbPIPE\n");
            Backend = backendid;
            compatible = 1;
            break;
          case BACKEND_G5:
            WARN(2, "G5PIPE\n");
            break;
          case BACKEND_G5nb:
            WARN(2, "G5nbPIPE\n");
            break;
          case BACKEND_EMPTY:
            WARN(0, "no pipeline logic configured in the device.\n");
        break;
          default:
            WARN(2, "unknown\n");
        }
        if (!compatible) {
            WARN(0,"device[%d] is incompatible with G6 API.\n", devid);
            exit(1);
        }
        WARN(3, "npipes:%d \n", npipe);
        pg2g6_closeMC(devid); // close anyway.

        init_vars();
        set_xunitMC(devid, XSHIFT); // user app is not permitted to change scaling.
        if (Backend == BACKEND_G6) { // open G6PIPE again.
            pg2g6_openMC(devid);
        }
        else {
            pg2g6nb_openMC(devid); // open again, but as G6nbPIPE this time.
            g6_set_eta(devid, 1e100); // large enough value will do.
        }
    }
}

/*
 * close the device 'devid' using the backend recorded in 'Backend'.
 */
void
g6_close(int devid)
{
    if (Backend != BACKEND_G6) {
        pg2g6nb_closeMC(devid);
        return;
    }
    pg2g6_closeMC(devid);
}

// set the time unit. currently a no-op: the whole body is compiled out.
// NOTE(review): the disabled code refers to 'devid', which is not a parameter
// of this function, and shifts an int by (63 - newtunit); both need rework
// before the code is enabled.
void
g6_set_tunit(int newtunit)
{
    // 2^(63-newtunit) gives the max value of t.
#if 0 // !!! to be enabled when the new pipeline logic with int64 ti is ready.
    double tmax = 1 << (63 - newtunit);
    if (Backend == BACKEND_G6) {
        pg2g6_set_range_tiMC(devid, -tmax, tmax);
    }
    else {
        pg2g6nb_set_range_tiMC(devid, -tmax, tmax);
    }
#endif
}

/*
 * set up the ranges and scales of the device 'devid' for the backend
 * recorded in 'Backend'.
 *   newxunit : position range is set to [-2^(63-newxunit), 2^(63-newxunit)].
 *
 * all scales are currently fixed to 1.0, since xscale is.
 */
static void
set_xunitMC(int devid, int newxunit)
{
    // 2^(63-newxunit) gives the max value of x.
    // computed in floating point: '1 << (63 - newxunit)' on an int is
    // undefined once the shift count reaches the width of int.
    double xmax = pow(2.0, (double)(63 - newxunit));
    double xmin = - xmax;

    // double xscale = pow(2.0, 64) / (xmax - xmin);
    double xscale = 1.0; // !!!

    double vscale = xscale;
    double eps2scale = xscale * xscale;
    double h2scale = xscale * xscale;
    double mscale = 1.0;
    double ascale = xscale * xscale / mscale;
    double pscale = xscale / mscale;
    double jscale = ascale;
    double nnbr2scale = 1.0;

    if (Backend == BACKEND_G6) {
        // j-particle side.
        pg2g6_set_scale_tiMC(devid, 1.0);
        pg2g6_set_scale_mjMC(devid, mscale);
        pg2g6_set_range_xjMC(devid, xmin, xmax);
        pg2g6_set_scale_vjMC(devid, vscale);
        pg2g6_set_scale_tjMC(devid, 1.0);
        pg2g6_set_scale_acc0by2MC(devid, 1.0);
        pg2g6_set_scale_jerk0by6MC(devid, 1.0);

        // i-particle side and results.
        pg2g6_set_range_xiMC(devid, xmin, xmax);
        pg2g6_set_scale_viMC(devid, vscale);
        pg2g6_set_scale_epsi2MC(devid, eps2scale);
        pg2g6_set_scale_accMC(devid, ascale);
        pg2g6_set_scale_jerkMC(devid, jscale);
    }
    else {
        // j-particle side.
        pg2g6nb_set_scale_tiMC(devid, 1.0);
        pg2g6nb_set_scale_mjMC(devid, mscale);
        pg2g6nb_set_range_xjMC(devid, xmin, xmax);
        pg2g6nb_set_scale_vjMC(devid, vscale);
        pg2g6nb_set_scale_tjMC(devid, 1.0);
        pg2g6nb_set_scale_acc0by2MC(devid, 1.0);
        pg2g6nb_set_scale_jerk0by6MC(devid, 1.0);

        // i-particle side and results.
        // h2, etainv, potential and nnbr2 exist only in this backend.
        pg2g6nb_set_range_xiMC(devid, xmin, xmax);
        pg2g6nb_set_scale_viMC(devid, vscale);
        pg2g6nb_set_scale_epsi2MC(devid, eps2scale);
        pg2g6nb_set_scale_hi2MC(devid, h2scale);
        pg2g6nb_set_scale_etainvMC(devid, 1.0);
        pg2g6nb_set_scale_accMC(devid, ascale);
        pg2g6nb_set_scale_potMC(devid, pscale);
        pg2g6nb_set_scale_jerkMC(devid, jscale);
        pg2g6nb_set_scale_nnbr2MC(devid, nnbr2scale);
    }
}

// set the position unit. kept for API compatibility only: it does nothing,
// since the scaling is fixed by g6_open() (see the call to set_xunitMC() there).
// the disabled code below assigns variables that are not declared in
// the visible part of this file.
void
g6_set_xunit(int newxunit)
{
    // nop.
#if 0
    xunit = newxunit;
    xunit2 = 2*xunit;
    xscale = pow(2.0,(double)xunit);
    xscaleinv = 1.0/xscale;
    xscale2 = xscale * xscale;
#endif
}

/*
 * send a single j-particle to the device 'devid'.
 *   address : passed to the backend's set_jpMC() as the location to store at.
 *   index   : user index of the particle. it is sent as index + 1.
 *   dtj and a2by18 are not used.
 *
 * always returns 0. (the function used to fall off its end without
 * returning a value, although g6_set_j_particle_mxonly() returns it.)
 */
int
g6_set_j_particle(int devid, int address,
                  int index,
                  double tj, /* particle time */
                  double dtj, /* not used */
                  double mass,
                  double a2by18[3], /* not used */
                  double a1by6[3], /* a1dot divided by 6 */
                  double aby2[3], /* a divided by 2 */
                  double v[3], /* velocity */
                  double x[3] /* position */)
{
    // offset by one in order to distinguish null entry in the neighbour list.
    int index1 = index + 1;

    // the backend takes arrays of particles; pass arrays of length one.
    if (Backend == BACKEND_G6) {
        pg2g6_set_jpMC(devid, address, 1,
                       &mass, (double (*)[3])x, (double (*)[3])v,
                       &tj, (double (*)[3])aby2, (double (*)[3])a1by6, &index1);
    }
    else {
        pg2g6nb_set_jpMC(devid, address, 1,
                         &mass, (double (*)[3])x, (double (*)[3])v,
                         &tj, (double (*)[3])aby2, (double (*)[3])a1by6, &index1);
    }
    return 0;
}

/*
 * send a j-particle for which only the mass and the position matter.
 * the particle time, velocity and all derivatives are set to zero,
 * dtj to 1.0, and the rest is left to g6_set_j_particle(),
 * whose return value is returned.
 */
int
g6_set_j_particle_mxonly(int  devid,
                         int address,
                         int index,
                         double mass,
                         double x[3] /* position */)
{
    const double tj = 0.0;
    const double dtj = 1.0;
    double a2by18[3] = { 0.0, 0.0, 0.0 };
    double a1by6[3]  = { 0.0, 0.0, 0.0 };
    double aby2[3]   = { 0.0, 0.0, 0.0 };
    double v[3]      = { 0.0, 0.0, 0.0 };

    return g6_set_j_particle(devid, address, index,
                             tj, dtj, mass, a2by18, a1by6, aby2, v, x);
}

void
g6_set_ti(int devid, double ti)
{
    Cin[devid].t = ti;
}

void
g6_set_eta(int devid, double eta)
{
    die_not_implemented_in_g6pipe("g6_set_eta()");
    Cin[devid].etainv = 1.0 / eta;
}

/*
 * store 'ni' i-particles and start the calculation against 'nj' j-particles.
 * a single eps2 is shared by all i-particles.
 * fold, jold and phiold are not used.
 */
void
g6calc_firsthalf(int devid,
                 int nj,
                 int ni,
                 int index[],
                 double xi[][3],
                 double vi[][3],
                 double fold[][3],
                 double jold[][3],
                 double phiold[],
                 double eps2,
                 double h2[])
{
    int i = 0;

    while (i < ni) {
        g6_set_i_particle(devid, i, index[i], xi[i], vi[i], eps2, h2[i]);
        i++;
    }

    g6_set_nip(devid, ni);
    g6_set_njp(devid, nj); // this triggers the run.
}

/*
 * store 'ni' i-particles and start the calculation against 'nj' j-particles.
 *   mode == 0 : eps2[] holds one value per i-particle.
 *   otherwise : eps2[0] is used for all i-particles.
 * fold, jold and phiold are not used.
 */
void
g6calc_firsthalf0(int devid,
                  int nj,
                  int ni,
                  int index[],
                  double xi[][3],
                  double vi[][3],
                  double fold[][3],
                  double jold[][3],
                  double phiold[],
                  double *eps2,
                  double h2[],
                  int mode)
{
    int i;

    for (i = 0; i < ni; i++) {
        double e2 = (mode == 0) ? eps2[i] : eps2[0];

        g6_set_i_particle(devid, i, index[i], xi[i], vi[i], e2, h2[i]);
    }

    g6_set_nip(devid, ni);
    g6_set_njp(devid, nj); // this triggers the run.
}

/*
 * retrieve the results of the run started by g6calc_firsthalf().
 * only devid, acc, jerk and pot are used; the work is done by g6_get_force().
 * always returns 0.
 */
int
g6calc_lasthalf(int devid,
                int nj,
                int ni,
                int index[],
                double xi[][3],
                double vi[][3],
                double eps2,
                double h2[],
                double acc[][3],
                double jerk[][3],
                double pot[])
{
    int *no_flag = NULL;

    g6_get_force(devid, acc, jerk, pot, no_flag);

    return 0;
}

/*
 * retrieve the results of the run started by g6calc_firsthalf0().
 * only devid, acc, jerk and pot are used; the work is done by g6_get_force().
 * always returns 0.
 */
int
g6calc_lasthalf0(int devid,
                 int nj,
                 int ni,
                 int index[],
                 double xi[][3],
                 double vi[][3],
                 double *eps2,
                 double h2[],
                 double acc[][3],
                 double jerk[][3],
                 double pot[],
                 int mode)
{
    int *no_flag = NULL;

    g6_get_force(devid, acc, jerk, pot, no_flag);

    return 0;
}

/*
 * retrieve the results of the latest run, including nnbindex[].
 * only devid, acc, jerk, pot and nnbindex are used;
 * the work is done by g6_get_force_etc().
 * always returns 0.
 */
int
g6calc_lasthalf2(int devid,
                 int nj,
                 int ni,
                 int index[],
                 double xi[][3],
                 double vi[][3],
                 double eps2,
                 double h2[],
                 double acc[][3],
                 double jerk[][3],
                 double pot[],
                 int nnbindex[])
{
    int *no_flag = NULL;

    g6_get_force_etc(devid, acc, jerk, pot, nnbindex, no_flag);

    return 0;
}

int
g6_read_neighbour_list(int devid)
{
    int i, overflown = 0;

    die_not_implemented_in_g6pipe("g6_read_neighbour_list()");

    get_fout(devid);

    for (i = 0; i < Nip[devid]; i++) {
        if (Nbovflw[devid][i]) {
            overflown = 1;
            break;
        }
    }

    return overflown; // return 1 if & only if any of Nblist[devid][i] are overflown.
}

int
g6_get_neighbour_list(int devid,
                      int ipipe,
                      int maxlength,
                      int *nblen,
                      int nbl[])
{
    int i, ovflw, len;
    int imax = maxlength < NBMEMSIZE ? maxlength : NBMEMSIZE;
    int indexi, indexj;

    die_not_implemented_in_g6pipe("g6_get_neighbour_list()");

    for (i = 0, len = 0; i < imax; i++) {
        nbl[i] = -2; // initial dummy value. just for debugging purpose.
    }


    indexi = Ipin[devid].index[ipipe] - 1;
    for (i = 0, len = 0; i < imax; i++) {
        indexj = Nblist[devid][ipipe][i] - 1; // values stored in Nblist are offset by one.
        if (indexj == -1) break; // the value -1 indicates the end of the list.
        if (indexi == indexj) continue; // remove self.
        nbl[len] = indexj;
        len++;
    }
    *nblen = len;

    if (NbSortMode) {
        qsort(nbl, len, sizeof(int), nbindex_a_lt_b);
    }

    if (imax < NBMEMSIZE && i == imax && Nblist[devid][ipipe][i] != 0) {
        ovflw = 1; // the list is not overflown, but 'nbl[maxlength]'
                   // is too small to store the entire list.
    }
    else {
        //        fprintf(stderr, "Nbovflw[%d][%d]:%x   len:%d\n", devid, ipipe, Nbovflw[devid][ipipe], len);
        ovflw = Nbovflw[devid][ipipe] ? 1 : 0; // Nbovflw is not 1 but 0xffffffff when overflown.
    }
    return ovflw;
}

/*
 * comparison function for qsort(): orders ints ascending.
 * returns -1, 0 or +1 if *arg0 is less than, equal to or
 * greater than *arg1, respectively.
 */
static int
nbindex_a_lt_b(const void *arg0, const void *arg1)
{
    const int lhs = *(const int *)arg0;
    const int rhs = *(const int *)arg1;

    // comparisons instead of a subtraction: no risk of overflow.
    return (lhs > rhs) - (lhs < rhs);
}

/*
 * set the neighbour list sort mode.
 * a nonzero 'mode' makes g6_get_neighbour_list() sort the list it returns.
 * exits if the backend is G6PIPE.
 */
void
g6_set_neighbour_list_sort_mode(int mode)
{
    const int newmode = mode;

    die_not_implemented_in_g6pipe("g6_set_neighbour_list_sort_mode()");

    NbSortMode = newmode;
}

/*
 * return the current neighbour list sort mode.
 * exits if the backend is G6PIPE.
 */
int
g6_get_neighbour_list_sort_mode(void)
{
    int mode;

    die_not_implemented_in_g6pipe("g6_get_neighbour_list_sort_mode()");

    mode = NbSortMode;
    return mode;
}


/*
 * return the size of the neighbour list buffer per pipe,
 * i.e., the max number of neighbours g6_get_neighbour_list() can return.
 * 'devid' is not used.
 * exits if the backend is G6PIPE.
 */
int
g6_get_nbmax(int devid)
{
    die_not_implemented_in_g6pipe("g6_get_nbmax()");

    // Nblist[] always includes the index of the particle itself,
    // which is removed from the list passed on to the user.
    // Thus, the max size of the list is smaller than NBMEMSIZE by one.
    return (NBMEMSIZE - 1);
}

/*
 * returns the number of FOUTs obtained by a single run on a single device,
 * as reported by the backend recorded in 'Backend'.
 * this function does not take 'devid' argument for historical reason,
 * therefore, it returns the smallest value among all devices for safety.
 * NOTE(review): the "smallest among all devices" part is up to the backend
 * function called here; confirm against its implementation.
 */
int
g6_npipes(void)
{
    int npipes;

    if (Backend != BACKEND_G6) {
        npipes = pg2g6nb_get_number_of_pipelines();
    }
    else {
        npipes = pg2g6_get_number_of_pipelines();
    }
    return npipes;
}

/*
 * an API for internal use only.
 * returns the number of pipelines the backend reports for the device
 * 'devid'. exits with status 1 if the number exceeds NFOMAX.
 */
int
g6_get_number_of_real_pipelines(int devid)
{
    int npipe = (Backend == BACKEND_G6)
        ? pg2g6_get_number_of_pipelinesMC(devid)
        : pg2g6nb_get_number_of_pipelinesMC(devid);

    if (npipe > NFOMAX) {
        WARN(0, "g6_nrealpipes(): # of pipe (=%d) exceeds NFOMAX (=%d).\n",
             npipe, NFOMAX);
        exit(1);
    }

    return npipe;
}

/*
 * record the number of i-particles of the device 'devid'.
 * the value is used by g6_set_njp() and when the results are retrieved.
 */
void
g6_set_nip(int devid, int nip)
{
    int *slot = &Nip[devid];

    *slot = nip;
}

/*
 * set the number of JPs and trigger the run.
 */
void
g6_set_njp(int devid, int njp)
{
    Ipin_t *ip = Ipin + devid;

    if (Backend == BACKEND_G6) {
        pg2g6_set_nMC(devid, njp); // this must precede pg2g6_set_ipMC(). otherwise pg2g6_set_ipMC() quit immediately.
        pg2g6_set_coeffMC(devid, Cin[devid].t);
        pg2g6_set_ipMC(devid, Nip[devid],
                       ip->x, ip->v, ip->eps2,
                       ip->index);
        pg2g6_runMC(devid);
    }
    else {
        pg2g6nb_set_nMC(devid, njp);
        pg2g6nb_set_coeffMC(devid, Cin[devid].t, Cin[devid].etainv);
        pg2g6nb_set_ipMC(devid, Nip[devid],
                         ip->x, ip->v, ip->eps2,
                         ip->index, ip->h2);
        pg2g6nb_runMC(devid);
    }
    FoutRead[devid] = 0;
}

// kept for API compatibility only. does nothing; all arguments are ignored.
void
g6_set_i_particle_scales_from_real_value(int devid,
                                         int address,
                                         double acc[3],
                                         double jerk[3],
                                         double phi,
                                         double jfactor,
                                         double ffactor)
{
    // nop
}

/*
 * store a single i-particle at the entry 'address' of the buffers of
 * the device 'devid'. nothing is sent to the device here;
 * that is done by g6_set_njp().
 *   index : user index of the particle. it is stored as index + 1.
 */
void
g6_set_i_particle(int devid, int address,
                  int index,
                  double x[3], /* position */
                  double v[3], /* velocity */
                  double eps2,
                  double h2)
{
    Ipin_t *buf = &Ipin[devid];
    int axis;

    // make sure the entry 'address' exists.
    realloc_ipin(devid, buf, address);

    for (axis = 0; axis < 3; axis++) {
        buf->x[address][axis] = x[axis];
        buf->v[address][axis] = v[axis];
    }
    // offset by one in order to distinguish null entry in the neighbour list.
    buf->index[address] = index + 1;
    buf->eps2[address] = eps2;
    buf->h2[address] = h2;
}

/*
 * retrieve acc, jerk and phi of the latest run on the device 'devid'.
 * 'flag' is not used. always returns 0.
 * what get_force0() writes into nnbindex and rad goes to scratch
 * buffers and is discarded.
 */
int
g6_get_force(int devid,
             double acc[][3],
             double jerk[][3],
             double phi[],
             int flag[])
{
    int nnbindex_scratch[NFOMAX];
    double rad_scratch[NFOMAX];

    get_force0(devid, acc, jerk, phi, nnbindex_scratch, rad_scratch);

    return 0;
}

int
g6_get_force_etc(int devid,
                     double acc[][3],
                     double jerk[][3],
                     double phi[],
                     int nnbindex[],
                     int flag[])
{
    double *rad = NULL;
    get_force0(devid, acc, jerk, phi, nnbindex, rad);
    return 0;
}

/*
 * This function is defined only to implement g6_get_force_etc_all().
 * A user should not call this directly. Use g6_get_force_etc() instead.
 *
 * same as g6_get_force_etc(), but rad[] is passed on to the caller, too.
 * always returns 0. (the function used to fall off its end without
 * returning a value.)
 */
static int
get_force_etc0(int devid,
               double acc[][3],
               double jerk[][3],
               double phi[],
               int nnbindex[],
               double rad[])
{
    get_force0(devid, acc, jerk, phi, nnbindex, rad);
    return 0;
}


/*
 * retrieve the latest fout into Acc[][], Jerk[][] etc.
 * the device is accessed only if the fout of the latest run is not
 * retrieved yet; FoutRead[devid] is set in either case.
 * Phi and the neighbour-related arrays are filled only by the G6nb backend.
 */
static void
get_fout(int devid)
{
    int scratch[NFOMAX]; // receives the outputs nobody reads.

    if (FoutRead[devid] == 0) {
        if (Backend != BACKEND_G6) {
            pg2g6nb_get_foutMC(devid, Nip[devid], Acc[devid], Phi[devid], Jerk[devid],
                               Nbovflw[devid], Nblist[devid],
                               scratch, Nnb[devid],
                               scratch, Nnbr2[devid]);
        }
        else {
            pg2g6_get_foutMC(devid, Nip[devid], Acc[devid], Jerk[devid]);
        }
    }

    FoutRead[devid] = 1;
}

static int
get_force0(int devid,
           double acc[][3],
           double jerk[][3],
           double phi[],
           int nnbindex[],
           double rad[])
{
    int i, k;
    double ascale = pow(2.0, -FSHIFT);
    double pscale = -pow(2.0, -PSHIFT);
    double jscale = pow(2.0, -JSHIFT);

    get_fout(devid);

    for (i = 0; i < Nip[devid]; i++) {
        for (k = 0; k < 3; k++) {
            acc[i][k] = ascale * Acc[devid][i][k];
        }
        for (k = 0; k < 3; k++) {
            jerk[i][k] = jscale * Jerk[devid][i][k];
        }
        if (Backend == BACKEND_G6nb) {
            phi[i] = pscale * Phi[devid][i];
            nnbindex[i] = Nnb[devid][i] - 1; // values stored in Nnb are offset by one.
            rad[i] = sqrt(Nnbr2[devid][i]);
        }
    }
}

/*
 * return the j-particle memory size of the device 'devid',
 * as reported by the backend recorded in 'Backend'.
 */
int
g6_getnjmax(int devid)
{
    return (Backend == BACKEND_G6) ? pg2g6_get_jmemsizeMC(devid)
                                   : pg2g6nb_get_jmemsizeMC(devid);
}

// kept for API compatibility only. does nothing; both arguments are ignored.
void
g6_initialize_jp_buffer(int devid, int N)
{
    // nop.
}

// kept for API compatibility only. does nothing; all arguments are ignored.
void
g6_set_overflow_flag_test_mode(int force_test_mode,int jerk_test_mode, int pot_test_mode)
{
    // nop.
}

// kept for API compatibility only. does nothing; 'devid' is ignored.
void
g6_flush_jp_buffer(int devid)
{
    // nop.
}

// kept for API compatibility only. does nothing; 'devid' is ignored.
void
g6_reinitialize(int devid)
{
    // nop.
}

/*
 * guard for the APIs available only with the G6nb pipeline logic.
 * if the backend is G6PIPE, print a message naming 'apiname' and
 * exit with status 1. otherwise return without doing anything.
 */
static void
die_not_implemented_in_g6pipe(char *apiname)
{
    if (Backend != BACKEND_G6) {
        return;
    }

    WARN(0, "Warning: %s is called, but "
         "is not implemented in G6PIPE.\n", apiname);
    exit(1);
}
