// Scaling for xi, xj, force, and jerk is fixed by the hardware.
// These values should match the ones given in ./pg2g6/g6.pg2.
#define XUNIT   (51)  // passed to set_xunitMC() by g6_open().
#define FSHIFT  (48)  // get_force0() multiplies acc by 2^-FSHIFT.
#define PSHIFT  (48)  // NOTE(review): not referenced in the visible part of this file.
#define JSHIFT  (32)  // get_force0() multiplies jerk by 2^-JSHIFT.

// Max # of forces returned from the device at once. g6_npipes() returns this.
#define NFOMAX  (80)

// Host-side buffer of i-particle data for one device.
// All arrays are (re)allocated together by realloc_ipin() and are indexed
// by the i-particle address passed to g6_set_i_particle().
typedef struct {
    double (*x)[3];    // positions, filled by g6_set_i_particle().
    double (*v)[3];    // velocities, filled by g6_set_i_particle().
    double *eps2;      // softening values, filled by g6_set_i_particle().
    double (*acc)[3];  // allocated, but not otherwise referenced in the visible code.
    double (*jerk)[3]; // allocated, but not otherwise referenced in the visible code.
    int *index;        // particle indices, filled by g6_set_i_particle().
} Ipin_t;

// Per-device value stored by g6_set_ti() and handed to
// pg2g6_set_coeffMC() by g6_set_njp().
typedef struct {
    double t; // the ti value most recently given to g6_set_ti().
} Cin_t;

// Per-device state; every array below is indexed by devid.
static int Nip[NHIBMAX];          // # of i-particles set by g6_set_nip()
static int NipAllocated[NHIBMAX]; // # of i-particles Ipin[] is allocated for.
static Ipin_t Ipin[NHIBMAX];      // i-particle buffers, filled by g6_set_i_particle().
static Cin_t Cin[NHIBMAX];        // value set by g6_set_ti().

// Print a printf-style message to stderr when its level lv does not exceed
// warn_level. The body is wrapped in do { } while (0) so that the macro
// expands to exactly one statement and can be used safely in an unbraced
// if/else. Every use must be terminated with ';'.
#define WARN(lv, fmt, args...) \
    do { if ((lv) <= warn_level) fprintf(stderr, fmt, ## args); } while (0)
static int warn_level = 2; // default verbosity; init_vars() overwrites it.

// Forward declarations of the file-local helpers defined below.
static int realloc_ipin(Ipin_t *ip, int req_n);
static void init_vars(void);
// Both force readers share one signature; get_force_etc0() forwards to get_force0().
static int get_force0(int devid, double acc[][3], double jerk[][3], double phi[], int nnbindex[], double rad[]);
static int get_force_etc0(int devid, double acc[][3], double jerk[][3], double phi[], int nnbindex[], double rad[]);

// Applies the fixed scaling to one device; called from g6_open().
static void set_xunitMC(int devid, int newxunit);

/*
 * Resize one array of an Ipin_t buffer to nelem elements of elemsize bytes.
 * Terminates the process if the allocation fails, so the caller never
 * stores a NULL pointer (and never loses the old one silently).
 */
static void *
realloc_ipin_array(void *old, size_t nelem, size_t elemsize)
{
    void *p = realloc(old, nelem * elemsize);

    if (p == NULL) {
        WARN(0, "realloc_ipin(): failed to allocate %zu bytes.\n",
             nelem * elemsize);
        exit(1);
    }
    return p;
}

/*
 * reallocate buf of type Ipin_t.
 *   ip    : a pointer to the buf.
 *   req_n : requested size (in # of particles). Negative values are
 *           treated as 0.
 *
 * The size is rounded up to a multiple of 4096 particles that is strictly
 * larger than req_n, so index req_n itself is always valid afterwards.
 * The process exits with status 1 if memory cannot be obtained.
 *
 * returns the size actually allocated
 * (larger than req_n).
 */
static int
realloc_ipin(Ipin_t *ip, int req_n)
{
    const int page = 4096;
    int n;

    if (req_n < 0) {
        req_n = 0; // avoid a zero or negative element count below.
    }
    n = (req_n / page + 1) * page;

    ip->x      = realloc_ipin_array(ip->x,     (size_t)n, sizeof *ip->x);
    ip->v      = realloc_ipin_array(ip->v,     (size_t)n, sizeof *ip->v);
    ip->eps2   = realloc_ipin_array(ip->eps2,  (size_t)n, sizeof *ip->eps2);
    ip->acc    = realloc_ipin_array(ip->acc,   (size_t)n, sizeof *ip->acc);
    ip->jerk   = realloc_ipin_array(ip->jerk,  (size_t)n, sizeof *ip->jerk);
    ip->index  = realloc_ipin_array(ip->index, (size_t)n, sizeof *ip->index);

    return n;
}

/*
 * One-time initialization of the file-level state.
 * The first call fetches the warning level via pg2g6_get_warn_level() and
 * clears the i-particle buffers of every device; later calls do nothing.
 */
static void
init_vars(void)
{
    static int initialized = 0;
    int devid;

    if (initialized) {
        return;
    }
    initialized = 1;

    warn_level = pg2g6_get_warn_level();

    // every device starts with an empty (unallocated) i-particle buffer.
    for (devid = 0; devid < NHIBMAX; devid++) {
        NipAllocated[devid] = 0;
        memset(&Ipin[devid], 0, sizeof Ipin[devid]);
    }
}

/*
 * Open device devid.
 *
 * On the first open of a given device its board info is inspected:
 *   - the process exits with status 2 unless the product is a GRAPE-9;
 *   - the process exits with status 1 unless the backend is the G6 pipeline.
 * The process also exits with status 1 if devid cannot index the
 * per-device tables of this file.
 *
 * On every call the fixed coordinate scaling (XUNIT) is applied.
 */
void
g6_open(int devid)
{
    // static storage: all elements start out as 0 (= setup not done).
    static int setup_done[NHIBMAX];

    init_vars();

    // devid indexes setup_done[] and the other per-device arrays.
    if (devid < 0 || NHIBMAX <= devid) {
        WARN(0, "g6_open(): devid (=%d) is out of range (0..%d).\n",
             devid, (int)NHIBMAX - 1);
        exit(1);
    }

    pg2g6_openMC(devid);

    if (!setup_done[devid]) {
        unsigned int binfo, productid, backendid, npipe;
        pg2g6_device_info_t devinfo;
        int compatible = 0;

        setup_done[devid] = 1;
        pg2g6_device_infoMC(devid, &devinfo);

        // boardinfo layout used here: bits 31..28 product id,
        // bits 23..20 backend id, bits 7..0 number of pipelines.
        // (bits 19..16 were decoded as a revision number but never used.)
        binfo = devinfo.boardinfo;
        productid = (binfo >> 28) & 0xf;
        backendid = (binfo >> 20) & 0xf;
        npipe     = binfo & 0xff;

        switch (productid) {
          case PRODUCT_G9C:
            WARN(2,"GRAPE-9 ");
            break;
          default:
            WARN(0, "\ndevice[%d], productid:%u, is not a GRAPE-9.\n",
                 devid, productid);
            exit(2);
        }

        WARN(2, "pipeline logic of device[%d]:", devid);
        switch (backendid) {
          case BACKEND_G6:
            WARN(2, "G6PIPE\n");
            compatible = 1;
            break;
          case BACKEND_G5:
            WARN(2, "G5PIPE\n");
            break;
          case BACKEND_EMPTY:
            WARN(0, "no pipeline logic configured in the device.\n");
            break;
          default:
            WARN(2, "unknown\n");
            break;
        }
        if (!compatible) {
            WARN(0,"device[%d] is incompatible with G6 API.\n", devid);
            exit(1);
        }
        WARN(3, "npipes:%u \n", npipe);
    }

    set_xunitMC(devid, XUNIT); // user app is not permitted to change scaling.
}

/* Close device devid by handing the request to pg2g6_closeMC(). */
void g6_close(int devid)
{
    pg2g6_closeMC(devid);
}

/*
 * Set the time unit. Currently does nothing: the implementation below is
 * compiled out.
 * NOTE(review): the disabled code refers to 'devid', which is not a
 * parameter of this function; it will need one when it is enabled.
 */
void g6_set_tunit(int newtunit)
{
    (void)newtunit; // unused while the code below is disabled.

    // 2^(63-newunit) gives the max value of t.
#if 0 // !!! to be enabled when the new pipeline logic with int64 ti is ready.
    double tmax = 1 << (63 - newtunit);
    pg2g6_set_range_tiMC(devid, -tmax, tmax);
#endif
}

/*
 * Apply the fixed scaling and coordinate range to device devid.
 *   newxunit : the range of xi and xj is set to
 *              [-2^(63-newxunit), +2^(63-newxunit)].
 * Every scale factor is currently 1.0 (see the note on xscale).
 */
static void
set_xunitMC(int devid, int newxunit)
{
    // 2^(63-newxunit) gives the max value of x. It is computed in floating
    // point because an int shift (1 << k) overflows for large k.
    const double xmax = pow(2.0, 63 - newxunit);
    const double xmin = -xmax;

    // !!! pinned to 1.0 for now. The alternative left in the original code
    // was pow(2.0, 64) / (xmax - xmin).
    const double xscale = 1.0;

    const double vscale = xscale;
    const double eps2scale = xscale * xscale;
    const double mscale = 1.0;
    const double ascale = xscale * xscale / mscale;
    const double jscale = ascale;

    // j-particle side.
    pg2g6_set_scale_tiMC(devid, 1.0);
    pg2g6_set_scale_mjMC(devid, mscale);
    pg2g6_set_range_xjMC(devid, xmin, xmax);
    pg2g6_set_scale_vjMC(devid, vscale);
    pg2g6_set_scale_tjMC(devid, 1.0);
    pg2g6_set_scale_acc0by2MC(devid, 1.0);
    pg2g6_set_scale_jerk0by6MC(devid, 1.0);

    // i-particle side and results.
    pg2g6_set_range_xiMC(devid, xmin, xmax);
    pg2g6_set_scale_viMC(devid, vscale);
    pg2g6_set_scale_epsi2MC(devid, eps2scale);
    pg2g6_set_scale_accMC(devid, ascale);
    pg2g6_set_scale_jerkMC(devid, jscale);
}

/*
 * Does nothing: the requested unit is ignored.
 * g6_open() applies the fixed XUNIT scaling on its own.
 */
void g6_set_xunit(int newxunit)
{
    (void)newxunit; // nop.
}

/*
 * Send one j-particle to device devid via pg2g6_set_jpMC().
 *   address : passed through as the second argument of pg2g6_set_jpMC().
 *   dtj, a2by18 : accepted for API compatibility; not used.
 *
 * Returns 0. (The function used to fall off its end without a return
 * statement although g6_set_j_particle_mxonly() uses the result.)
 */
int
g6_set_j_particle(int devid, int address,
                  int index,
                  double tj, /* particle time */
                  double dtj, /* not used */
                  double mass,
                  double a2by18[3], /* not used */
                  double a1by6[3], /* a1dot divided by 6 */
                  double aby2[3], /* a divided by 2 */
                  double v[3], /* velocity */
                  double x[3] /* position */)
{
    (void)dtj;
    (void)a2by18;

    // a single particle: every array argument points at one element.
    pg2g6_set_jpMC(devid, address, 1,
                   &mass, (double (*)[3])x, (double (*)[3])v,
                   &tj, (double (*)[3])aby2, (double (*)[3])a1by6, &index);
    return 0;
}

/*
 * Send a j-particle for which only mass and position are given.
 * Time, velocity and all derivative terms are passed as zero, dtj as 1.0.
 * Returns whatever g6_set_j_particle() returns.
 */
int
g6_set_j_particle_mxonly(int devid, int address, int index,
                         double mass,
                         double x[3] /* position */)
{
    double zero_a2by18[3] = { 0.0, 0.0, 0.0 };
    double zero_a1by6[3]  = { 0.0, 0.0, 0.0 };
    double zero_aby2[3]   = { 0.0, 0.0, 0.0 };
    double zero_v[3]      = { 0.0, 0.0, 0.0 };
    const double t0  = 0.0;
    const double dt0 = 1.0;

    return g6_set_j_particle(devid, address, index,
                             t0, dt0, mass,
                             zero_a2by18, zero_a1by6, zero_aby2, zero_v, x);
}

void
g6_set_ti(int devid, double ti)
{
    Cin[devid].t = ti;
}

/*
 * Register ni i-particles (slots 0 .. ni-1) with one common eps2, then set
 * the particle counts; g6_set_njp() is what starts the device.
 * fold, jold and phiold are accepted but not used.
 */
void
g6calc_firsthalf(int devid, int nj, int ni,
                 int index[],
                 double xi[][3],
                 double vi[][3],
                 double fold[][3],
                 double jold[][3],
                 double phiold[],
                 double eps2,
                 double h2[])
{
    int slot = 0;

    (void)fold;
    (void)jold;
    (void)phiold;

    while (slot < ni) {
        g6_set_i_particle(devid, slot, index[slot],
                          xi[slot], vi[slot], eps2, h2[slot]);
        slot++;
    }

    g6_set_nip(devid, ni);
    g6_set_njp(devid, nj);
}

/*
 * Same as g6calc_firsthalf(), but eps2 is an array.
 *   mode == 0 : particle i uses eps2[i].
 *   otherwise : every particle uses eps2[0].
 * fold, jold and phiold are accepted but not used.
 */
void
g6calc_firsthalf0(int devid, int nj, int ni,
                  int index[],
                  double xi[][3],
                  double vi[][3],
                  double fold[][3],
                  double jold[][3],
                  double phiold[],
                  double *eps2,
                  double h2[],
                  int mode)
{
    const int per_particle_eps2 = (mode == 0);
    int slot;

    (void)fold;
    (void)jold;
    (void)phiold;

    for (slot = 0; slot < ni; slot++) {
        const double e2 = per_particle_eps2 ? eps2[slot] : eps2[0];

        g6_set_i_particle(devid, slot, index[slot],
                          xi[slot], vi[slot], e2, h2[slot]);
    }

    g6_set_nip(devid, ni);
    g6_set_njp(devid, nj);
}

/*
 * Fetch the results of the preceding g6calc_firsthalf() through
 * g6_get_force(). Only devid, acc, jerk and pot are used; the remaining
 * parameters are ignored. Always returns 0.
 */
int
g6calc_lasthalf(int devid, int nj, int ni,
                int index[],
                double xi[][3],
                double vi[][3],
                double eps2,
                double h2[],
                double acc[][3],
                double jerk[][3],
                double pot[])
{
    (void)nj;
    (void)ni;
    (void)index;
    (void)xi;
    (void)vi;
    (void)eps2;
    (void)h2;

    g6_get_force(devid, acc, jerk, pot, NULL);

    return 0;
}

/*
 * Counterpart of g6calc_firsthalf0(): fetch the results through
 * g6_get_force(). Only devid, acc, jerk and pot are used; the remaining
 * parameters (including mode) are ignored. Always returns 0.
 */
int
g6calc_lasthalf0(int devid, int nj, int ni,
                 int index[],
                 double xi[][3],
                 double vi[][3],
                 double *eps2,
                 double h2[],
                 double acc[][3],
                 double jerk[][3],
                 double pot[],
                 int mode)
{
    (void)nj;
    (void)ni;
    (void)index;
    (void)xi;
    (void)vi;
    (void)eps2;
    (void)h2;
    (void)mode;

    g6_get_force(devid, acc, jerk, pot, NULL);

    return 0;
}

/*
 * Like g6calc_lasthalf(), but goes through g6_get_force_etc() so that
 * nnbindex is forwarded as well. Only devid, acc, jerk, pot and nnbindex
 * are used; the remaining parameters are ignored. Always returns 0.
 */
int
g6calc_lasthalf2(int devid, int nj, int ni,
                 int index[],
                 double xi[][3],
                 double vi[][3],
                 double eps2,
                 double h2[],
                 double acc[][3],
                 double jerk[][3],
                 double pot[],
                 int nnbindex[])
{
    (void)nj;
    (void)ni;
    (void)index;
    (void)xi;
    (void)vi;
    (void)eps2;
    (void)h2;

    g6_get_force_etc(devid, acc, jerk, pot, nnbindex, NULL);

    return 0;
}

/*
 * Stub: reading the neighbour list is not implemented yet.
 * Always returns 0 so that callers get a well-defined value (the function
 * used to fall off its end without returning anything).
 */
int
g6_read_neighbour_list(int devid)
{
    (void)devid;
    // to be written.
    return 0;
}

/*
 * Stub: neighbour lists are not implemented yet.
 * Reports an empty list: *nblen is set to 0 (when nblen is not NULL), nbl
 * is left untouched, and 0 is returned. The function used to return
 * without a value and without setting *nblen.
 */
int
g6_get_neighbour_list(int devid,
                      int ipipe,
                      int maxlength,
                      int *nblen,
                      int nbl[])
{
    (void)devid;
    (void)ipipe;
    (void)maxlength;
    (void)nbl;

    // to be written.
    if (nblen != NULL) {
        *nblen = 0;
    }
    return 0;
}

/* Does nothing: the requested sort mode is ignored. */
void g6_set_neighbour_list_sort_mode(int mode)
{
    (void)mode; // nop.
}

/*
 * Stub: g6_set_neighbour_list_sort_mode() stores nothing, so there is no
 * mode to report. Always returns 0 (the function used to return without a
 * value).
 */
int
g6_get_neighbour_list_sort_mode(void)
{
    // nop.
    return 0;
}

/*
 * Return the number of pipelines reported by
 * pg2g6_get_number_of_pipelinesMC() for device devid.
 * The process exits with status 1 if that number exceeds NFOMAX.
 */
int g6_npipesMC(int devid)
{
    const int npipe = pg2g6_get_number_of_pipelinesMC(devid);

    if (npipe <= NFOMAX) {
        return npipe;
    }

    WARN(0, "g6_npipesMC(): # of pipe (=%d) exceeds NFOMAX (=%d).\n",
         npipe, NFOMAX);
    exit(1);
}

/*
 * Return NFOMAX, the max # of forces returned from the device at once.
 * No device is queried.
 */
int g6_npipes(void)
{
    const int nforce_max = NFOMAX;

    return nforce_max;
}

/*
 * Record the number of i-particles for device devid.
 * The value is read back by g6_set_njp() and get_force0().
 */
void g6_set_nip(int devid, int nip)
{
    int *slot = &Nip[devid];

    *slot = nip;
}

void
g6_set_njp(int devid, int njp)
{
    Ipin_t *ipin = Ipin + devid;
    pg2g6_set_nMC(devid, njp); // this must precede pg2g6_set_ipMC(). otherwise pg2g6_set_ipMC() quit immediately.
    pg2g6_set_coeffMC(devid, Cin[devid].t);
    pg2g6_set_ipMC(devid, Nip[devid],
                   ipin->x, ipin->v, ipin->eps2,
                   ipin->index);
    pg2g6_runMC(devid);
}

/* Does nothing: every argument is ignored and no array is modified. */
void
g6_set_i_particle_scales_from_real_value(int devid, int address,
                                         double acc[3],
                                         double jerk[3],
                                         double phi,
                                         double jfactor,
                                         double ffactor)
{
    (void)devid;
    (void)address;
    (void)acc;
    (void)jerk;
    (void)phi;
    (void)jfactor;
    (void)ffactor;
    // nop
}

/*
 * Store one i-particle in the host-side buffer of device devid.
 *   address : slot in the buffer the particle is written to.
 *   index   : particle index stored alongside the data.
 *   h2      : accepted for API compatibility; not used.
 *
 * The buffer is grown on demand so that slot 'address' exists. (It used to
 * be sized by 'index', which is unrelated to the slot being written and
 * could leave the write out of bounds.)
 * The process exits with status 1 if address is negative.
 */
void g6_set_i_particle(int devid, int address,
                       int index,
                       double x[3], /* position */
                       double v[3], /* velocity */
                       double eps2,
                       double h2)
{
    Ipin_t *ip = &Ipin[devid];
    int k;

    (void)h2;

    if (address < 0) {
        WARN(0, "g6_set_i_particle(): invalid address (=%d).\n", address);
        exit(1);
    }

    // realloc_ipin() returns a size strictly larger than its request.
    if (NipAllocated[devid] <= address) {
        NipAllocated[devid] = realloc_ipin(ip, address);
    }

    for (k = 0; k < 3; k++) {
        ip->x[address][k] = x[k];
        ip->v[address][k] = v[k];
    }
    ip->index[address] = index;
    ip->eps2[address] = eps2;
}

/*
 * Read back the results for the i-particles of device devid.
 * acc, jerk and phi are forwarded to get_force0(); no neighbour-index or
 * radius array is requested. flag is not used. Always returns 0.
 */
int
g6_get_force(int devid,
             double acc[][3],
             double jerk[][3],
             double phi[],
             int flag[])
{
    (void)flag;

    get_force0(devid, acc, jerk, phi, NULL, NULL);

    return 0;
}

/*
 * Same as g6_get_force(), except that nnbindex is forwarded to get_force0()
 * as well. No radius array is requested and flag is not used.
 * Always returns 0.
 */
int g6_get_force_etc(int devid,
                     double acc[][3],
                     double jerk[][3],
                     double phi[],
                     int nnbindex[],
                     int flag[])
{
    (void)flag;

    get_force0(devid, acc, jerk, phi, nnbindex, NULL);

    return 0;
}

/*
 * This function is defined only to implement g6_get_force_etc_all().
 * A user should not call this directly. Use g6_get_force_etc() instead.
 *
 * Forwards every argument to get_force0() and returns 0, like
 * g6_get_force_etc() does. (The function used to fall off its end without
 * returning a value.)
 */
static int
get_force_etc0(int devid,
               double acc[][3],
               double jerk[][3],
               double phi[],
               int nnbindex[],
               double rad[])
{
    get_force0(devid, acc, jerk, phi, nnbindex, rad);
    return 0;
}

/*
 * Fetch acc and jerk for the Nip[devid] i-particles of device devid with
 * pg2g6_get_foutMC() and rescale them in place: acc by 2^-FSHIFT, jerk by
 * 2^-JSHIFT.
 *
 * NOTE(review): phi, nnbindex and rad are not written by this function.
 *
 * Returns 0. (The function used to fall off its end without returning a
 * value.)
 */
static int
get_force0(int devid,
           double acc[][3],
           double jerk[][3],
           double phi[],
           int nnbindex[],
           double rad[])
{
    const double ascale = pow(2.0, -FSHIFT);
    const double jscale = pow(2.0, -JSHIFT);
    const int ni = Nip[devid];
    int i, k;

    (void)phi;
    (void)nnbindex;
    (void)rad;

    pg2g6_get_foutMC(devid, ni, acc, jerk);
    for (i = 0; i < ni; i++) {
        for (k = 0; k < 3; k++) {
            acc[i][k] *= ascale;
            jerk[i][k] *= jscale;
        }
    }
    return 0;
}

/* Return the value of pg2g6_get_jmemsizeMC() for device devid. */
int
g6_getnjmax(int devid)
{
    const int njmax = pg2g6_get_jmemsizeMC(devid);

    return njmax;
}

/* Does nothing: both arguments are ignored. */
void
g6_initialize_jp_buffer(int devid, int N)
{
    (void)devid;
    (void)N;
    // nop.
}

/* Does nothing: all three test-mode arguments are ignored. */
void
g6_set_overflow_flag_test_mode(int force_test_mode,
                               int jerk_test_mode,
                               int pot_test_mode)
{
    (void)force_test_mode;
    (void)jerk_test_mode;
    (void)pot_test_mode;
    // nop.
}

/* Does nothing: devid is ignored. */
void
g6_flush_jp_buffer(int devid)
{
    (void)devid;
    // nop.
}

/* Does nothing: devid is ignored. */
void
g6_reinitialize(int devid)
{
    (void)devid;
    // nop.
}
