/*
 * GRAPE-6 global functions
 */

// number of devices handled by the *_all() functions; 0 until g6_open_all() fills it in.
static int Ndevice = 0;
// IDs of those devices; g6_open_all() sets the first Ndevice entries.
static int DeviceId[NHIBMAX];

/*
 * Open every device.
 * The first call asks pg2g6_devices() for the device list and caches it in
 * Ndevice / DeviceId; every call (the first included) then opens each cached device.
 */
void g6_open_all(void)
{
    static int need_probe = 1;
    int idev;

    if (need_probe) {
        pg2g6_devices_t found;

        need_probe = 0;
        pg2g6_devices(&found);
        Ndevice = found.ndevice;
        for (idev = 0; idev < Ndevice; idev++) {
            DeviceId[idev] = found.deviceid[idev];
        }
    }

    idev = 0;
    while (idev < Ndevice) {
        g6_open(DeviceId[idev]);
        idev++;
    }
}

/*
 * Close every device recorded in DeviceId, in the order they were recorded.
 */
void g6_close_all(void)
{
    int idev = 0;

    while (idev < Ndevice) {
        g6_close(DeviceId[idev]);
        idev++;
    }
}

/*
 * Store one j-particle on the device that owns the global j-address 'address'.
 *
 * j-particles are distributed round-robin: global address a lives on device
 * number (a % Ndevice) at local address (a / Ndevice). All other arguments
 * are forwarded unchanged to g6_set_j_particle().
 *
 * Returns 0 after forwarding the particle, or -1 without touching any device
 * if no device is registered (Ndevice <= 0) or 'address' is negative.
 */
int g6_set_j_particle_all(int address, int index, double tj, double dtj, double mass,
                          double a2by18[3], double a1by6[3], double aby2[3], double v[3], double x[3])
{
    int devid, adr0;

    // guard the division/modulo below and the DeviceId[] index.
    if (Ndevice <= 0 || address < 0) {
        return -1;
    }

    devid = DeviceId[address % Ndevice];
    adr0 = address / Ndevice;

    g6_set_j_particle(devid, adr0, index, tj, dtj, mass, a2by18, a1by6, aby2, v, x);

    // the original fell off the end of this non-void function.
    return 0;
}

/*
 * Store only mass and position of one j-particle on the device that owns the
 * global j-address 'address'.
 *
 * The address is mapped round-robin: device number (address % Ndevice),
 * local address (address / Ndevice). index, mass and x are forwarded
 * unchanged to g6_set_j_particle_mxonly().
 *
 * Returns 0 after forwarding the particle, or -1 without touching any device
 * if no device is registered (Ndevice <= 0) or 'address' is negative.
 */
int g6_set_j_particle_mxonly_all(int address, int index, double mass, double x[3])
{
    int devid, adr0;

    // guard the division/modulo below and the DeviceId[] index.
    if (Ndevice <= 0 || address < 0) {
        return -1;
    }

    devid = DeviceId[address % Ndevice];
    adr0 = address / Ndevice;

    g6_set_j_particle_mxonly(devid, adr0, index, mass, x);

    // the original fell off the end of this non-void function.
    return 0;
}

/*
 * Pass the same 'ti' value to g6_set_ti() on every device.
 */
void g6_set_ti_all(double ti)
{
    for (int d = 0; d < Ndevice; ++d) {
        g6_set_ti(DeviceId[d], ti);
    }
}

/*
 * Pass the same 'eta' value to g6_set_eta() on every device.
 */
void g6_set_eta_all(double eta)
{
    for (int d = 0; d < Ndevice; ++d) {
        g6_set_eta(DeviceId[d], eta);
    }
}

/*
 * Load the i-particles and the j-particle count onto every device.
 *
 * Each device receives all 'ni' i-particles (with the common softening 'eps2')
 * and is told to use its own share of the 'nj' j-particles.
 * fold, jold and phiold are accepted but not used here.
 */
void g6calc_firsthalf_all(int nj, int ni, int index[], double xi[][3], double vi[][3],
                          double fold[][3], double jold[][3], double phiold[], double eps2, double h2[])
{
    for (int d = 0; d < Ndevice; ++d) {
        const int dev = DeviceId[d];

        // every device works on the same, complete set of i-particles.
        for (int p = 0; p < ni; ++p) {
            g6_set_i_particle(dev, p, index[p], xi[p], vi[p], eps2, h2[p]);
        }
        g6_set_nip(dev, ni);

        // j-particles are split into Ndevice pieces; the first (nj % Ndevice)
        // devices hold one particle more than the rest.
        const int share = nj / Ndevice + (d < nj % Ndevice ? 1 : 0);
        g6_set_njp(dev, share);
    }
}
/*
 * Variant of g6calc_firsthalf_all() that takes the softening as an array.
 *
 * mode == 0 : i-particle p uses eps2[p].
 * otherwise : every i-particle uses eps2[0].
 * fold, jold and phiold are accepted but not used here.
 */
void g6calc_firsthalf0_all(int nj, int ni, int index[], double xi[][3], double vi[][3],
                           double fold[][3], double jold[][3], double phiold[], double *eps2, double h2[], int mode)
{
    for (int d = 0; d < Ndevice; ++d) {
        const int dev = DeviceId[d];

        // every device works on the same, complete set of i-particles.
        for (int p = 0; p < ni; ++p) {
            const double soft = (mode == 0) ? eps2[p] : eps2[0];

            g6_set_i_particle(dev, p, index[p], xi[p], vi[p], soft, h2[p]);
        }
        g6_set_nip(dev, ni);

        // j-particles are split into Ndevice pieces; the first (nj % Ndevice)
        // devices hold one particle more than the rest.
        const int share = nj / Ndevice + (d < nj % Ndevice ? 1 : 0);
        g6_set_njp(dev, share);
    }
}

/*
 * Collect the summed results of all devices into acc, jerk and pot.
 *
 * Only acc, jerk and pot are used; the remaining parameters are ignored.
 * No flag array is requested from g6_get_force_all(), and its return value
 * is discarded: this function always returns 0.
 */
int g6calc_lasthalf_all(int nj, int ni, int index[], double xi[][3], double vi[][3],
                        double eps2, double h2[], double acc[][3], double jerk[][3], double pot[])
{
    int *no_flags = NULL;

    (void)g6_get_force_all(acc, jerk, pot, no_flags);
    return 0;
}

/*
 * Counterpart of g6calc_firsthalf0_all(): collect the summed results of all
 * devices into acc, jerk and pot.
 *
 * Only acc, jerk and pot are used; the remaining parameters (including mode)
 * are ignored. The return value of g6_get_force_all() is discarded and 0 is
 * always returned.
 */
int g6calc_lasthalf0_all(int nj, int ni, int index[], double xi[][3], double vi[][3],
                         double *eps2, double h2[], double acc[][3], double jerk[][3], double pot[], int mode)
{
    int *no_flags = NULL;

    (void)g6_get_force_all(acc, jerk, pot, no_flags);
    return 0;
}

/*
 * Collect the summed results of all devices into acc, jerk and pot, and the
 * nearest-neighbour index of each i-particle into nnbindex.
 *
 * Only acc, jerk, pot and nnbindex are used; the remaining parameters are
 * ignored. The return value of g6_get_force_etc_all() is discarded and 0 is
 * always returned.
 */
int g6calc_lasthalf2_all(int nj, int ni, int index[], double xi[][3], double vi[][3],
                         double eps2, double h2[],
                         double acc[][3], double jerk[][3], double pot[], int nnbindex[])
{
    int *no_flags = NULL;

    (void)g6_get_force_etc_all(acc, jerk, pot, nnbindex, no_flags);
    return 0;
}

/*
 * Call g6_read_neighbour_list() on every device.
 *
 * Returns the bitwise OR of all per-device return values (presumably overflow
 * flags -- confirm against g6_read_neighbour_list()); 0 if there is no device.
 */
int g6_read_neighbour_list_all(void)
{
    int combined = 0;

    for (int d = 0; d < Ndevice; ++d) {
        combined = combined | g6_read_neighbour_list(DeviceId[d]);
    }
    return combined;
}

/*
 * Build the neighbour list of pipeline 'ipipe' by concatenating the lists
 * obtained from every device.
 *
 * ipipe     : pipeline number, forwarded to g6_get_neighbour_list().
 * maxlength : capacity of nbl[]; also forwarded to each device as its limit.
 * nblen     : receives the number of entries written to nbl[].
 * nbl       : receives the merged list (device 0 first, then device 1, ...);
 *             sorted with nbindex_a_lt_b when NbSortMode is non-zero.
 *
 * Returns the bitwise OR of the per-device return values, forced to 1 when
 * the merged list had to be truncated to 'maxlength'.
 *
 * Not reentrant: the per-device scratch lists live in a static buffer.
 */
int g6_get_neighbour_list_all(int ipipe, int maxlength, int *nblen, int nbl[])
{
    int ic, i, isum;
    int ovflw = 0;
    int len[NHIBMAX], totallen = 0;
    // static, like the scratch buffers of g6_get_force_all(): an automatic
    // NHIBMAX x NBMEMSIZE array may be too large for the stack.
    static int list[NHIBMAX][NBMEMSIZE];

    for (ic = 0; ic < Ndevice; ic++) {
        ovflw |= g6_get_neighbour_list(DeviceId[ic], ipipe, maxlength, &len[ic], list[ic]);
    }

    for (ic = 0; ic < Ndevice; ic++) {
        totallen += len[ic];
    }
    if (maxlength < totallen) {
        totallen = maxlength;
        ovflw = 1;
    }
    // a negative length must never reach the caller or qsort(), where it
    // would be converted to a huge size_t.
    if (totallen < 0) {
        totallen = 0;
    }
    *nblen = totallen;

    for (ic = 0, isum = 0; ic < Ndevice; ic++) {
        for (i = 0; i < len[ic] && isum < totallen; i++, isum++) {
            nbl[isum] = list[ic][i];
        }
    }

    if (NbSortMode) {
        qsort(nbl, (size_t)totallen, sizeof(int), nbindex_a_lt_b);
    }

    return ovflw;
}

/*
 * Set the neighbour-list sort mode.
 * Plain forward to g6_set_neighbour_list_sort_mode(), which takes no device ID,
 * so a single call is made.
 */
void g6_set_neighbour_list_sort_mode_all(int mode)
{
    g6_set_neighbour_list_sort_mode(mode);
    return;
}

/*
 * Return the neighbour-list sort mode as reported by
 * g6_get_neighbour_list_sort_mode().
 */
int g6_get_neighbour_list_sort_mode_all(void)
{
    const int current_mode = g6_get_neighbour_list_sort_mode();

    return current_mode;
}

/*
 * Return the sum of g6_get_nbmax() over all devices (0 if there is no device).
 */
int g6_get_nbmax_all(void)
{
    int total = 0;

    for (int d = 0; d < Ndevice; ++d) {
        total = total + g6_get_nbmax(DeviceId[d]);
    }
    return total;
}

/*
 * Set the same i-particle count 'nip' on every device.
 */
void g6_set_nip_all(int nip)
{
    for (int d = 0; d < Ndevice; ++d) {
        g6_set_nip(DeviceId[d], nip);
    }
}

/*
 * Distribute a total of 'njp' j-particles over the devices and tell each
 * device its own count.
 *
 * Every device gets njp / Ndevice particles; the first (njp % Ndevice)
 * devices get one more.
 */
void g6_set_njp_all(int njp)
{
    for (int d = 0; d < Ndevice; ++d) {
        int share = njp / Ndevice;

        if (d < njp % Ndevice) {
            share += 1;
        }
        g6_set_njp(DeviceId[d], share);
    }
}

/*
 * Intentionally does nothing: all arguments are ignored and no device is touched.
 */
void g6_set_i_particle_scales_from_real_value_all(int address, double acc[3], double jerk[3], double phi,
                                                  double jfactor, double ffactor)
{
    (void)address;
    (void)acc;
    (void)jerk;
    (void)phi;
    (void)jfactor;
    (void)ffactor;
}

/*
 * Store the same i-particle at the same 'address' on every device.
 * All arguments are forwarded unchanged to g6_set_i_particle().
 */
void g6_set_i_particle_all(int address, int index, double x[3], double v[3], double eps2, double h2)
{
    for (int d = 0; d < Ndevice; ++d) {
        g6_set_i_particle(DeviceId[d], address, index, x, v, eps2, h2);
    }
}

/*
 * Retrieve the results of every device and sum them per i-particle.
 *
 * acc, jerk, phi : outputs. The first max(Nip[device]) entries are cleared and
 *                  then receive the sum of the per-device results.
 * flag           : forwarded as-is to g6_get_force() for each device.
 *
 * Always returns 0. Not reentrant: per-device results are staged in static buffers.
 */
int g6_get_force_all(double acc[][3], double jerk[][3], double phi[], int flag[])
{
    static double dev_acc[NFOMAX][3];
    static double dev_jerk[NFOMAX][3];
    static double dev_phi[NFOMAX];
    int widest = 0;

    // largest i-particle count among the devices.
    for (int d = 0; d < Ndevice; ++d) {
        if (Nip[DeviceId[d]] > widest) {
            widest = Nip[DeviceId[d]];
        }
    }

    // clear the accumulators.
    for (int p = 0; p < widest; ++p) {
        acc[p][0] = 0.0;
        acc[p][1] = 0.0;
        acc[p][2] = 0.0;
        jerk[p][0] = 0.0;
        jerk[p][1] = 0.0;
        jerk[p][2] = 0.0;
        phi[p] = 0.0;
    }

    // add up the partial results, one device after another.
    for (int d = 0; d < Ndevice; ++d) {
        g6_get_force(DeviceId[d], dev_acc, dev_jerk, dev_phi, flag);

        const int nip = Nip[DeviceId[d]];
        for (int p = 0; p < nip; ++p) {
            acc[p][0] += dev_acc[p][0];
            acc[p][1] += dev_acc[p][1];
            acc[p][2] += dev_acc[p][2];
            jerk[p][0] += dev_jerk[p][0];
            jerk[p][1] += dev_jerk[p][1];
            jerk[p][2] += dev_jerk[p][2];
            phi[p] += dev_phi[p];
        }
    }
    return 0;
}

/*
 * Retrieve the results of every device, sum them per i-particle and select
 * the nearest neighbour of each i-particle across all devices.
 *
 * acc, jerk, phi : outputs. The first max(Nip[device]) entries are cleared and
 *                  then receive the sum of the per-device results.
 * nnbindex       : output. For each i-particle, the neighbour index reported by
 *                  the device whose rtmp value is the smallest.
 * flag           : accepted for interface compatibility; not used here.
 *
 * Always returns 0. Not reentrant: per-device results are staged in static buffers.
 */
int g6_get_force_etc_all(double acc[][3], double jerk[][3], double phi[], int nnbindex[], int flag[])
{
    int           ic, i, k;
    int           nipmax = 0;
    static double atmp[NFOMAX][3];
    static double jtmp[NFOMAX][3];
    static double ptmp[NFOMAX];
    static int    ntmp[NFOMAX];    // index of the nearest neighbor.
    static double rtmp[NFOMAX];
    static double rad[NFOMAX];     // distance from the nearest neighbor.

    (void)flag;

    // largest i-particle count among the devices.
    for (ic = 0; ic < Ndevice; ic++) {
        nipmax = nipmax > Nip[DeviceId[ic]] ? nipmax : Nip[DeviceId[ic]];
    }
    for (i = 0; i < nipmax; i++) {
        for (k = 0; k < 3; k++) {
            acc[i][k] = 0.0;
        }
        for (k = 0; k < 3; k++) {
            jerk[i][k] = 0.0;
        }
        phi[i] = 0.0;
        rad[i] = -1.0;             // negative: no neighbour recorded yet.
    }

    // accumulate calculation results retrieved from all devices.
    for (ic = 0; ic < Ndevice; ic++) {

        // this should not be replaced with g6_get_force_etc(),
        // since rtmp is used to calculate nnbindex.
        get_force_etc0(DeviceId[ic], atmp, jtmp, ptmp, ntmp, rtmp);

        for (i = 0; i < Nip[DeviceId[ic]]; i++) {
            for (k = 0; k < 3; k++) {
                acc[i][k] += atmp[i][k];
            }
            for (k = 0; k < 3; k++) {
                jerk[i][k] += jtmp[i][k];
            }
            phi[i] += ptmp[i];

            // update the nearest neighbor if necessary.
            if (rad[i] < 0.0 || rad[i] > rtmp[i]) {
                rad[i] = rtmp[i];
                nnbindex[i] = ntmp[i];
            }
        }
    }
#if 0
    // debug dump. bounded by nipmax: after the loop above ic == Ndevice,
    // so DeviceId[ic] is no longer a valid device entry.
    fprintf(stderr, "rad: ");
    for (i = 0; i < nipmax; i++) {
        fprintf(stderr, "%e ", rad[i]);
    }
    fprintf(stderr, "\n");
    fprintf(stderr, "nnbindex: ");
    for (i = 0; i < nipmax; i++) {
        fprintf(stderr, "%d ", nnbindex[i]);
    }
    fprintf(stderr, "\n");
    fprintf(stderr, "\n");
#endif
    return 0;
}

/*
 * Return the sum of g6_getnjmax() over all devices (0 if there is no device).
 */
int g6_getnjmax_all(void)
{
    int total = 0;

    for (int d = 0; d < Ndevice; ++d) {
        total = total + g6_getnjmax(DeviceId[d]);
    }
    return total;
}
