#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <g5util.h>

/*
 * Build-time switch, defaulting to 0 when the build does not define it.
 * When nonzero, main() uses a finite cutoff scale (eta = rmax/8) and the
 * GRAPE wrapper additionally calls the cutoff-table / eta setup routines;
 * when zero, eta is made so large that the cutoff factors stay close to 1.
 */
#ifndef USECUTOFF
#define USECUTOFF 0
#endif

/*
 * Length of the per-particle arrays declared in main() and of the scratch
 * array in pairwise_force_grape(). Only element 0 of each array is filled
 * in by this file.
 */
#define NMAX 1024

/*
 * Cutoff factor for the pair force, as a function of the scaled separation
 * re (pairwise_force_host_p3m() passes r/eta and multiplies the plain
 * m*dx/r^3 force by the result).
 *
 * The factor is 1 - re^2 * P(re), where P is a different polynomial on each
 * of the two intervals [0,1) and [1,2).  It equals 1 at re = 0.  Any input
 * outside [0,2) -- including negative values and NaN -- gives 0.
 */
static double
g_p3m(double re)
{
    double poly;

    if (re >= 0 && re < 1) {
        /* inner piece, 0 <= re < 1 */
        poly = re*(224.+re*re*(-224.+re*(70.+re*(48.-re*21.))))/(35.*4.0);
    } else if (re >= 1 && re < 2) {
        /* outer piece, 1 <= re < 2 */
        poly = (12./(re*re)-224.+re*(896.+re*(-840.+re*(224.+re*(70.+re*(-48.+re*7.))))))/(35.*4.0);
    } else {
        /* beyond the cutoff radius (or invalid input): no force */
        return 0;
    }

    return 1.0 - re * re * poly;
}

/*
 * Cutoff factor for the pair potential, as a function of the scaled
 * separation re (pairwise_force_host_p3m() passes r/eta and multiplies the
 * plain m/r potential by the result).
 *
 * The factor is 1 - re * Q(re), where Q is a different polynomial on each
 * of the two intervals [0,1) and [1,2).  It equals 1 at re = 0.  Any input
 * outside [0,2) -- including negative values and NaN -- gives 0.
 */
static double
g_p3m_pot(double re)
{
    double poly;

    if (re >= 0 && re < 1) {
        /* inner piece, 0 <= re < 1 */
        poly = (208.+re*re*(-112.+re*re*(56.+re*(-14.+re*(-8.+re*3.)))))/(70.*2.0);
    } else if (re >= 1 && re < 2) {
        /* outer piece, 1 <= re < 2 */
        poly = (12./re+128.+re*(224.+re*(-448.+re*(280.+re*(-56.+re*(-14.+re*(8.-re)))))))/(70.*2.0);
    } else {
        /* beyond the cutoff radius (or invalid input): no potential */
        return 0;
    }

    return 1.0 - re * poly;
}

static void
put_two_particles(double r, double rmax, double xi[3], double xj[3], double r_dr)
{
    double phi, costheta, sintheta;
    int k;

    phi = 2.0 * M_PI * drand48();
    costheta = 2.0 * drand48() - 1.0;
    sintheta = sqrt(1 - costheta * costheta);

    r = (drand48() - 0.5) * (r_dr - r) + r;

    xj[0] = r * sintheta * cos(phi);
    xj[1] = r * sintheta * sin(phi);
    xj[2] = r * costheta;

    for (k = 0; k < 3; k++) {
	double axj = fabs(xj[k]);
	xi[k] = (rmax - 2.0 * axj) * (drand48() - 0.5);
	xj[k] += xi[k];
    }

    //	fprintf(stderr, "xi: %e %e %e  xj: %e %e %e\n",
    //		xi[0], xi[1], xi[2], xj[0], xj[1], xj[2]);
}

/*
 * Compute the force and potential for one i-particle / one j-particle pair
 * through the GRAPE library.  The G6 API is used when G6API is nonzero at
 * compile time, otherwise the G5 API.
 *
 *   m    : j-particle masses; the G6 path reads m[0], the G5 path passes the
 *          array together with a count of 1.
 *   xi   : i-particle positions (this function sends one particle).
 *   xj   : j-particle positions (this function sends one particle).
 *   eps  : softening length; both paths pass eps*eps to the library.
 *   eta  : cutoff scale; only forwarded to the library when USECUTOFF is
 *          nonzero.
 *   ag   : output, force array filled by the library.
 *   pg   : output, potential array filled by the library.
 *
 * NOTE(review): the exact semantics of the g5_ / g6_ calls are defined by the
 * GRAPE library headers, which are not visible here; the per-call comments
 * below describe only what this file passes in.
 */
static void
pairwise_force_grape(double *m, double (*xi)[3], double (*xj)[3], double eps, double eta,
                     double (*ag)[3], double *pg)
{
    // NOTE(review): dummy is never referenced in this function.
    double dummy[NMAX];
    // Particle count; only used by the G5 path below.
    int n = 1;

#if G6API
    // All-zero derivative/velocity vectors handed to the G6 particle setters.
    double a2by18[3] = {0.0, 0.0, 0.0};
    double a1by6[3]  = {0.0, 0.0, 0.0};
    double aby2[3]   = {0.0, 0.0, 0.0};
    double vdummy[3] = {0.0, 0.0, 0.0};
    // Output buffers required by g6_get_force_all(); their contents are not
    // used afterwards.
    double dbuf[128];
    int    ibuf[128];
    int    indexj = 0;
    int    indexi = 1;

    // NOTE(review): the meaning of the value 51 is not visible in this file
    // -- confirm against the G6 library documentation.
    g6_set_tunit(51);
    g6_set_xunit(51);
#if USECUTOFF
    g6_set_eta_all(eta);
#endif
    // NOTE(review): xj and xi are pointers to double[3]; presumably the
    // library expects the address of the first particle's coordinates --
    // confirm the prototypes.
    g6_set_j_particle_all(0, indexj, 0.0, 0.0, m[0], a2by18, a1by6, aby2, vdummy, xj);
    g6_set_ti_all(0.0);

    g6_set_i_particle_all(0, indexi, xi, vdummy, eps * eps, 0.0);
    g6_set_nip_all(1);
    g6_set_njp_all(1);
    g6_get_force_all(ag, dbuf, pg, ibuf);
    // Flip the sign of the returned potential; presumably the G6 library
    // uses the opposite sign convention from the host reference (m/r) --
    // TODO confirm.
    *pg *= -1;

#else // G5API

#if USECUTOFF
    // Cutoff setup is repeated on every call, with no user tables supplied.
    g5_set_cutoff_table(NULL, 0.0, 0.0, NULL, 0.0, 0.0);
    g5_set_eta(eta);
#endif
    // Send the single j-particle, set the softening, then run for the single
    // i-particle and read back force and potential.
    g5_set_jp(0, n, m, (double (*)[3])xj);
    g5_set_eps2_to_all(eps * eps);
    g5_set_n(n);
    g5_set_xi(n, (double (*)[3])xi);
    g5_run();
    g5_get_force(n, (double (*)[3])ag, pg);
#endif
}

/*
 * Host reference for the pair force and potential with the cutoff applied.
 *
 *   m      : mass of the j-particle.
 *   xi, xj : positions of the i- and j-particle.
 *   eps    : softening length, added in quadrature to the separation.
 *   eta    : cutoff scale; the cutoff factors are evaluated at r/eta.
 *   a      : output, force on the i-particle (points from xi towards xj).
 *   p      : output, potential, stored as a positive quantity.
 *
 * No guard is applied for r == 0 (eps == 0 with coincident particles) or
 * eta == 0; both lead to a division by zero.
 */
static void
pairwise_force_host_p3m(double m, double xi[3], double xj[3], double eps, double eta, double a[3], double *p)
{
    double dist, dist2, dist3, fcut, pcut;
    int axis;

    /* softened squared distance */
    dist2 = eps * eps;
    for (axis = 0; axis < 3; axis++) {
        dist2 += (xi[axis] - xj[axis]) * (xi[axis] - xj[axis]);
    }
    dist = sqrt(dist2);
    dist3 = dist2 * dist;

    /* force: plain m*dx/r^3 scaled by the force cutoff factor */
    fcut = g_p3m(dist / eta);
    for (axis = 0; axis < 3; axis++) {
        a[axis] = fcut * m * (xj[axis] - xi[axis]) / dist3;
    }

    /* potential: plain m/r scaled by the potential cutoff factor */
    pcut = g_p3m_pot(dist / eta);
    *p = pcut * m / dist;
}

static void
pairwise_force_host(double m, double xi[3], double xj[3], double eps, double a[3], double *p)
{
    double r, r2, r3, g;
    int k;

    r2 = eps * eps;
    for (k = 0; k < 3; k++) {
	r2 += (xi[k] - xj[k]) * (xi[k] - xj[k]);
    }
    r = sqrt(r2);
    r3 = r2 * r;
    for (k = 0; k < 3; k++) {
	a[k] = m * (xj[k] - xi[k]) / r3;
    }
    *p = m / r;
}

/*
 * a0: force0 (may have cutoff)
 * a1: force1 (may have cutoff)
 * a2: force without cutoff (i.e. pure gravity)
 */
static double
compare_force(double a0[3], double a1[3], double a2[3])
{
    int k;
    double e, e2 = 0.0, absa = 0.0;

    for (k = 0; k < 3; k++) {
        absa += a2[k] * a2[k];
        e2 += (a0[k] - a1[k]) * (a0[k] - a1[k]);
    }
    absa = sqrt(absa);
    e = sqrt(e2);
    return e/absa; // relative error
}

/*
 * Relative error between two potentials: |p0 - p1| / |p2|.
 *
 * p0: first potential (may include the cutoff)
 * p1: second potential (may include the cutoff)
 * p2: reference potential without cutoff, used only for scaling
 *
 * The ratio is formed from absolute values directly rather than as
 * sqrt(diff^2 / p2^2): squaring the operands can underflow to 0 or overflow
 * to infinity for very small or very large magnitudes and turn a perfectly
 * representable ratio into NaN.
 *
 * The result is non-negative; it is not finite when p2 is zero.
 */
static double
compare_potential(double p0, double p1, double p2)
{
    return fabs(p0 - p1) / fabs(p2);
}

/*
 * a0: force0 (may have cutoff)
 * a1: force1 (may have cutoff)
 * a2: force without cutoff (i.e. pure gravity)
 */
static double
compare_force_ave(double a0[3], double a1[3], double a2[3])
{
    int k;
    double e, abs0 = 0.0, abs1 = 0.0, abs2 = 0.0;

    for (k = 0; k < 3; k++) {
        abs0 += a0[k] * a0[k];
        abs1 += a1[k] * a1[k];
        abs2 += a2[k] * a2[k];
    }
    abs0 = sqrt(abs0);
    abs1 = sqrt(abs1);
    abs2 = sqrt(abs2);
    e = abs0 - abs1;
    return e/abs2;
}

/*
 * Signed relative difference of the potential magnitudes:
 * (|p0| - |p1|) / |p2|.
 *
 * p0, p1: the two potentials being compared
 * p2:     reference potential, used only for scaling
 *
 * Magnitudes are compared, so the sign convention of p0 and p1 does not
 * matter.  (A variant using the signed difference (p0 - p1) / |p2| was tried
 * here and left disabled.)
 */
static double
compare_potential_ave(double p0, double p1, double p2)
{
    double mag_diff = fabs(p0) - fabs(p1);

    return mag_diff / fabs(p2);
}

/*
 * Accuracy test: compare the force and potential returned by the GRAPE
 * library against the host reference for random particle pairs.
 *
 * Default path (#if 1): the nominal separation r is swept geometrically
 * (ratio dr) from rmax*1e-9 up to rmax*10.  For each r, ntry random pairs
 * are evaluated and one line is printed with
 *   r,
 *   RMS of |ah - ag| / |ah0|,          mean of (|ah| - |ag|) / |ah0|,
 *   ah[0][0], ag[0][0] of the last sample,
 *   RMS of |ph - pg| / |ph0|,          mean of (|ph| - |pg|) / |ph0|,
 *   ph[0], pg[0] of the last sample,
 * where ag/pg come from GRAPE, ah/ph from the host with the cutoff and
 * ah0/ph0 from the host without the cutoff.
 *
 * Alternate path (#else): a single separation r = 1e3 is sampled ntry times
 * and the per-sample errors plus their running RMS / running mean are
 * printed after every sample.
 *
 * The random sequence is fixed by srand48(1234), so runs are repeatable.
 * Command-line arguments are ignored.
 */
int
main(int argc, char **argv)
{
    int i, ntry;
    double r, dr, rmax, eps, mmin, eta;
    double e, eave, s2;
    double ep, epave, sp2;
    double xi[NMAX][3], xj[NMAX][3];
    double ag[NMAX][3], pg[NMAX];
    double ah[NMAX][3], ph[NMAX]; // may have cutoff
    double ah0[NMAX][3], ph0[NMAX]; // pure gravity
    double m[NMAX];

    (void)argc;
    (void)argv;

    srand48(1234);
    ntry = 100;

    dr = 1.01;

    rmax = 1000.0;
    mmin = 1.0;

    // Only the first element m[0] is used for the calculation.
    // m[1..NMAX-1] act as a safety buffer against out-of-range reads
    // by a malfunctioning G5 library, just in case.
    m[0] = 1.0;

    // Softening length; use 0.0 here to run without softening.
    eps = 1e-5 * rmax;

#if USECUTOFF
    eta = rmax/8.0; // 1/r^2 force with cutoff
#else
    eta = rmax*1e10; // pure gravity
#endif
#if G6API
    g6_open_all();
#else // G5API
    g5_open();
    g5_set_range(-rmax, rmax, mmin);
#endif

#if 1
    for (r = rmax * 1e-9 ; r < rmax * 10.0; r *= dr) {
        s2 = 0.0;
        eave = 0.0;
        sp2 = 0.0;
        epave = 0.0;

        for (i = 0; i < ntry; i++) {
            put_two_particles(r, rmax, xi[0], xj[0], r * dr);

            pairwise_force_grape(m, xi, xj, eps, eta, ag, pg);
            pairwise_force_host_p3m(m[0], xi[0], xj[0], eps, eta, ah[0], ph);
            pairwise_force_host(m[0], xi[0], xj[0], eps, ah0[0], ph0);

            e = compare_force(ah[0], ag[0], ah0[0]);
            s2 += e * e;
            eave += compare_force_ave(ah[0], ag[0], ah0[0]);

            ep = compare_potential(ph[0], pg[0], ph0[0]);
            sp2 += ep * ep;
            epave += compare_potential_ave(ph[0], pg[0], ph0[0]);
        }

        // RMS and mean over the ntry samples at this separation.
        s2 /= ntry;
        s2 = sqrt(s2);
        eave /= ntry;

        sp2 /= ntry;
        sp2 = sqrt(sp2);
        epave /= ntry;

        printf("% 15.13E    % 15.13E      %15.13E    %15.13E  %15.13E"
               "    % 15.13E      %15.13E    %15.13E  %15.13E\n",
               r, s2, eave, ah[0][0], ag[0][0], sp2, epave, ph[0], pg[0]);
    }
#else
    {
        double sum = 0.0, sump = 0.0;

        // The squared-error accumulators must start at zero; they are only
        // ever added to in this path.
        s2 = 0.0;
        sp2 = 0.0;

        r = 1e+3;
        for (i = 0; i < ntry; i++) {
            put_two_particles(r, rmax, xi[0], xj[0], r * dr);

            pairwise_force_grape(m, xi, xj, eps, eta, ag, pg);
            pairwise_force_host_p3m(m[0], xi[0], xj[0], eps, eta, ah[0], ph);
            pairwise_force_host(m[0], xi[0], xj[0], eps, ah0[0], ph0);

            e = compare_force(ah[0], ag[0], ah0[0]);
            s2 += e * e;
            eave = compare_force_ave(ah[0], ag[0], ah0[0]);
            sum += eave;

            ep = compare_potential(ph[0], pg[0], ph0[0]);
            sp2 += ep * ep;
            epave = compare_potential_ave(ph[0], pg[0], ph0[0]);
            sump += epave;

            // Running RMS is sqrt(mean of squares), matching the
            // normalisation used by the sweep path above.
            printf("%15.13E %15.13E %15.13E %15.13E \n"
                   "%15.13E %15.13E %15.13E %15.13E \n"
                   "%15.13E %15.13E  \n\n",
                   e, eave, ep, epave,
                   sqrt(s2 / (i + 1)), sum / (i + 1), sqrt(sp2 / (i + 1)), sump / (i + 1),
                   ph0[0], pg[0]);
        }
    }
#endif

#if G6API
    g6_close_all();
#else // G5API
    g5_close();
#endif

    return 0;
}
