#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include "g5util.h"
#include "direct.h"

/*
 * NWALKMAX is the dimension of every fixed-size per-walk array in this
 * file (masses, positions, velocities, accelerations, potentials,
 * particle counts, energies and centers of mass).
 */
enum {
    NWALKMAX = 128,
};

/*
 * Evaluate accelerations and potentials for `nwalk` particle sets
 * ("walks") through the g5_* interface.
 *
 *   mj[w]  masses of walk w            (n[w] entries)
 *   xj[w]  positions of walk w         (n[w] x 3)
 *   vj[w]  velocities of walk w        (not used by this routine)
 *   eps    softening length; eps*eps is handed to g5_set_eps2_to_all()
 *   a[w]   output accelerations        (n[w] x 3)
 *   p[w]   output potentials           (n[w] entries)
 *   n[w]   particle count of walk w
 *   nwalk  number of walks to process
 *
 * Each walk loads its own particles as the j-set and then feeds the same
 * particles through as i-particles in batches of at most the pipeline
 * count reported by the library.
 *
 * NOTE(review): g5_get_force() is issued inside the batch loop while
 * g5_flush_runs() is called only once after all walks; presumably the
 * library defers execution until the flush - confirm against the g5
 * library documentation before reordering anything here.
 */
void
calc_gravity_multiwalk(double *mj[NWALKMAX], double (*xj[NWALKMAX])[3], double (*vj[NWALKMAX])[3],
                       double eps, double (*a[NWALKMAX])[3], double *p[NWALKMAX], int n[NWALKMAX], int nwalk)
{
    const int pipes = g5_get_number_of_pipelines();
    int walk;

    /* Result is not used; the query is kept so the sequence of library
       calls stays exactly as before. */
    (void)g5_get_jmemsize();

    for (walk = 0; walk < nwalk; walk++) {
        const int count = n[walk];
        int first;

        g5_set_jp(0, count, mj[walk], xj[walk]);
        g5_set_eps2_to_all(eps*eps);
        g5_set_n(count);

        for (first = 0; first < count; first += pipes) {
            /* The last batch of a walk may be shorter than a full pipeline set. */
            const int batch = (first + pipes > count) ? count - first : pipes;

            g5_set_xi(batch, xj[walk] + first);
            g5_run();
            g5_get_force(batch, a[walk] + first, p[walk] + first);
        }
    }
    g5_flush_runs();

    if (eps != 0.0) {
        /* Add m/eps to every potential; presumably this cancels the
           softened self-interaction term - TODO confirm. */
        const double inv_eps = 1.0 / eps;

        for (walk = 0; walk < nwalk; walk++) {
            int idx;

            for (idx = 0; idx < n[walk]; idx++) {
                p[walk][idx] += mj[walk][idx] * inv_eps;
            }
        }
    }
}

/*
 * Driver: reads one particle set ("walk") per command-line argument,
 * integrates all of them together and writes one result file per input.
 *
 * Usage: prog <infile0> [<infile1>...]   (at most NWALKMAX input files)
 *
 * Every step performs a half-step velocity update, a full position update,
 * a force recomputation via calc_gravity_multiwalk(), and a second
 * half-step velocity update.  Every `interval` steps the energies, the
 * center of mass, its change since the previous report, and a throughput
 * estimate are printed to stdout.  After the run each walk is written to
 * "<infile>.out": a full dump for fewer than 2048 particles, a fingerprint
 * otherwise.
 *
 * Exits with status 2 on a usage error, 1 if an output file name cannot
 * be built, and returns 0 on success.
 */
int
main(int argc, char **argv)
{
    double *mj[NWALKMAX], (*xj[NWALKMAX])[3], (*vj[NWALKMAX])[3];
    double (*a[NWALKMAX])[3], *p[NWALKMAX];
    double e[NWALKMAX], e0[NWALKMAX], ke[NWALKMAX], pe[NWALKMAX];
    double cm[NWALKMAX][3], cm0[NWALKMAX][3];
    double time, dt, endt;
    double eps;
    int nwalk;
    int n[NWALKMAX];
    long int ninteraction = 0;
    int k, w;
    int nstep, step;
    int interval;
    double dinterval;
    double lt = 0.0, st = 0.0;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <infile0> [<infile1>...] \n", argv[0]);
        exit(2);
    }

    /* Every per-walk array has NWALKMAX slots; refuse to run past them. */
    if (argc - 1 > NWALKMAX) {
        fprintf(stderr, "%s: too many input files (%d); at most %d are supported\n",
                argv[0], argc - 1, NWALKMAX);
        exit(2);
    }

    /* Start every buffer pointer as NULL before handing its address to
       readnbodies(). */
    for (w = 0; w < NWALKMAX; w++) {
        mj[w] = NULL;
        xj[w] = NULL;
        vj[w] = NULL;
        a[w] = NULL;
        p[w] = NULL;
    }

    srand48(1234);

    endt = 10;
    eps = 0.02;
    dt = 0.01;
    time = 0.0;
    nstep = endt/dt;

    nwalk = argc - 1;
    fprintf(stderr, "nwalk:%d\n", nwalk);
    for (w = 0; w < nwalk; w++) {
        readnbodies(&n[w], &mj[w], &xj[w], &vj[w], &a[w], &p[w], argv[w + 1]);
        fprintf(stderr, "walk:%d  n:%d  outfile:%s.out  endtime:%f\n", w, n[w], argv[w + 1], endt);
        ninteraction += (long int)n[w] * (long int)n[w];
    }
    /* ninteraction is a long int, so it needs %ld. */
    fprintf(stderr, "ninteraction:%ld\n", ninteraction);

    /* Reporting interval, based on the size of the first walk and capped
       at a tenth of the run.  The range test comes before the conversion
       to int so that a huge (or infinite, for n[0] == 0) dinterval is
       never converted. */
    dinterval = 500.0 * (10000.0/n[0]) * (10000.0/n[0]);
    if (dinterval * 10.0 > nstep) {
        interval = nstep / 10;
    }
    else {
        interval = (int)dinterval;
    }
    interval = interval < 1 ? 1 : interval;
    fprintf(stderr, "interval: %d\n", interval);

    g5_open();
    get_cputime(&lt,&st);
    calc_gravity_multiwalk(mj, xj, vj, eps, a, p, n, nwalk);

    for (w = 0; w < nwalk; w++) {
        energy(mj[w], vj[w], p[w], n[w], &ke[w], &pe[w]);
        e0[w] = ke[w] + pe[w];
        /* Baseline for the first "dcm" report; without it cm0 would be
           read before it had ever been written. */
        center_of_mass(mj[w], xj[w], n[w], cm0[w]);
        printf("ke[%d]: %f\n", w, ke[w]);
        fflush(stdout);
    }

    for (step = 1; step < nstep; step++) {
        for (w = 0; w < nwalk; w++) {
            push_velocity(vj[w], a[w], 0.5 * dt, n[w]);
            push_position(xj[w], vj[w], a[w], dt, n[w]);
        }
        time = time + dt;
        calc_gravity_multiwalk(mj, xj, vj, eps, a, p, n, nwalk);

        for (w = 0; w < nwalk; w++) {
            push_velocity(vj[w], a[w], 0.5 * dt, n[w]);
        }
        /* Progress dots: ten per reporting interval. */
        if (interval > 10 && step % (interval / 10) == 0) {
            fprintf(stderr, ".");
        }
        if (step % interval == 0) {
            get_cputime(&lt,&st);
            printf("step: %d time: %e\n", step, time);
            for (w = 0; w < nwalk; w++) {
                printf("walk:%d\n", w);
                center_of_mass(mj[w], xj[w], n[w], cm[w]);
                energy(mj[w], vj[w], p[w], n[w], &ke[w], &pe[w]);
                e[w] = ke[w] + pe[w];
                printf("    e: % 15.13E   de: % 15.13E\n", e[w], e[w] - e0[w]);
                printf("   ke: % 15.13E   pe: % 15.13E\n", ke[w], pe[w]);
                printf("ke/pe: % 15.13E\n", ke[w] / pe[w]);
                printf("cm : %22.15e %22.15e %22.15e\n", cm[w][0], cm[w][1], cm[w][2]);
                printf("dcm: %22.15e %22.15e %22.15e\n",
                       cm[w][0] - cm0[w][0], cm[w][1] - cm0[w][1], cm[w][2] - cm0[w][2]);
                printf("\n");
                /* The next report measures drift relative to this one. */
                for (k = 0; k < 3; k++) {
                    cm0[w][k] = cm[w][k];
                }
            }
            printf("CPU time: %e\n", lt);
            double gintrps = (double)ninteraction * (double)interval / lt /1e9;
            printf("speed: %e Ginteraction/s (%6.2f Gflops)\n\n",
                   gintrps, 38.0 * gintrps);
            fflush(stdout);
            get_cputime(&lt, &st);
        }
    }
    g5_close();

    for (w = 0; w < nwalk; w++) {
        /* Size the output name from the actual argument length so a long
           input path cannot overflow a fixed buffer. */
        int len = snprintf(NULL, 0, "%s.out", argv[w + 1]);
        char *buf;

        if (len < 0) {
            fprintf(stderr, "%s: cannot format output file name for %s\n", argv[0], argv[w + 1]);
            exit(1);
        }
        buf = malloc((size_t)len + 1);
        if (buf == NULL) {
            fprintf(stderr, "%s: out of memory building output file name\n", argv[0]);
            exit(1);
        }
        snprintf(buf, (size_t)len + 1, "%s.out", argv[w + 1]);

        if (n[w] < 2048) {
            // dump all particles
            writenbody(n[w], mj[w], xj[w], vj[w], buf);
        }
        else {
            // too many particles for full dump.
            // use check sum instead.
            writefingerprint(n[w], mj[w], xj[w], vj[w], buf);
        }
        free(buf);
    }

    return 0;
}
