/* code for nbody : sticky9.c 
   (with GRAPE, leap-flog scheme)
*/
#include <stdio.h>
#include <stdlib.h>
#include "./sticky.h"

#define COMPARE_WITH_HOST (0) /* set 1 to compare the results on G5 and the host */

static int device_id=0;

void compare_force(REAL (*a0)[3], REAL (*a1)[3], int n)
{
    int i, k, nerr;
    double da2, a2, relerr;

    nerr = 0;
    for (i = 0; i < n; i++) {
	da2 = 0.0;
	a2 = 0.0;
	for (k = 0; k < 3; k++) {
	    da2 += (a0[i][k] - a1[i][k]) * (a0[i][k] - a1[i][k]);
	    a2 += a0[i][k] * a0[i][k];
	}
	relerr = sqrt(da2/a2);
	if (relerr > 1e-2) {
	  //	if (relerr > 4e-8) {	  
#if 1
	    fprintf(stderr, "%6d g %8.5f %8.5f %8.5f\n", i, a0[i][0], a0[i][1], a0[i][2]);
	    fprintf(stderr, "       h %8.5f %8.5f %8.5f\n", a1[i][0], a1[i][1], a1[i][2]);
	    fprintf(stderr, "relative error: %22.15e\n", relerr);
#endif
	    nerr++;
	}
    }
    fprintf(stderr, "compare_force: # of error(s) larger than 1%%: %d\n", nerr);
}

void read_nemoformat(fp,n,dim,t,m,x,v)
     FILE *fp;
     int *n;
     int *dim;
     REAL *t;
     REAL m[NMAX];
     REAL x[NMAX][DIM];
     REAL v[NMAX][DIM];
{
    unsigned long int index,i;

    fscanf(fp,"%d%d%lf",n,dim,t);
    index = *n;
    for(i=0;i<index;i++) fscanf(fp,"%lf",&m[i]);
    for(i=0;i<index;i++)fscanf(fp,"%lf%lf%lf",&x[i][0],&x[i][1],&x[i][2]);
    for(i=0;i<index;i++)fscanf(fp,"%lf%lf%lf",&v[i][0],&v[i][1],&v[i][2]);
}	


void data_input(x,v,m,n,filename,time)
     REAL x[NMAX][DIM];
     REAL v[NMAX][DIM];
     REAL m[NMAX];
     int *n;
     char filename[];
     REAL *time;
{
    int dum;
    FILE *fopen();
    FILE *fpinput;

    fpinput = fopen(filename,"r");
    if (!fpinput) {
      perror("data_input");
      exit(1);
    }

    read_nemoformat(fpinput,n,&dum,time,m,x,v);
    fclose(fpinput);

    printf("initialdata end\n");
}

void energy(t,pot,x,v,m,n,init_ene,icm)
     REAL t;
     REAL pot[NMAX];
     REAL x[NMAX][DIM];
     REAL v[NMAX][DIM];
     REAL m[NMAX];
     int n;
     REAL init_ene;
     REAL icm[DIM];
{
    REAL total_pot=0,total_kin=0;
    int i,k;	
    REAL cm[DIM];

    for(i=0;i<n;i++) total_pot += m[i]*pot[i];
    total_pot *= 0.5;

    for(i=0;i<n;i++) total_kin += m[i]*(v[i][0]*v[i][0]+v[i][1]*v[i][1]+v[i][2]*v[i][2]);
    total_kin *= 0.5;

    for(k=0;k<DIM;k++) cm[k] = 0.0;
    for(i=0;i<n;i++){
	for(k=0;k<DIM;k++) cm[k] += x[i][k] * m[i];
    }

    printf("time = %g\n",t);
    printf("pot = %22.15e kin = %22.15e \n total= %22.15e ratio = %e\n",
	   total_pot,total_kin,total_pot+total_kin,total_kin/total_pot);
    printf(" DE = %e %g\n",(init_ene-(total_pot+total_kin))/(total_pot+total_kin),t);
    printf(" (%d) DCM= %22.15e %22.15e %22.15e \n",device_id,icm[0]-cm[0],icm[1]-cm[1],icm[2]-cm[2]);
}

void initial_energy(pot,x,v,m,n,init_ene,cm)
     REAL pot[NMAX];
     REAL v[NMAX][DIM];
     REAL x[NMAX][DIM];
     REAL m[NMAX];
     int n;
     REAL *init_ene;
     REAL cm[DIM];
{
    REAL total_pot;
    REAL total_kin;
    int i,k;	
	
    total_pot = 0.0; 
    total_kin = 0.0; 

    for(i=0;i<n;i++)  total_pot += m[i]*pot[i];
    total_pot *= 0.5;

    for(i=0;i<n;i++) total_kin += m[i]*(v[i][0]*v[i][0]+v[i][1]*v[i][1]+v[i][2]*v[i][2]);
    total_kin *= 0.5;

    printf("pot = %22.15e kin = %22.15e \n total= %22.15e ratio = %e\n",
	   total_pot,total_kin,total_pot+total_kin, total_kin/total_pot);
    *init_ene = total_pot+ total_kin;

    for(k=0;k<DIM;k++) cm[k] = 0.0;
    for(i=0;i<n;i++){
	for(k=0;k<DIM;k++) cm[k] += x[i][k] * m[i];
    }
}

void push_velocity(v,a,dt,n)
     REAL v[NMAX][DIM];
     REAL a[NMAX][DIM];
     REAL dt;
     int n;
{
    int j,k;

    for(j=0;j<n;j++){
	for(k=0;k<DIM;k++) v[j][k] += dt*a[j][k];
    }
}

void push_position(x,v,dt,n)
     REAL x[NMAX][DIM];
     REAL v[NMAX][DIM];
     REAL dt;
     int n;
{
    int j,k;	

    for(j=0;j<n;j++){
	for(k=0;k<DIM;k++) x[j][k] += dt*v[j][k];
    }
}

main(int argc, char **argv)
{
    static REAL x[NMAX][DIM];
    static REAL v[NMAX][DIM];
    static REAL m[NMAX];
    static REAL a[NMAX][DIM];
    static REAL pot[NMAX];
    static REAL ah[NMAX][DIM];
    static REAL poth[NMAX];
    REAL dt,eps,init_ene,time=0.0;
    REAL deouttime,eouttime,idtinv,endtime,epsinv;
    REAL icm[DIM],ratio;
    FILE *fopen(),*fp2;
    int n,i,k,j;
    char filename[200];
    double lt=0.0, st=0.0;
    double hlt=0.0, hst=0.0;
    double holdtime;
    double xsize=2.0,rotint=2.0,sustained=0.0;
    int simid;
    double gflops, gintps;
    double peak,dum;

    if(argc==2){
      fp2 = fopen(argv[1],"r");
    }else{
      fp2 = fopen("inputpara9","r");
    }
    if (!fp2) {
      perror("main");
      exit(1);
    }

    fscanf(fp2,"%lf%lf%lf%lf",&epsinv,&idtinv,&endtime,&deouttime);
    fscanf(fp2,"%s",filename);
#ifdef ANIMATION
    //    fscanf(fp2,"%lf%lf%d",&xsize,&rotint,&simid);
    //    set_anim_title(simid);
#endif 
    fclose(fp2);


    /******		(input	para)
			1.0/eps: softening parameter (9999=> eps=0)
			1.0/idt: initial timestep
			endtime: end time
			deoutime: interval of energy output
			filename: input file name
    ********/	

    dt = 1.0/idtinv;

    if(epsinv==9999.0){
	eps = 0.0;
    }else{
	eps = 1.0/epsinv;
    }

    printf("eps:%g dt:%g \ndeouttime:%g endtime:%g \n",
	   eps,dt,deouttime,endtime);
    printf("inputfile %s \n",filename);

    data_input(x,v,m,&n,filename,&time);

    printf("m %g x %g %g %g\n",m[0],x[0][0],x[0][1],x[0][2]);

    eouttime= deouttime + time;

    hold_grape();

    get_cputime(&hlt,&hst);
    holdtime = hlt;

    for(i=0;i<n;i++){
	for(k=0;k<DIM;k++) a[i][k] = 1.0;
	pot[i] = -1.0; 
    }

    scale_grape(1024.0, 1.0/n);

    force_grape(x,m,eps*eps,a,pot,n);
    initial_energy(pot,x,v,m,n,&init_ene,icm);
    fflush(stdout);

#if COMPARE_WITH_HOST
    force(x,m,eps*eps,ah,poth,n);
    compare_force(a, ah, n);
#endif

    get_cputime(&lt,&st);
    while(time < endtime){
	static int step = 0;
	push_velocity(v,a,0.5*dt,n);
	push_position(x,v,dt,n);

	time += dt;
	force_grape(x,m,eps*eps,a,pot,n);

#ifdef ANIMATION
	plot_star(x, n, time, 0.3, m, m[0]);
#endif

#if COMPARE_WITH_HOST
	force(x,m,eps*eps,ah,poth,n);
	compare_force(a, ah, n);
#endif
	push_velocity(v,a,0.5*dt,n);

       if( time >= eouttime) {
	    energy(time,pot,x,v,m,n,init_ene,icm);
	    eouttime += deouttime;
	    get_cputime(&lt,&st);
            printf("  cputime %g ",lt);
	    sustained = 38.0*((double)n)*((double)n)
		*(deouttime/dt)/(lt)/1e9;
            gintps = ((double)n)*((double)n)*(deouttime/dt)/(lt)/1e9;
	    gflops = 38.0 * gintps;
            printf("speed %g Gflops  %g Gint/s\n", gflops, gintps);
	    get_cputime(&lt,&st);
	    fflush(stdout);
	}
    }
    free_grape();	
}
