#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "hibutil.h"

#if G6API
#include "g6util.h"
#else
#include "g5util.h"
#endif


/*
 * Forward declaration of the hardware backend that force_grape() calls.
 * Exactly one of the two is declared, chosen by the G6API build flag;
 * both share the argument list of the public force routines.
 */
#if G6API
static void force_grape6(double (*x)[3], double *m, double eps2,
                         double (*a)[3], double *pot, int n);
#else
static void force_grape5(double (*x)[3], double *m, double eps2,
                         double (*a)[3], double *pot, int n);
#endif

void
force(double (*x)[3], double *m, double eps2,
      double (*a)[3], double *pot, int n)
{
  double r, r2;
  int i, j, k;

  for (i = 0; i < n; i++) {
    for (k = 0; k < 3; k++) {
      a[i][k] = 0.0;
    }
    pot[i] = 0.0;

    for (j = 0; j < n; j++) {
      if(i!=j){
      r2 = eps2;
      for (k = 0; k < 3; k++) {
	r2 += (x[i][k] - x[j][k])*(x[i][k] - x[j][k]);
      }
      r = sqrt(r2);
      for (k = 0; k < 3; k++) {
	a[i][k] += m[i]*(x[j][k]-x[i][k])/r2/r;
      }      
      pot[i] -= m[i]/r;
      }
    }
  }
}

void
force_single(double (*x)[3], double *m, double eps2,
      double (*a)[3], double *pot, int n)
{
  float r, r2;
  int i, j, k;

  for (i = 0; i < n; i++) {
    for (k = 0; k < 3; k++) {
      a[i][k] = 0.0;
    }
    pot[i] = 0.0;

    for (j = 0; j < n; j++) {
      if(i!=j){
      r2 = eps2;
      for (k = 0; k < 3; k++) {
	r2 += (x[i][k] - x[j][k])*(x[i][k] - x[j][k]);
      }
      r = sqrt(r2);
      for (k = 0; k < 3; k++) {
          a[i][k] += (float)m[i]* (float)(x[j][k]-x[i][k]) / r2 / r;
      }      
      pot[i] -= (float)m[i] / r;
      }
    }
  }
}


/*
 * Opens the GRAPE device for this process: calls g6_open_all() in a
 * G6API build and g5_open() otherwise.  Takes no arguments and returns
 * nothing; any failure reporting is left to the library call.
 */
void
hold_grape(void)
{
#if G6API
    g6_open_all();
#else
    g5_open();
#endif
}

/*
 * Counterpart of hold_grape(): calls g6_close_all() in a G6API build and
 * g5_close() otherwise.
 */
void
free_grape(void)
{
#if G6API
    g6_close_all();
#else
    g5_close();
#endif
}

/*
 * Hands the coordinate range and mass scale to the GRAPE-5 library.
 *
 *   xscale  half-width of the coordinate window; its sign is ignored, the
 *           window passed on is always [-|xscale|, +|xscale|]
 *   mscale  forwarded unchanged as the third argument of g5_set_range()
 *
 * In a G6API build this function does nothing.
 */
void
scale_grape(double xscale, double mscale)
{
#if !G6API
    /* Fold a negative half-width onto its positive counterpart. */
    const double half_width = (xscale < 0.0) ? -xscale : xscale;

    g5_set_range(-half_width, half_width, mscale);
#endif
    /* G6API: no range/scale setup is performed here. */
}

/*
 * Computes accelerations and potentials on the GRAPE hardware by
 * forwarding all arguments, unchanged, to the backend selected at build
 * time (force_grape6 with G6API, force_grape5 otherwise).
 *
 *   x     positions, n entries of 3 components
 *   m     masses, n entries
 *   eps2  squared softening length
 *   a     accelerations, n entries of 3 components (written by the backend)
 *   pot   potentials, n entries (written by the backend)
 *   n     number of particles
 */
void
force_grape(double (*x)[3], double *m, double eps2,
	    double (*a)[3], double *pot, int n)
{
#if G6API
    force_grape6(x, m, eps2, a, pot, n);
#else
    force_grape5(x, m, eps2, a, pot, n);
#endif
}

#if G6API

/*
 * G6API backend of force_grape().
 *
 * Loads all n particles as j-particles (mass and position only), sets the
 * system time to 0, then walks the particles in chunks of at most
 * g6_npipes() i-particles.  For each chunk it calls g6calc_firsthalf_all()
 * followed by g6calc_lasthalf_all(), which writes the results directly
 * into a[i..] and pot[i..].
 *
 *   x     positions, n entries of 3 components (read only)
 *   m     masses, n entries (read only)
 *   eps2  squared softening length
 *   a     accelerations, n entries of 3 components.  The current contents
 *         are also READ and passed to g6calc_firsthalf_all() as the "old"
 *         force, so the array must hold defined values on entry
 *         (presumably the library uses them for scaling -- TODO confirm
 *         against the g6util documentation).
 *   pot   potentials, n entries; read as the "old" potential on entry in
 *         the same way, then overwritten.
 *   n     number of particles
 *
 * Velocities and the "old" jerk are passed as zero.  Every i-particle gets
 * the same squared neighbour radius h*h with h = 0.4.
 *
 * The per-pipeline work buffers are allocated on the first call and kept
 * for the lifetime of the process (they are never freed).  The process is
 * terminated with exit(1) if the pipeline count is not positive or an
 * allocation fails.
 *
 * With NBCHECK enabled, the hardware neighbour list of every i-particle is
 * compared with one built on the host (all j != i with
 * |x[j]-x[i]|^2 + eps2 < h*h); any mismatch aborts the program.
 */
static void
force_grape6(double (*x)[3], double *m, double eps2,
             double (*a)[3], double *pot, int n)
{
    int i,k;
    int nn,ii;
    double h2,h;
    double ti;

    static int npipe = 0;
    static int *index2;
    static double (*xi)[3],(*vi)[3],(*jerk)[3],*h2i;
    static double (*foldi)[3],(*j6oldi)[3],*phioldi;

    if (npipe == 0) {
        int np = g6_npipes();

        /* A non-positive chunk size would never advance the i-loop below. */
        if (np <= 0) {
            fprintf(stderr, "force_grape6: g6_npipes() returned %d. abort.\n", np);
            exit(1);
        }

        index2 = malloc(sizeof *index2 * np);
        xi = malloc(sizeof *xi * np);
        vi = malloc(sizeof *vi * np);
        jerk = malloc(sizeof *jerk * np);
        h2i = malloc(sizeof *h2i * np);
        foldi = malloc(sizeof *foldi * np);
        j6oldi = malloc(sizeof *j6oldi * np);
        phioldi = malloc(sizeof *phioldi * np);

        if (!index2 || !xi || !vi || !jerk || !h2i ||
            !foldi || !j6oldi || !phioldi) {
            fprintf(stderr, "force_grape6: out of memory. abort.\n");
            exit(1);
        }

        /* Only mark the buffers as ready once all of them exist. */
        npipe = np;
    }

    ti = 0.0;
    h = 0.4;
    h2 = h*h;

    /* j-particles: address and index are both the particle number. */
    for(i=0;i<n;i++){
        g6_set_j_particle_mxonly_all(i, i, m[i],x[i]);
    }

    g6_set_ti_all(ti);

    for(i=0;i<n;i+=npipe){
        /* The last chunk may be shorter than the pipeline count. */
        nn = npipe;
        if(n-i<npipe) nn= n - i;

        for(ii=0;ii<nn;ii++){
            index2[ii] = i+ii;
            for(k=0;k<3;k++){
                xi[ii][k] = x[i+ii][k];
                vi[ii][k] = 0.0;
                foldi[ii][k] = a[i+ii][k];
                j6oldi[ii][k] = 0.0;
            }
            h2i[ii] = h2;
            phioldi[ii] = pot[i+ii];
        }
        g6calc_firsthalf_all(n,nn,index2,xi,vi,foldi,j6oldi,phioldi,eps2,h2i);

        g6calc_lasthalf_all(n,nn,index2,xi,vi,eps2,h2i,a+i,jerk,pot+i);

#if NBCHECK
        {
            enum { NBL_CAPACITY = 1024 };
            /* maxlength is the list limit handed to the library; it is
               presumably expected never to return more entries than that
               -- TODO confirm against the g6util documentation. */
            static int maxlength=1020,nblen,nbl[NBL_CAPACITY],ret,ret2,inb,nblh[NBL_CAPACITY],nnbh,nflag;
            int j;
            double r2;
            int is_ovflwn = 0;
            int had_err = 0;

            nflag =1;
            g6_set_neighbour_list_sort_mode_all(nflag);

            ret=g6_read_neighbour_list_all();
            if(ret==1){
                static int didwarned = 0;
                if (!didwarned) {
                    printf("NB list overflown\n");
                }
                didwarned = 1;
                is_ovflwn = 1;
            }else{
                for(ii=0;ii<nn;ii++){
                    ret2=g6_get_neighbour_list_all(ii,maxlength,&nblen,nbl);
                    if(ret2==1){
                        printf("NB list overflown or larger than maxlength\n");
                        is_ovflwn = 1;
                    }else{
                        /* Build the reference list on the host.  Entries
                           beyond the buffer are counted but not stored, so
                           an oversized list is reported as a length
                           mismatch instead of overrunning nblh. */
                        nnbh = 0;
                        for(j=0;j<n;j++){
                            r2 = (x[j][0]-x[i+ii][0])*(x[j][0]-x[i+ii][0])
                                + (x[j][1]-x[i+ii][1])*(x[j][1]-x[i+ii][1])
                                + (x[j][2]-x[i+ii][2])*(x[j][2]-x[i+ii][2])+eps2;
                            if((r2<h2)&&((i+ii)!=j)){
                                if (nnbh < NBL_CAPACITY) {
                                    nblh[nnbh] = j;
                                }
                                nnbh++;
                            }
                        }
                        if(nblen!=nnbh){
                            int nshown = (nnbh < NBL_CAPACITY) ? nnbh : NBL_CAPACITY;

                            printf("g6 ii %d nblen %2d:",i+ii,nblen);
                            for(inb=0;inb<nblen;inb++) printf("%d ",nbl[inb]);
                            printf("\n");

                            printf("ho ii %d nblen %2d:",i+ii,nnbh);
                            for(inb=0;inb<nshown;inb++) printf("%d ",nblh[inb]);
                            printf("\n");
                            had_err = 1;
                        }else{
                            for(inb=0;inb<nblen;inb++){
                                if(nbl[inb]!=nblh[inb]){
                                    printf("diff i %d g6 %d host %d\n",i+ii,nbl[inb],nblh[inb]);
                                    had_err = 1;
                                }
                            }
                        }
                    }
                }
            }
            if (is_ovflwn) {
                /* Warn for the first ten chunks, then once more, then stay quiet. */
                static int nvisit = 0;
                static int didwarned = 0;
                if (nvisit < 10) {
                    fprintf(stderr, "some neighbour lists overflown (non fatal).\n");
                }
                else if (!didwarned) {
                    fprintf(stderr, "some neighbour lists overflown many times. "
                            "this is not fatal and thus do not warn any more for readability.\n");
                    didwarned = 1;
                }
                nvisit++;
            }
            if (had_err) {
                fprintf(stderr, "wrong neighbour list found. abort.\n");
                exit(1);
            }
        }
#endif // NBCHECK
    }

}

#else // G5API

/*
 * GRAPE-5 backend of force_grape().
 *
 * Sends the softening and all n particles (as j-particles) to the board,
 * then feeds the same particles as i-particles in chunks of at most
 * g5_get_number_of_pipelines(), running the pipeline and reading the
 * forces and potentials back into a[i..] and pot[i..] for every chunk.
 *
 *   x     positions, n entries of 3 components (read only)
 *   m     masses, n entries (read only)
 *   eps2  squared softening length
 *   a     output accelerations, n entries of 3 components
 *   pot   output potentials, n entries
 *   n     number of particles
 *
 * When eps2 is non-zero, m[i]/sqrt(eps2) is added to pot[i] afterwards.
 * Presumably this cancels the softened self-interaction term that the
 * hardware includes because particle i is also in the j set -- TODO
 * confirm against the g5util documentation.
 *
 * Terminates the process with exit(1) if the library reports a
 * non-positive pipeline count.
 */
static void
force_grape5(double (*x)[3], double *m, double eps2,
             double (*a)[3], double *pot, int n)
{
    int i;
    int nii;

    /* send i particle softening */
    g5_set_eps2_to_all(eps2);

    /* send j particle */
    g5_set_jp(0, n, m, x);

    /* set N */
    g5_set_n(n);

    /* send i, run, and then get force */
    nii = g5_get_number_of_pipelines();

    /* A non-positive chunk size would never advance the loop below. */
    if (nii <= 0) {
        fprintf(stderr,
                "force_grape5: g5_get_number_of_pipelines() returned %d. abort.\n",
                nii);
        exit(1);
    }

    for (i = 0; i < n; i += nii) {
        /* The last chunk may be shorter than the pipeline count. */
        int nn = ((i + nii) > n) ? (n - i) : nii;

        g5_set_xi(nn, x + i);

        g5_run();
        g5_get_force(nn, a + i, pot + i);
    }

    if (eps2 != 0.0) {
        const double epsinv = 1.0/sqrt(eps2);

        for (i = 0; i < n; i++) {
            pot[i] += m[i] * epsinv;
        }
    }
}

#endif // G5API
