/*
 * g6util.c
 *
 *  (partly from g6chip.c by JM)
 *
 *
 Note: Guide to setting xunit, tunit, xscale etc...
       xunit, tunit: the location of the binary point
                     for the fixed point format, counted
		     from LSB. 2^(63-xunit) gives the max value
		     for position. For systems with the standard
		     unit, xunit=tunit=54 should work fine
       fscale: -fscale+xunit*2-512 is used as the argument
               for ldexp. If the force is around 2^k, the argument
	       for ldexp should be k-(48-56), which means
	       fscale = (48-56)-512+xunit*2-k ??

       jscale: similarly,
	       jscale = (48-56)-512+xunit*3-k ??
       pscale: similarly,
	       pscale = (48-56)-512+xunit-k ??
       
   assume int is a 32-bit integer

*/

#include <math.h>
#include <stdio.h>

/*#define IJPDMA */

#define LONG long long int
#define ULONG unsigned long long int 
#define CONVERT_DOUBLE_TO_GRAPE_INT_POS(x,unit)  (rint((LONG)ldexp((x),(unit))))
#define CONVERT_GRAPE_INT_POS_TO_DOUBLE(x,unit) (ldexp((double)(x),-((int)(unit))))

/* Lengths used for the predictor fields; they populate lentab[] below. */
#define PRED_V_LEN 24
#define PRED_A_LEN 20
#define PRED_A1_LEN 16
#define PRED_A2_LEN 10
#define LONGBITS 64

/* Not referenced in the visible part of this file. */
#define GRAPE6_NCHIPS 4

/* Binary-point positions set by g6_set_tunit() / g6_set_xunit(). */
static LONG tunit;
static LONG xunit;
/* 2*xunit, cached by g6_set_xunit(). */
static LONG xunit2;
/* xscale = 2^xunit, xscale2 = xscale^2, xscaleinv = 1/xscale. */
static double xscale,xscale2, xscaleinv;
/* Per-term offsets added in determine_predictor_exponent2(). */
static int mkmintab[5] = {0,1,6,10,16};
/* Field lengths indexed by predictor term (0 = position ... 4 = a2). */
static int lentab[5] = {LONGBITS, PRED_V_LEN, PRED_A_LEN,
PRED_A1_LEN, PRED_A2_LEN};
/* Number of i-particles, recorded by g6_set_nip() and read by g6_get_force(). */
static unsigned int ni;

/* Scale exponents, indexed as [device id][i-particle address]. */
static int fscale[3][48];
static int jscale[3][48];
static int phiscale[3][48];
/* rscale word that g6_open() writes for the cutoff table. */
static int global_rscale = 0x0073c800; /* temporary */

void g6_set_tunit(int newtunit)
{
  tunit = newtunit;
}

/* By-reference variant of g6_set_tunit(): dereferences and forwards. */
void g6_set_tunit_(int * newtunit)
{
  const int value = *newtunit;

  g6_set_tunit(value);
}

/*
 * Set the binary-point position used for positions and refresh the
 * derived file-scope values:
 *   xunit2    = 2*xunit
 *   xscale    = 2^xunit
 *   xscaleinv = 1/xscale
 *   xscale2   = xscale^2
 *
 * ldexp(1.0, n) yields exactly 2^n for every n in double range.  It
 * replaces the former split between an integer shift (undefined for
 * negative counts) and pow() reached through a block-scope old-style
 * declaration "double pow();", which is no longer valid in C23.
 */
void g6_set_xunit(int newxunit)
{
  xunit = newxunit;
  xunit2 = 2*xunit;
  xscale = ldexp(1.0, newxunit);
  xscaleinv = 1.0/xscale;
  xscale2 = xscale * xscale;
}

/* By-reference variant of g6_set_xunit(): dereferences and forwards. */
void g6_set_xunit_(int * newxunit)
{
  const int value = *newxunit;

  g6_set_xunit(value);
}

/*
 * Chip count used to select the register setup in g6_open() and to split a
 * j-particle address into (chip, memory address) in g6_set_j_particle().
 */
static int nchip_on_module=4;
/*static int devid=0;*/

/* By-reference variant of g6_close(): dereferences and forwards. */
void g6_close_(int * clusterid)
{
  const int id = *clusterid;

  g6_close(id);
}

/*
 * Initialise the scale tables and program the board identified by clusterid.
 *
 * Steps, in order:
 *   1. Fill phiscale/fscale/jscale for device slots 0..2 and all 48
 *      addresses from three sign-extended 10-bit fields of one constant.
 *   2. lock_grape(0) and g6_init(devid).
 *   3. Send a fixed sequence of register blocks through g6_set_ipdata().
 *
 * In every block sent here ipdata[0] is a register address and ipdata[1]
 * matches the number of data words set after it (except the FORG table,
 * see below) -- presumably a word count; confirm against g6_set_ipdata().
 * The order of the writes is kept exactly as in the original bring-up code.
 */
void g6_open(int clusterid)
{
  /* tunit and xunit shadow the file-scope variables of the same name and
     are only referenced inside the disabled "#if 0" section.  jpdata,
     nword, ichip and j are not used in this function. */
  int i,tunit,xunit,itmp,ii;
  unsigned int ipdata[200];
  unsigned int jpdata[200];
  int nword,ichip,j;
  int devid;
  devid = clusterid;

#if 0
  tunit = 51;                         /* 2^51 */
  g6_set_tunit_(&tunit);
  xunit = 50;                         /* 2^50 */
  g6_set_xunit_(&xunit);
#endif
  
  /* 0x25da3eaa is temporary */

  /* Unpack three 10-bit fields: bits 29..20 phiscale, 19..10 fscale,
     9..0 jscale. */
  itmp = 0x25da3eaa;
  for(ii=0;ii<3;ii++){
    phiscale[ii][0] = 0x3ff & (itmp>>20);
    fscale[ii][0] = 0x3ff & (itmp>>10);
    jscale[ii][0] = 0x3ff & itmp;

    /* Sign-extend each 10-bit field when its bit 9 is set. */
    if((0x1&(phiscale[ii][0]>>9))==1) phiscale[ii][0] = 0xfffffc00 | phiscale[ii][0];
    if((0x1&(fscale[ii][0]>>9))==1) fscale[ii][0] = 0xfffffc00 | fscale[ii][0];
    if((0x1&(jscale[ii][0]>>9))==1) jscale[ii][0] = 0xfffffc00 | jscale[ii][0];

    /* Replicate entry 0 to the remaining 47 addresses. */
    for(i=1;i<48;i++){
      phiscale[ii][i] = phiscale[ii][0];
      fscale[ii][i] = fscale[ii][0];
      jscale[ii][i] = jscale[ii][0];
    }
  }
/*printf("fscale %d phiscale %d jscale %d\n",fscale[0],phiscale[0],jscale[0]);*/

  /* Note: the lock is taken with a literal 0, not with devid. */
  lock_grape(0);
  g6_init(devid);

  /*  g6_send_fpga_data(devid,"/usr2/fukushig/g6tb/pld_g6mod2_thru/fo_unit_thru2.ttf");*/

  /* g6_send_fpga_data(devid,"/usr2/fukushig/g6tb/pld_g6mod2/fo_unit_with_fifo_fc2.ttf"); */

                    /* JPRG */
  ipdata[0] = 0x812;
  ipdata[1] = 0x4;
  ipdata[2] = 0x1;                   /* ADLY */
  ipdata[3] = 0x1;                   /* WDLY */
  ipdata[4] = 0x2;                   /* ODLY */
  ipdata[5] = 0x1;                   /* DDLY */
  g6_set_ipdata(devid,ipdata);
  
  /* VCID setup: one write per chip; the set of values depends on
     nchip_on_module (only 1, 2 and 4 are handled). */
  if(nchip_on_module==1){
    ipdata[0] = 0x811;
    ipdata[1] = 0x1;
    ipdata[2] = 0x0;               /* VCID (0x0 --> 0) */
    g6_set_ipdata(devid,ipdata);
  }

  if(nchip_on_module==2){
    ipdata[0] = 0x811;
    ipdata[1] = 0x1;
    ipdata[2] = 0x48800;               /* VCID (0x122 --> 0) */
    g6_set_ipdata(devid,ipdata);

    ipdata[2] = 0x48c01;               /* VCID (0x123 --> 1) */
    g6_set_ipdata(devid,ipdata);
  }

  if(nchip_on_module==4){
    ipdata[0] = 0x811;
    ipdata[1] = 0x1;
    ipdata[2] = 0x00000;               /* VCID (0x000 --> 0) */
    g6_set_ipdata(devid,ipdata);

    ipdata[2] = 0x00401;               /* VCID (0x001 --> 1) */
    g6_set_ipdata(devid,ipdata);

    ipdata[2] = 0x00802;               /* VCID (0x002 --> 2) */
    g6_set_ipdata(devid,ipdata);

    ipdata[2] = 0x00c03;               /* VCID (0x003 --> 3) */
    g6_set_ipdata(devid,ipdata);
  }

  /* memory initialization has no effect  */
  /* because internal register is cleared to 0 */
  /* before sending data to memory */

  ipdata[0] = 0x800;                  /* JPRG */
  ipdata[1] = 0x11;
  /* Address conversion table (16 entries, ipdata[2..17]).  A disabled
     identity mapping "for(i=0;i<0x10;i++) ipdata[i+2] = i;" used to sit here. */
  ipdata[2] = 1;
  ipdata[3] = 3;
  ipdata[4] = 5;
  ipdata[5] = 14;

  ipdata[6] = 15;
  ipdata[7] = 0;
  ipdata[8] = 2;
  ipdata[9] = 4;
  ipdata[10] = 8;
  ipdata[11] = 9;
  ipdata[12] = 10;
  ipdata[13] = 11;
  ipdata[14] = 12;
  ipdata[15] = 6;
  ipdata[16] = 7;
  ipdata[17] = 13;    
  /* ND; the values 0x10 and 0x8 were tried previously. */
  ipdata[18] = 0x4;                  /* ND */    
  g6_set_ipdata(devid,ipdata);
 

  /* First IPRG table, sent only for the index write loop that follows;
     it is overwritten by the second IPRG table further down. */
  ipdata[0] = 0x400;                  /* IPRG */
  ipdata[1] = 0xd;
  ipdata[2] = 1;
  ipdata[3] = 3;
  ipdata[4] = 5;
  ipdata[5] = 11;
  ipdata[6] = 0;
  ipdata[7] = 0;  
  ipdata[8] = 0;
  ipdata[9] = 0;
  ipdata[10] = 0;
  ipdata[11] = 0;
  ipdata[12] = 0;
  ipdata[13] = 0;
  ipdata[14] = 0;
  g6_set_ipdata(devid,ipdata);
  
  ipdata[0] = 0x410;                  /* IPRG */
  ipdata[1] = 0x1;
  ipdata[2] = 0x4;                    /* ND */  
  g6_set_ipdata(devid,ipdata);

  /* Write the same 4-word record to each of the 48 addresses (ii<<4). */
  ipdata[1] = 0x4;  
  ipdata[2] = 0;
  ipdata[3] = 0;
  ipdata[4] = 0;
  ipdata[5] = 1; /* index (jpindex is always 0 )*/
  for(ii=0;ii<48;ii++){
    ipdata[0] = ii<<4;
    g6_set_ipdata(devid,ipdata);
  }    

  ipdata[0] = 0x400;                  /* IPRG */
  ipdata[1] = 0xd;
  /* Address conversion table (13 entries, ipdata[2..14]).  A disabled
     identity mapping "for(i=0;i<0xd;i++) ipdata[i+2] = i;" used to sit here. */
  ipdata[2] = 0;
  ipdata[3] = 2;
  ipdata[4] = 4;
  ipdata[5] = 9;
  ipdata[6] = 12;

  ipdata[7] = 11;  
  ipdata[8] = 10;
  ipdata[9] = 1;
  ipdata[10] = 3;
  ipdata[11] = 5;
  ipdata[12] = 6;
  ipdata[13] = 7;
  ipdata[14] = 8;
  g6_set_ipdata(devid,ipdata);
  
  ipdata[0] = 0x410;                  /* IPRG */
  ipdata[1] = 0x2;
  /* ND; the values 0xd and 0xa were tried previously. */
  ipdata[2] = 0x5;                    /* ND */  
  ipdata[3] = 0x0;                    /* testmode */
  g6_set_ipdata(devid,ipdata);
  
  ipdata[0] = 0x1000;                 /* FORG */
  ipdata[1] = 0x10;

  /* NOTE(review): ipdata[1] announces 0x10 words but both branches below
     set only ipdata[2..16] (15 words); ipdata[17] still holds the value 13
     left from the JPRG table above.  For nchip_on_module == 3 none of the
     entries are refreshed.  Confirm whether this is intended. */
  if((nchip_on_module==2)||(nchip_on_module==4)){
    ipdata[2] = 1;                    /* address conversion table */ 
    ipdata[3] = 0;
    ipdata[4] = 3;
    ipdata[5] = 2;
    ipdata[6] = 5;
    ipdata[7] = 4;
    ipdata[8] = 7;
    ipdata[9] = 6;
    /*    for(i=0x8;i<0xf;i++) ipdata[i+2] = i; */
    ipdata[10] = 8;
    ipdata[11] = 9;
    ipdata[12] = 10;
    ipdata[13] = 11;
    ipdata[14] = 12;
    ipdata[15] = 13;
    ipdata[16] = 14;
  }
  if(nchip_on_module==1){
    for(i=0x0;i<0xf;i++) ipdata[i+2] = i; 
  }
  g6_set_ipdata(devid,ipdata);
  
  ipdata[0] = 0x1013;                 /* FORG */
  ipdata[1] = 0x2;                    
  ipdata[2] = 0xe;                    /* ND */
  ipdata[3] = 0x0;                    /* INACTIVE */
  g6_set_ipdata(devid,ipdata);

  ipdata[0] = 0x1400;                 /* CARG */
  ipdata[1] = 0x2;                    
  ipdata[2] = 0x2;                    /* LRAM */
  ipdata[3] = 0x32;                   /* LFORCE */
  g6_set_ipdata(devid,ipdata);

  /* 128 identical table words; the highest index written is ipdata[129]. */
  ipdata[0] = 0xc00;                  /* set cutoff table (no cutoff) */
  ipdata[1] = 0x80;
  for(i=0;i<0x80;i++) ipdata[i+2] = 0x2000001;
  g6_set_ipdata(devid,ipdata);

  ipdata[0] = 0xc80;                  /* set rscale for cutoff table */
  ipdata[1] = 0x1;
  ipdata[2] = global_rscale;
  g6_set_ipdata(devid,ipdata);

}

/* By-reference variant of g6_open(): dereferences and forwards. */
void g6_open_(int * clusterid)
{
  const int id = *clusterid;

  (void)g6_open(id);
}

void g6_set_ti(int clusterid, double ti)
{
  unsigned int ipdata[10];
  ULONG iti;
  int devid;
  devid = clusterid;
  
  iti = (ULONG) ldexp(ti, (int)tunit);
  ipdata[0] = 0x1800; 
  ipdata[1] = 0x2;
  ipdata[2] = (iti>>32);
  ipdata[3] = iti ;
  g6_set_ipdata(devid,ipdata);
}

/* By-reference variant of g6_set_ti(): dereferences and forwards. */
void g6_set_ti_(int *clusterid, double *ti)
{
  const int id = *clusterid;
  const double t = *ti;

  g6_set_ti(id, t);
}

/*
 * Derive the time-related predictor fields of a j-particle.
 *
 *   tjlsb  (out) least significant bit of the integer rtj/rdtj
 *   dtjmsb (out) exponent of rdtj minus the exponent of 2^-tunit
 *   dtexp  (out) 1 - (frexp exponent of rdtj)
 *   rtj          particle time
 *   rdtj         particle timestep; must be an exact power of two
 *   tunit        time resolution is 2^-tunit
 *
 * Returns 0 on success.  Returns -1, after printing a message to stderr,
 * when rdtj is not a power of two or rtj is not a multiple of rdtj.
 * dtexp is written before the power-of-two check; the other outputs are
 * only written once the checks that precede them have passed.
 */
LONG convert_predictor_time(ULONG *  tjlsb, /* LSB of tj */
			    ULONG * dtjmsb, /* location of MSB of delta ti */
			    LONG * dtexp, /* exponent of dt in physical unit */
			    double rtj, /* particle time */
			    double rdtj, /* particle timestep */
			    ULONG tunit /* time resolution=2**-tinit*/)
{
  double dtmin, dtfrac;
  int idtexp, dtminexp;
  ULONG t_int;

  dtmin = 1.0/(((ULONG)1)<<tunit);
  dtfrac = frexp(rdtj, &idtexp);
  *dtexp = 1-idtexp;
  /* frexp returns exactly 0.5 only for positive powers of two. */
  if(dtfrac != 0.5){
    /* The value was missing from the argument list before, which made
       this call undefined behaviour. */
    fprintf(stderr, "convert_predictor: dtfrac  = %le != 0.5!\n", dtfrac);
    return -1;
  }
  frexp(dtmin, &dtminexp);
  *dtjmsb = idtexp - dtminexp;
  t_int = rtj/rdtj;
  if (t_int * rdtj != rtj){
    fprintf(stderr, "tj not multiple of dtj %le %le %le\n",
	    rtj, rdtj, rtj/rdtj);
    return -1;
  }
  *tjlsb = (ULONG) t_int & (ULONG) 1;
  return 0; 
}


/* unions for type conversion */
/* DATAPACK overlays a double with one 64-bit integer and with two 32-bit
   words, so the bit pattern of a double can be inspected. */
typedef union datapack{
    unsigned int i32[2];
    ULONG        i64;
    double       d;
}DATAPACK;

/* SHORTPACK overlays a float with a 32-bit unsigned integer, giving access
   to the bit pattern of a float. */
typedef union{
    unsigned int i32;
    float f;
}SHORTPACK;


/*
 * Variant for x86 (high word at the larger address) with 32-bit int.
 *
 * Converts the position x[0] to fixed point (x[0]*xscale) and stores it in
 * *ix.  The four predictor words ix32[0..3] are cleared; when the biased
 * exponent expv = e0b + xunit is positive it is placed in ix32[0] above
 * bit lentab[1].
 *
 * The conversion of x[1..4] (velocity and higher terms) into mantissas is
 * disabled in this file, so the mantissa part of ix32[] is always zero.
 * Previously ix32[0] was OR-ed with the exponent without ever being
 * initialised, so the caller received an indeterminate word.
 *
 * dtexp is only needed by the disabled mantissa conversion.
 * Always returns 0.
 */
int convert_predictor_using_e0b2x86i32(ULONG *ix, /* converted position*/
				       unsigned int ix32[4],
				       double x[5], /* position, vel ... */
				       int e0b,
				       int dtexp)

{
    LONG ixsigned;
    int expv;

    (void)dtexp;

    /* convert x and set exponent of v */
    ixsigned = x[0]*xscale;
    *ix = (ULONG) ixsigned;

    expv = e0b+xunit;
#ifdef INTERNAL_OUT
    fprintf(stderr, "ia32 x[0], xscale, result = %g %g %llx %x\n",
	    x[0], xscale, *ix, (unsigned int)expv);
#endif
    ix32[0] = ix32[1] = ix32[2] = ix32[3] = 0;
    if(expv > 0){
        ix32[0] |= (((unsigned int)expv) << (lentab[1]+1));
    }
#ifdef INTERNAL_OUT
    fprintf(stderr, "final ix32[0] = %x\n", ix32[0]);
#endif
    return 0;
}


/*
 * Variant for x86 (high word at the larger address) with 32-bit int.
 *
 * Runs convert_predictor_using_e0b2x86i32() and widens its four 32-bit
 * predictor words into ix[1..4]; ix[0] is written by the callee through
 * the ix pointer.  Returns the callee's status.
 */
ULONG convert_predictor_using_e0b2x86(ULONG ix[5], /* converted position, vel... */
				  double x[5], /* position, vel ... */
				  LONG e0b,
				  LONG dtexp)

{
    unsigned int packed[4];
    ULONG status;
    int k;

    status = convert_predictor_using_e0b2x86i32(ix, packed, x,(int) e0b, (int) dtexp);

    for(k=0;k<4;k++){
        ix[k+1] = packed[k];
    }
    return status;
}

/*
 * Determine the common exponent e0b for the velocity..a2 predictor terms.
 *
 * The scan over x[1..4] is currently disabled: the loop bound is 1 where
 * it used to be 5, so the loop body never runs and *e0b is always set to
 * -1000000000 - 0x3fe.  The body is kept for when the higher-order terms
 * are re-enabled: for each term it takes the 11-bit exponent field of the
 * double, subtracts dtexp*i, adds mkmintab[i] and keeps the maximum.
 * Zero is not special-cased; it simply yields a very small exponent.
 *
 * xunit is not used.  Always returns 0.
 */
ULONG determine_predictor_exponent2(LONG * e0b,
				    LONG dtexp,
				    double x[5], /* position, vel ... */
				    ULONG xunit /* position resolution= 2**-xunit*/)
{
  register ULONG * ix = (ULONG *) x;
  register int e0bmax = -1000000000;
  int i;

  (void)xunit;

  for(i=1;i<1;i++){    /* disabled on purpose; full range is i<5 */
    register ULONG l = ix[i];
    register int ek0 = (l>>52)&0x7ff;
    register int e0btmp = ek0 - dtexp*i + mkmintab[i];
    if (e0bmax < e0btmp) e0bmax = e0btmp;
  }
  *e0b = e0bmax-0x3fe;
#ifdef INTERNAL_OUT
  /* LONG is long long, so the conversion must be %llx (was %lx). */
  fprintf(stderr,"(determine) e0b = %llx\n", (unsigned long long)*e0b);
#endif
  return 0;
}

/*
 * Convert the predictor data of one j-particle for all three axes.
 *
 * Only the position x[] is converted at present: copying v, aby2, a1by6
 * and a2by18 into the work array is disabled, and so is the call to
 * convert_predictor_time().  Consequently tjlsb and dtjmsb are NOT written
 * by this function, and rtj, rdtj and tunit are unused.
 *
 * For each axis k, ix[k][0] receives the fixed-point position and
 * ix[k][1..4] the predictor words from convert_predictor_using_e0b2x86().
 *
 * Returns the OR of the helpers' status values (0 on success).
 *
 * Fixes: err and dtexp used to be read without ever being initialised, so
 * the return value was indeterminate.  Both now start at 0, and the unused
 * entries of the work array are zeroed.
 */
ULONG convert_predictor_vector(ULONG *  tjlsb, /* LSB of tj */
			ULONG * dtjmsb, /* location of MSB of delta ti */
			ULONG ix[3][5], /* converted position, vel... */
			double rtj, /* particle time */
			double rdtj, /* particle timestep */
			double x[3], /* position, vel ... */
			double v[3], /* position, vel ... */
			double aby2[3], /* position, vel ... */
			double a1by6[3], /* position, vel ... */
			double a2by18[3], /* position, vel ... */
			ULONG xunit, /* position resolution= 2**-xunit*/
			ULONG tunit /* time resolution=2**-tinit*/)
{
  double xdata[3][5] = {{0.0}};
  LONG dtexp = 0;             /* would come from convert_predictor_time() */
  LONG e0b = -100000000;      /* running maximum of the per-axis exponents */
  LONG e0btmp;
  ULONG err = 0;
  int k;

  (void)tjlsb; (void)dtjmsb; (void)rtj; (void)rdtj; (void)tunit;
  (void)v; (void)aby2; (void)a1by6; (void)a2by18;

  for(k=0;k<3;k++){
    xdata[k][0] = x[k];
    err |= determine_predictor_exponent2(&e0btmp, dtexp,  xdata[k], xunit);
    if (e0btmp > e0b) e0b = e0btmp;
  }
  for(k=0;k<3;k++){
    err |= convert_predictor_using_e0b2x86(ix[k],  xdata[k],  e0b,  dtexp);
  }

  return err;
}



int g6_set_j_particle(int clusterid, int address,
				 int index,
				 double tj, /* particle time */
				 double dtj, /* particle time */
				 double mass,
				 double a2by18[3], /* a2dot divided by 18 */
				 double a1by6[3], /* a1dot divided by 6 */
				 double aby2[3], /* a divided by 2 */
				 double v[3], /* velocity */
				 double x[3] /* position */)
{
  static unsigned int jpdata[20],nword,ichip,mem_adr;
  float fmass;
  ULONG tjlsb,dtjmsb,ix[3][5],retcode;
  int devid;
  devid = clusterid;

  retcode = convert_predictor_vector(&tjlsb, &dtjmsb, ix,
				     tj, dtj, x, v, aby2, a1by6, a2by18,
				     xunit, tunit);

  fmass = mass;
/*  nword = 18;*/
/*  nword = 10;*/
  nword = 6;

  ichip = (address)%(nchip_on_module) ;
  mem_adr = (address)/(nchip_on_module) ;
/*printf("address %d ichip %d mem %d\n",(address),ichip,mem_adr);*/

  /*  printf("jp %llx %llx %llx\n",ix[0][0],ix[1][0],ix[2][0]);*/

  jpdata[0] = 0xffc00 | ichip;
  jpdata[1] = mem_adr<<3;

  jpdata[2] = (ix[0][0]>>32); 
  jpdata[3] = (ix[1][0]>>32); 
  jpdata[4] = (ix[2][0]>>32); 
  jpdata[5] = *((unsigned int *)(&fmass)) ;
  /*  jpdata[6] = index;*/
  /*
  jpdata[2] = ix[0][0];
  jpdata[3] = (ix[0][0]>>32); 
  jpdata[4] = ix[1][0]; 
  jpdata[5] = (ix[1][0]>>32); 
  jpdata[6] = ix[2][0]; 
  jpdata[7] = (ix[2][0]>>32); 
  jpdata[8] = *((unsigned int *)(&fmass)) ;
  jpdata[9] = index;
  */
  
  /*  jpdata[8] = ix[0][1];
  jpdata[9] = ix[1][1];
  jpdata[10] = ix[2][1];
  jpdata[11] = (ix[1][2]<<21) | ix[0][2];
  jpdata[12] = (ix[0][3]<<31) | (ix[2][2]<<10) | (ix[1][2]>>11);
  jpdata[13] = (ix[1][3]<<16) | (ix[0][3]>>1);
  jpdata[14] = (ix[1][4]<<29) | (ix[0][4]<<18) | (ix[2][3]<<1) | (ix[1][3]>>16);
  */
  /*  jpdata[15] = (tjlsb<<25) | (dtjmsb<<19) | (ix[2][4]<<8) | (ix[1][4]>>3);*/
/*printf("jpdata15 %x tjlsb %x dtjmsb %x\n",jpdata[15],tjlsb,dtjmsb);*/

  g6_set_jpdata(devid,nword,jpdata);

  return retcode;
}

/*
 * By-reference variant of g6_set_j_particle(): scalar arguments are passed
 * as pointers, arrays are forwarded unchanged.
 *
 * Returns the value returned by g6_set_j_particle().  The function used to
 * fall off its end without a return statement, so any caller that used the
 * result invoked undefined behaviour.
 */
int g6_set_j_particle_(int *clusterid, int *address,
				 int *index,
				 double *tj, /* particle time */
				 double *dtj, /* particle time */
				 double *mass,
				 double a2by18[3], /* a2dot divided by 18 */
				 double a1by6[3], /* a1dot divided by 6 */
				 double aby2[3], /* a divided by 2 */
				 double v[3], /* velocity */
				 double x[3] /* position */)
{
   return g6_set_j_particle((*clusterid),(*address),(*index),(*tj),(*dtj),(*mass),
		    a2by18, a1by6, aby2, v, x);
}

/*
 * Write the i-particle count nip to register 0x1012 of device clusterid,
 * then record it in the file-scope variable ni.
 */
void g6_set_nip(int clusterid, int nip)
{
  unsigned int packet[10];
  const int devid = clusterid;

  packet[0] = 0x1012;
  packet[1] = 0x1;
  packet[2] = nip;
  g6_set_ipdata(devid,packet);

  ni = nip;
}

/* By-reference variant of g6_set_nip(): dereferences and forwards. */
void g6_set_nip_(int *clusterid, int * nip)
{
  const int id = *clusterid;
  const int count = *nip;

  g6_set_nip(id, count);
}

/*
 * Compute the scale exponents for i-particle `address` on device
 * `clusterid` from representative magnitudes, and store them in
 * fscale/jscale/phiscale[clusterid][address].
 *
 *   amax = max_k |acc[k]|
 *   jmax = max_k ( |jerk[k]|*jfactor + |acc[k]|*ffactor )
 *
 * With e = frexp exponent of the respective magnitude:
 *   fscale   = -e - 461 + 2*xunit
 *   jscale   = -e - 491 + 2*xunit + tunit
 *   phiscale = -e - 461 + xunit
 *
 * No range check is made on clusterid or address.
 */
void g6_set_i_particle_scales_from_real_value(int clusterid, 
	                           int address,
				   double acc[3],
                                   double jerk[3],
                                   double phi,
				   double jfactor,
				   double ffactor)
{
  const int devid = clusterid;
  const int slot = address;
  double amax = fabs(acc[0]);
  double jmax = fabs(jerk[0])*(jfactor) + fabs(acc[0])*(ffactor);
  int axis;
  int e;

  for(axis=1;axis<3;axis++){
    const double jtmp = fabs(jerk[axis])*(jfactor)+ fabs(acc[axis])*(ffactor);

    if(fabs(acc[axis])>amax){
      amax = fabs(acc[axis]);
    }
    if(jtmp>jmax){
      jmax = jtmp;
    }
  }

  frexp(amax, &e);
  fscale[devid][slot] = - e - 461 + xunit2;

  frexp(jmax, &e);
  jscale[devid][slot] = - e - 491 + xunit2 + tunit;

  frexp(phi, &e);
  phiscale[devid][slot] = - e - 461 + xunit;
}

/*
 * By-reference variant of g6_set_i_particle_scales_from_real_value():
 * scalars are dereferenced, arrays are forwarded unchanged.
 */
void g6_set_i_particle_scales_from_real_value_(int *clusterid, 
	                           int *address,
				   double acc[3],
                                   double jerk[3],
                                   double *phi,
				   double *jfactor,
				   double *ffactor)
{
  const int id = *clusterid;
  const int slot = *address;

  g6_set_i_particle_scales_from_real_value(id, slot, acc, jerk,
					   *phi, *jfactor, *ffactor);
}

#ifndef IJPDMA

void g6_set_i_particle(int clusterid, int address,
		       int index,
			double x[3], /* position */
			double v[3], /* velocity */
 		       double eps2,
		       double h2)
{
  static unsigned int ipdata[20];
  LONG  ixsigned;
  ULONG ix[3];
  int k,ii;
  float fv[3],feps2,fh2; 
  int devid;
  devid = clusterid;

  for(k=0;k<3;k++){
    ixsigned = x[k]*xscale;
    ix[k] = (ULONG) ixsigned;
    /*    fv[k] = (float)ldexp(v[k],(int)(xunit-tunit));*/
  } 
  feps2 = (float)(eps2)*xscale2;
  /*  fh2 = (float)(h2)*xscale2;*/

  ii = address;
  ipdata[0] = ii<<4;
  /*  ipdata[1] = 0xd;*/
  ipdata[1] = 0x5;  

  ipdata[2] = ix[0]>>32;
  ipdata[3] = ix[1]>>32;
  ipdata[4] = ix[2]>>32;

  /*  ipdata[8] = *((unsigned int*)(&fv[0]));
  ipdata[9] = *((unsigned int*)(&fv[1]));
  ipdata[10] = *((unsigned int*)(&fv[2]));
  */
  
  ipdata[5] = *((unsigned int*)(&feps2));

  /*  ipdata[6] = *((unsigned int*)(&fh2));*/
  /*  ipdata[6] = index;*/

  /*  ipdata[7] = ((0x3ff&phiscale[devid][ii])<<20) | ((0x3ff&fscale[devid][ii])<<10) | (0x3ff&jscale[devid][ii]);*/
  ipdata[6] = ((0x3ff&phiscale[devid][ii])<<20) | ((0x3ff&fscale[devid][ii])<<10) ;

  g6_set_ipdata(devid,ipdata);
}
#endif

#ifdef IJPDMA

/* Staging buffer for the DMA path: ibuf is the write offset into bipdata,
   advanced by 5 per buffered i-particle and reset to 0 after a flush. */
static int ibuf=0;
static unsigned int bipdata[1024]; 

void g6_set_i_particle(int clusterid, int address,
		       int index,
			double x[3], /* position */
			double v[3], /* velocity */
 		       double eps2,
		       double h2)
{
  static unsigned int ipdata[20];
  LONG  ixsigned;
  ULONG ix[3];
  int k,ii,j,jj;
  float fv[3],feps2,fh2; 
  int devid;
  devid = clusterid;

  for(k=0;k<3;k++){
    /*    ix[k] = CONVERT_DOUBLE_TO_GRAPE_INT_POS(x[k],xunit);*/
    ixsigned = x[k]*xscale;
    ix[k] = (ULONG) ixsigned;
    /*    fv[k] = (float)ldexp(v[k],(int)(xunit-tunit))*/
  } 
  feps2 = (float)(eps2)*xscale2;
  /*  fh2 = (float)(h2)*xscale2;*/

  ii = address;

  if(ibuf==0){
    bipdata[0] = ii<<4;
    bipdata[1] = 5;
  }else{
    bipdata[1] += 5;
  }
  bipdata[ibuf+2] = ix[0]>>32;
  /*  bipdata[ibuf+3] = ix[0];*/
  bipdata[ibuf+3] = ix[1]>>32;
  /*  bipdata[ibuf+5] = ix[1];*/
  bipdata[ibuf+4] = ix[2]>>32;
  /*  bipdata[ibuf+7] = ix[2];*/
  /*  bipdata[ibuf+8] = *((unsigned int*)(&fv[0]));
  bipdata[ibuf+9] = *((unsigned int*)(&fv[1]));
  bipdata[ibuf+10] = *((unsigned int*)(&fv[2]));
  */
  bipdata[ibuf+5] = *((unsigned int*)(&feps2));
  /*  bipdata[ibuf+6] = *((unsigned int*)(&fh2));*/
  /*  bipdata[ibuf+6] = index;*/
  bipdata[ibuf+6] = ((0x3ff&phiscale[devid][ii])<<20) | ((0x3ff&fscale[devid][ii])<<10) | (0x3ff&jscale[devid][ii]);

  if(ibuf>500){
    g6_set_ipdata_dma(devid,bipdata);
    ibuf = 0;
  }else{
    ibuf += 5;
  }  
}

void ip_flush(int clusterid)
{
  g6_set_ipdata(clusterid,bipdata);
}

#endif

/*
 * By-reference variant of g6_set_i_particle(): scalars are dereferenced,
 * arrays are forwarded unchanged.
 */
void g6_set_i_particle_(int *clusterid, int *address,
		       int *index,
			double x[3], /* position */
			double v[3], /* velocity */
 		       double * eps2,
		       double * h2)
{
  const int id = *clusterid;
  const int slot = *address;
  const int idx = *index;

  g6_set_i_particle(id, slot, idx, x, v, *eps2, *h2);
}

/*
 * Tell device clusterid how many j-particles to use.
 *
 * In the DMA build any buffered i-particles are flushed first.  The
 * per-chip count is ceil(njp / nchip_on_module).  When njp is not a
 * multiple of nchip_on_module, the unused addresses at the top of the
 * last row are filled with zero-mass particles.  Finally the per-chip
 * count is written to register 0x1402.
 */
void g6_set_njp(int clusterid, int njp)
{
  unsigned int packet[10];
  unsigned int per_chip, leftover, pad;
  int devid;

  devid = clusterid;

#ifdef IJPDMA 
  if(ibuf!=0){
    g6_set_ipdata_dma(devid,bipdata);
    ibuf = 0;
  }
#endif

  per_chip = ((njp)+nchip_on_module-1)/(nchip_on_module);
  leftover = (njp)%(nchip_on_module);

  if(leftover != 0){
    for(pad=0;pad<(nchip_on_module-leftover);pad++){
      int dummy_index = 0;
      int address;
      double dummy_t = 1.0;
      double dummy_dt = 1.0;
      double zero3[3] = {0,0,0};
      double mass = 0.0;

      /* fill downwards from the last address of the final row */
      address = per_chip * (nchip_on_module) - pad - 1;
      g6_set_j_particle_(&devid, &address, &dummy_index, &dummy_t, &dummy_dt,
			 &mass, zero3, zero3, zero3, zero3, zero3);
    }
  }

  packet[0] = 0x1402;
  packet[1] = 0x1;
  packet[2] = per_chip;
  g6_set_ipdata(devid,packet);
}

/* By-reference variant of g6_set_njp(): dereferences and forwards. */
void g6_set_njp_(int *clusterid, int * njp)
{
  const int id = *clusterid;
  const int count = *njp;

  g6_set_njp(id, count);
}

void g6_get_force(int clusterid, double acc[][3],
	           double jerk[][3],
	           double phi[],
	           int flag[])
{
   static unsigned int fodata[1024];
   int i,adr,k;
   static LONG ia[3],iphi;
   signed int ij[3];
   int devid;
   double aunit;
   
   devid = clusterid;
  

   g6_get_fodata(devid,ni*14,fodata);

   /*   for(i=0;i<(ni*14);i++) printf("fodata[%x] %x\n",i,fodata[i]);*/
    
   /*   printf("fodata %x %x %x %x %x %x\n",fodata[0],fodata[1],fodata[2],fodata[3],fodata[4],fodata[5]);*/

  
   for(i=0;i<ni;i++){
    adr = i*14;

    if(nchip_on_module==4){
      ia[0] = fodata[adr+1];
      ia[0] = (ia[0]<<32) | fodata[adr]; 
      ia[1] = fodata[adr+3];
      ia[1] = (ia[1]<<32) | fodata[adr+2]; 
      ia[2] = fodata[adr+5];
      ia[2] = (ia[2]<<32) | fodata[adr+4]; 
      iphi = fodata[adr+7];
      iphi = (iphi<<32) | fodata[adr+6]; 
    }

    aunit = fscale[devid][i]-xunit*2+512;
    for(k=0;k<3;k++){
      acc[i][k] = CONVERT_GRAPE_INT_POS_TO_DOUBLE(ia[k],aunit);
    }
    phi[i] = (CONVERT_GRAPE_INT_POS_TO_DOUBLE(iphi,phiscale[devid][i]-xunit+512))*(-1);

    flag[i] = fodata[adr+13]; 
    /*printf("i %d a %g j %g pot %g\n",i,acc[i][0],jerk[i][0],phi[i]);*/
    
   }

}

/*
 * By-reference variant of g6_get_force(): the device id is dereferenced,
 * the output arrays are forwarded unchanged.
 */
void g6_get_force_(int *clusterid, double acc[][3],
	           double jerk[][3],
	           double phi[],
	           int flag[])
{
  const int id = *clusterid;

  g6_get_force(id, acc, jerk, phi, flag);
}

/*********************/

/* GRAPE-5 style entry point: returns g6_get_number_of_pipelines(). */
int g5_get_number_of_pipelines()
{
  const int npipe = g6_get_number_of_pipelines();

  return npipe;
}

/* Device id used by the g5_* entry points that take no device argument. */
static int clusterid=0;
/* Set to 1 by g5_set_range() for the range [0,1]; when 1, coordinates
   greater than 0.5 are shifted down by 1.0 before being sent. */
static int minimage_flag=0;

/*
 * Send j-particles adr .. adr+nj-1 to the default device.
 *
 * xj and mj are indexed by the particle address itself (xj[i], mj[i] for
 * i starting at adr), not by an offset from adr.  Time is 0, the timestep
 * 1, and velocity and all derivative terms are zero.  When minimage_flag
 * is 1, every coordinate greater than 0.5 is reduced by 1.0 first.  The
 * index passed on is i+48.
 */
void g5_set_xmj(int adr, int nj, double (*xj)[3], double *mj)
{
  const double tj = 0.0;
  const double dtj = 1.0;
  double a2by18[3] = {0.0, 0.0, 0.0};
  double a1by6[3] = {0.0, 0.0, 0.0};
  double aby2[3] = {0.0, 0.0, 0.0};
  double v[3] = {0.0, 0.0, 0.0};
  const int last = adr+nj;
  int i;

  for(i=adr;i<last;i++){
    double pos[3];
    int axis;

    for(axis=0;axis<3;axis++){
      const double c = xj[i][axis];

      pos[axis] = ((minimage_flag==1) && (c>0.5)) ? (c - 1.0) : c;
    }
    g6_set_j_particle(clusterid, i, i+48, tj, dtj, mj[i], a2by18, a1by6, aby2, v, pos);
  }
}

/* Same as g5_set_xmj() with the mass and position arguments swapped. */
void
g5_set_jp(int adr, int nj, double *m, double (*x)[3])
{
    double (*positions)[3] = x;
    double *masses = m;

    g5_set_xmj(adr, nj, positions, masses);
}

/* Squared softening length passed to g6_set_i_particle() by g5_set_xi()
   and g5_set_xiMC(). */
static double eps2;

/* Store eps squared as the softening value used for all i-particles. */
void
g5_set_eps_to_all(double eps)
{
  const double squared = eps*eps;

  eps2 = squared;
}

/* Store e2 (already squared) as the softening value for all i-particles. */
void
g5_set_eps2_to_all(double e2)
{
  const double squared = e2;

  eps2 = squared;
}

/*
 * Send ni i-particle positions to the default device and then set the
 * i-particle count.  Velocity and h2 are zero; the softening is the
 * file-scope eps2.  When minimage_flag is 1, every coordinate greater
 * than 0.5 is reduced by 1.0 first.
 */
void
g5_set_xi(int ni, double (*xi)[3])
{
  double v[3] = {0.0, 0.0, 0.0};
  const double h2 = 0.0;
  int ii;

  for(ii=0;ii<ni;ii++){
    double pos[3];
    int axis;

    for(axis=0;axis<3;axis++){
      const double c = xi[ii][axis];

      pos[axis] = ((minimage_flag==1) && (c>0.5)) ? (c - 1.0) : c;
    }
    g6_set_i_particle(clusterid, ii, ii, pos, v, eps2, h2);
  }

  g6_set_nip(clusterid, ni);
}

/*
 * Same as g5_set_xi() but for an explicitly chosen device: sends ni
 * i-particle positions to devid and then sets the i-particle count.
 * Velocity and h2 are zero; the softening is the file-scope eps2.  When
 * minimage_flag is 1, every coordinate greater than 0.5 is reduced by 1.0.
 */
void
g5_set_xiMC(int devid, int ni, double (*xi)[3])
{
  double v[3] = {0.0, 0.0, 0.0};
  const double h2 = 0.0;
  int ii;

  for(ii=0;ii<ni;ii++){
    double pos[3];
    int axis;

    for(axis=0;axis<3;axis++){
      const double c = xi[ii][axis];

      pos[axis] = ((minimage_flag==1) && (c>0.5)) ? (c - 1.0) : c;
    }
    g6_set_i_particle(devid, ii, ii, pos, v, eps2, h2);
  }

  g6_set_nip(devid, ni);
}

/* j-particle count stored by g5_set_n() and used by g5_run()/g5_runMC(). */
static int nj;

/* Pass the stored j-particle count to the default device via g6_set_njp(). */
void
g5_run(void)
{
  const int count = nj;

  g6_set_njp(clusterid, count);
}

/* Pass the stored j-particle count to device devid via g6_set_njp(). */
void
g5_runMC(int devid)
{
  const int count = nj;

  g6_set_njp(devid, count);
}

/* Remember n as the j-particle count for later g5_run()/g5_runMC() calls. */
void
g5_set_n(int n)
{
  const int count = n;

  nj = count;
}
 
/*
 * Fetch accelerations and potentials from the default device into a and
 * pot, then print a line for each of the first ni particles whose flag
 * word has any bit of 0x600c00db set.  The jerk and flag scratch arrays
 * hold 48 entries.
 */
void
g5_get_force(int ni, double (*a)[3], double *pot)
{
  const int flag_mask = 0x600c00db;
  double jerk[48][3];
  int flag[48];
  int ii = 0;

  g6_get_force(clusterid, a,jerk,pot,flag);

  while(ii<ni){
    if((flag_mask&flag[ii])!=0){
      printf("i %d flag %x\n",ii,flag[ii]);
    }
    ii++;
  }
}

/*
 * Same as g5_get_force() but for device devid: fetches accelerations and
 * potentials into a and pot, then prints a line for each of the first ni
 * particles whose flag word has any bit of 0x600c00db set.  The jerk and
 * flag scratch arrays hold 48 entries.
 */
void
g5_get_forceMC(int devid, int ni, double (*a)[3], double *pot)
{
  const int flag_mask = 0x600c00db;
  double jerk[48][3];
  int flag[48];
  int ii = 0;

  g6_get_force(devid, a,jerk,pot,flag);

  while(ii<ni){
    if((flag_mask&flag[ii])!=0){
      printf("i %d flag %x\n",ii,flag[ii]);
    }
    ii++;
  }
}

/* Open the default device and set its time to 0.0. */
void
g5_open(void)
{
  const double start_time = 0.0;

  g6_open(clusterid);
  g6_set_ti(clusterid, start_time);
}
     
/* Close the default device via g6_close(). */
void
g5_close(void)
{
  const int id = clusterid;

  g6_close(id);
}


/*
 * Select the fixed-point units from the coordinate range.
 *
 * tunit is always 51.  For the range [0,1] xunit is 64, minimum-image
 * handling is switched on, and the scale exponents of all 48 i-particle
 * addresses of device 0 are recomputed from fixed reference magnitudes.
 * For every other range xunit is 50 and minimum-image handling is off.
 * mmin is not used.
 *
 * Fixes:
 *  - g6_set_xunit() is now called BEFORE the scales are computed.  The
 *    scale formulas read xunit, and g6_get_force() undoes them with the
 *    current xunit, so computing them with the previous value (0 on the
 *    first call) produced inconsistent scales.
 *  - phi was passed on without ever being initialised.
 *    NOTE(review): 1.0 is a placeholder reference potential -- confirm
 *    the intended magnitude.
 */
void
g5_set_range(double xmin, double xmax, double mmin)
{
  /**** should be modified !!!! *****/

  int tunit,xunit,i;

  (void)mmin;

  tunit = 51;                         /* 2^51 */
  g6_set_tunit(tunit);

  if((xmin==0.0)&&(xmax==1.0)){
    xunit = 64;                         /* 2^64 */
    minimage_flag = 1;
  }else{
    xunit = 50;                         /* 2^50 */
    minimage_flag = 0;
  }
  g6_set_xunit(xunit);

  if(minimage_flag==1){
    double acc[3],phi,jfactor=6,ffactor=0;

    acc[0] = acc[1] = acc[2] = 3.0e-3;
    phi = 1.0;
    /* acc is deliberately passed for the jerk argument as well */
    for(i=0;i<48;i++){
      g6_set_i_particle_scales_from_real_value(0,i,acc,acc,phi,jfactor,ffactor);
    }
  }
}

/* Number of boards reported to GRAPE-5 style callers: always 1. */
int
g5_get_number_of_boards(void)
{
  const int board_count = 1;

  return board_count;
}

/* Id of the first cluster reported to GRAPE-5 style callers: always 0. */
int
g5_get_firstcluster(void)
{
  const int first_cluster = 0;

  return first_cluster;
}

/* Returns g6_get_number_of_pipelines(); one board is assumed. */
int g5_get_number_of_pipelines_per_board(void)
{
  const int npipe = g6_get_number_of_pipelines();

  return npipe;
}

/* Bias added to the frexp exponent in convert_double_to_grape_float(). */
#define INTERACTION_POSITION_EXP_OFFSET 512
/* The constant 1 as a 64-bit unsigned value, for building masks. */
#define ULONG_ONE ((ULONG) 1)
/* Mask for the 10-bit exponent field used by compose_float(). */
#define EXP_MASK ((ULONG)0x3ff)

/*
 * Pack the fields of a GRAPE floating-point word.  From the top:
 *   exponent (10 bits) | sign (1 bit) | zero flag (1 bit) | mantissa
 * where the mantissa occupies the low mantissa_bits bits.  Each field is
 * masked to its width before being placed.
 */
ULONG compose_float(ULONG mantissa_bits, ULONG exponent,
                     ULONG  sign, ULONG  zero, ULONG  mantissa)
{
  const ULONG mantissa_mask = (ULONG_ONE<<mantissa_bits)-1;
  ULONG word;

  word  = (exponent & (ULONG) EXP_MASK) << (mantissa_bits + 2);
  word |= (sign & (ULONG)ULONG_ONE) << (mantissa_bits + 1);
  word |= (zero & (ULONG)ULONG_ONE) << mantissa_bits;
  word |= mantissa & mantissa_mask;

  return word;
}

/*
 * Convert x to a GRAPE floating-point word with a float_bits-bit mantissa.
 *
 * Zero is returned as the fixed word 0x609800000.  Otherwise |x| is split
 * with frexp(), the fraction is scaled to float_bits bits and rounded, and
 * a mantissa that rounded up past float_bits bits is renormalised.  The
 * exponent is biased by INTERACTION_POSITION_EXP_OFFSET and the fields are
 * packed with compose_float().
 */
ULONG convert_double_to_grape_float(double x, ULONG float_bits)
{
  ULONG negative = 0;
  ULONG mantissa;
  ULONG biased_exp;
  double fraction;
  int iexp;

  if (x == 0.0){
      return 0x609800000L;
  }
  if (x < 0.0){
    negative = 1;
    x = -x;
  }

  fraction = frexp(x, &iexp);
  mantissa = rint(ldexp(fraction, (int)float_bits));
  if (mantissa >>float_bits){
    /* rounding carried out of the mantissa: halve it, bump the exponent */
    iexp ++;
    mantissa >>=1;
  }
  biased_exp = iexp + INTERACTION_POSITION_EXP_OFFSET;

  return compose_float(float_bits, biased_exp, negative, 0, mantissa);
}

/*
 * Set rscale for the cutoff table on device 0 (only the P3M table is
 * supported at present).  The value xscaleinv/(4*eta) is converted to a
 * GRAPE float with a 12-bit mantissa and written to register 0xc80.
 */
void g5_set_eta(double eta)
{
  unsigned int packet[10];
  unsigned int devid = 0;
  unsigned int rscale;

  rscale = convert_double_to_grape_float(xscaleinv/(4.0*eta),12);

  packet[0] = 0xc80;
  packet[1] = 0x1;
  packet[2] = rscale;
  g6_set_ipdata(devid,packet);
}

/*
 * Load the fixed cutoff table (only P3M is supported at present) into
 * device 0.  All six arguments are ignored; the table contents are
 * hard-coded.
 *
 * The block sent to register 0xc00 has 0x80 data words in two halves of
 * 64 entries.  Each half begins with the listed values and is padded
 * with 0x00000001.
 */
void g5_set_cutoff_table(double (*ffunc)(double), double fcut, double fcor,
                double (*pfunc)(double), double pcut, double pcor)
{
  /* leading entries of the first half (ipdata[2] onwards) */
  static const unsigned int first_half[] = {
    0x020002f9, 0x01d082ef, 0x01a1a2db, 0x0173e2c1,
    0x0147e29d, 0x011e0275, 0x00f6c247, 0x00d28215,
    0x00b141df, 0x009341ab, 0x0078c175, 0x00618141,
    0x004d6111, 0x003c80e3, 0x002e40bb, 0x0022c095,
    0x00198075, 0x0012405b, 0x000ca043, 0x00088031,
    0x00058023, 0x00036017, 0x0002000f, 0x00012009,
    0x0000a005, 0x00004003, 0x00002003
  };
  /* leading entries of the second half (ipdata[66] onwards) */
  static const unsigned int second_half[] = {
    0x02000005, 0x01ffe017, 0x01fea03b, 0x01fb006f,
    0x01f440ad, 0x01e980f3, 0x01da413d, 0x01c6a183,
    0x01ae61c5, 0x019201fd, 0x01724227, 0x014fc243,
    0x012b824f, 0x0106824b, 0x00e1e237, 0x00be6215,
    0x009d21e9, 0x007e81b5, 0x0063417b, 0x004ba13f,
    0x0037c105, 0x0027a0cd, 0x001ae09b, 0x0011406f,
    0x000a604b, 0x0005c02f, 0x0002e01b, 0x0001400f,
    0x00006007, 0x00002003
  };
  enum { HALF_LEN = 64 };
  const int n_first = (int)(sizeof(first_half)/sizeof(first_half[0]));
  const int n_second = (int)(sizeof(second_half)/sizeof(second_half[0]));
  unsigned int ipdata[200],devid;
  int i;

  devid = 0;

  ipdata[0] = 0xc00;
  ipdata[1] = 0x80;
  for(i=0;i<HALF_LEN;i++){
    ipdata[2+i] = (i<n_first) ? first_half[i] : 0x00000001;
    ipdata[2+HALF_LEN+i] = (i<n_second) ? second_half[i] : 0x00000001;
  }
  g6_set_ipdata(devid,ipdata);
}

/*
 * Compute accelerations a[] and potentials p[] for ni positions x[].
 *
 * The positions are processed in batches of at most np, where np is the
 * value returned by g5_get_number_of_pipelines(): each batch is loaded
 * with g5_set_xi(), the calculation is started with g5_run(), and the
 * results are fetched with g5_get_force().
 */
void
g5_calculate_force_on_x(double (*x)[3], double (*a)[3], double *p, int ni)
{
  const int np = g5_get_number_of_pipelines();
  int off;

  for (off = 0; off < ni; off += np)
    {
      /* a full batch, or whatever is left at the tail */
      const int nii = (off+np > ni) ? (ni - off) : np;

      g5_set_xi(nii, &x[off]);

      g5_run();

      g5_get_force(nii, &a[off], &p[off]);
    }
}
