#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
#include<malloc.h>
#define REAL double
/*#define NMAX 1100000*/
#define DIM 3
#include "jlist.h"

static int nleaf=10;
static int ng=2000;
static double theta=0.75;
static int logout_initflag=1;

/*
 * Replace the file-level tree-code tuning parameters.
 *
 *   i_theta : stored in `theta`, which scales the per-cell opening length
 *             computed during tree construction.
 *   i_ncrit : stored in `ng`; a child cell holding at most this many
 *             particles is flagged as a group cell during tree construction.
 *   i_nleaf : stored in `nleaf`; a cell holding more than this many
 *             particles is subdivided further.
 *
 * The values are copied verbatim; no range checking is done.
 */
void nbody_force_gravity_init(i_theta, i_ncrit, i_nleaf)
double i_theta;
int i_ncrit;
int i_nleaf;
{
  nleaf = i_nleaf;
  ng    = i_ncrit;
  theta = i_theta;
}

/*
 * Direct-summation softened gravity acting on a single particle.
 *
 *   i    : index of the target particle inside x[]/m[]; that entry is
 *          skipped so the particle does not attract itself
 *   xi   : position at which the field is evaluated
 *   x, m : positions and masses of the n source particles
 *   eps  : softening length (eps*eps is added to every squared distance)
 *   ai   : out, acceleration  sum_j m[j] * (x[j]-xi) / (r^2+eps^2)^(3/2)
 *   poti : out, potential     sum_j -m[j] / sqrt(r^2+eps^2)
 *   n    : number of source particles
 */
void force_on_ith_particle(i,xi,x,m,eps,ai,poti,n)
        int i;
        REAL xi[3];
        REAL x[][3];
        REAL m[];
        REAL eps;
        REAL ai[3];
        REAL *poti;
        int n;
{
        int src, axis;
        double soft2;
        double sep[3];
        double dist2, inv_r2, inv_r, inv_r3;
        double scaled_sep;

        ai[0] = 0.0;
        ai[1] = 0.0;
        ai[2] = 0.0;
        *poti = 0.0;
        soft2 = eps*eps;

        for (src = 0; src < n; src++) {
                if (src == i) {
                        continue;       /* no self-interaction */
                }

                /* softened squared separation, accumulated axis by axis */
                dist2 = soft2;
                for (axis = 0; axis < 3; axis++) {
                        sep[axis] = x[src][axis] - xi[axis];
                        dist2 += sep[axis] * sep[axis];
                }

                inv_r2 = 1.0/dist2;
                inv_r = sqrt(inv_r2);
                inv_r3 = inv_r2*inv_r;

                for (axis = 0; axis < 3; axis++) {
                        scaled_sep = inv_r3 * sep[axis];
                        ai[axis] += m[src] * scaled_sep;
                }
                *poti += -m[src]*inv_r;
        }
}

/*
 * Indirect quicksort.
 *
 * Rearranges index[first..last] (both ends inclusive) so that
 * value[index[k]] is non-decreasing over that range.  value[] itself is
 * never written.  The pivot is the value referenced by the middle slot of
 * the range.  No check is made for an empty range (first > last).
 */
void sort_particles_longint(first, last, value, index)
int first, last;
long long int value[];
int index[];
{
  int lo = first;
  int hi = last;
  int held;
  long long int pivot;

  pivot = value[index[(first + last)/2]];

  while (1) {
    /* advance both cursors to the next pair that is on the wrong side */
    for ( ; value[index[lo]] < pivot; lo++) ;
    for ( ; value[index[hi]] > pivot; hi--) ;
    if (lo >= hi) {
      break;
    }

    held = index[lo];
    index[lo] = index[hi];
    index[hi] = held;

    lo++;
    hi--;
  }

  /* recurse only into partitions that hold more than one slot */
  if (lo - 1 > first) {
    sort_particles_longint(first, lo - 1, value, index);
  }
  if (hi + 1 < last) {
    sort_particles_longint(hi + 1, last, value, index);
  }
}


/*
 * In-place quicksort of key[lo..up] (inclusive) into non-decreasing order,
 * carrying a[] along so that each a[k] stays paired with its key[k].
 *
 * The first element of the range is the pivot.  Partitioning works by
 * moving a "hole" back and forth between the two ends.  After each
 * partition the smaller side is handled by recursion and the larger side by
 * the enclosing loop, which bounds the recursion depth.
 */
void
kawaiqsort(a, key, lo, up)
int a[];
long long int key[];
int lo;
int up;
{
  int left, right;
  int pivot_a;
  long long int pivot_key;

  while (lo < up) {
    left = lo;
    right = up;
    pivot_a = a[lo];
    pivot_key = key[lo];

    while (left < right) {
      /* scan downwards for an entry that belongs on the low side */
      while (key[right] > pivot_key) {
        right--;
      }
      a[left] = a[right];
      key[left] = key[right];

      /* scan upwards for an entry that belongs on the high side */
      while (left < right && key[left] <= pivot_key) {
        left++;
      }
      a[right] = a[left];
      key[right] = key[left];
    }

    /* the hole has settled at the pivot's final position */
    a[left] = pivot_a;
    key[left] = pivot_key;

    if (left - lo < up - left) {
      kawaiqsort(a, key, lo, left - 1);
      lo = left + 1;
    } else {
      kawaiqsort(a, key, left + 1, up);
      up = left - 1;
    }
  }
}

/*
 * Write the low `nbit` bits of `idata` into `cdata` as a NUL-terminated
 * string of '0'/'1' characters, most significant of those bits first.
 * cdata must have room for nbit+1 characters.  With nbit <= 0 the result is
 * the empty string.
 */
void btos(idata,nbit,cdata)
long long int idata;
int nbit;
char cdata[];
{
  char *out = cdata;
  int bit;

  for (bit = nbit - 1; bit >= 0; bit--) {
    *out++ = ((idata >> bit) & 1) ? '1' : '0';
  }
  *out = '\0';
}

/*
 * Disabled (compiled-out) copy of struct clist_t, the record type of the
 * cell table used by the tree routines in this file.  The definition that is
 * actually compiled is not in this file -- presumably it lives in "jlist.h";
 * confirm there before relying on the field list below.  The field notes
 * describe how the code in this file uses each member.
 */
#if 0
struct clist_t {
  int next;          /* index of the next entry in the same hash chain; -1 terminates the chain */
/*  int icell;*/
  int key_level;     /* key_level value in effect when the cell was created by tree_construct */
  long long int key; /* cell key; 0 marks an unused primary slot */
  int ifirst;        /* position (in sorted order) of the cell's first particle */
  int n;             /* number of particles in the cell */
  REAL mass;         /* total mass of the cell's particles */
  REAL cm[3];        /* centre of mass of the cell's particles */
  int gflag; /* 1: group cell 0: upper cell 2: lower cell */
  REAL l_theta;      /* set to L_theta * 2^(-(63-key_level)/3) by tree_construct */
  REAL length;       /* set to l_theta * theta by tree_construct */
  int zeroflag;      /* bit ic is set when child octant ic holds no particles; initialised to 0xff */
  int calcflag;      /* not referenced by the code visible in this file */
};
#endif


static double L_theta;
static int clistmask;


/*
 * Build the cells below one parent cell of the hashed oct-tree, recursively.
 *
 * The particles of the parent occupy sorted positions [ifirst, ifirst+n).
 * They are split into (up to) eight child octants using the 3-bit digit of
 * each key found at bit position key_level-3.  Every non-empty child gets an
 * entry in clist[]; children holding more than `nleaf` particles are
 * subdivided by a recursive call, the others have their mass and centre of
 * mass summed directly from the particles.
 *
 *   ifirst, n   : range of sorted positions belonging to the parent
 *   key         : particle keys, indexed by sorted position
 *   index       : particle number for each sorted position (used to read x, m)
 *   key_level   : key_level of the parent; the children use key_level-3
 *   nnode       : in/out, incremented once for every cell created
 *   current_key : key of the parent; child key is (current_key<<3)|octant
 *   clist       : cell table.  The primary slot is (clistmask & child key);
 *                 if that slot is occupied the cell is stored at *clist_adr
 *                 and appended to the slot's `next` chain.
 *   clist_adr   : in/out, index of the next free overflow entry
 *   gflag       : classification of the parent.  If 0, a child with at most
 *                 `ng` particles becomes a group cell (1) and any other
 *                 child stays an upper cell (0); otherwise every child is a
 *                 lower cell (2).
 *   cmass       : out, total mass of the particles in the range
 *   ccm         : out, mass-weighted position sum (NOT divided by the mass)
 *   x, m        : particle positions and masses, indexed by particle number
 *
 * Returns a bit mask in which bit ic is set when child octant ic is empty.
 *
 * Exits the program with "key_level error" when the keys have no more bits
 * to split on.
 *
 * NOTE(review): the size of clist[] is not known here, so writes through
 * *clist_adr are not bounds-checked; the caller must size the table
 * generously.  A cell whose total mass is zero yields a non-finite centre
 * of mass (division by the mass), as in the original code.
 */
int tree_construct(ifirst,n,key,index,key_level,nnode,current_key,clist,clist_adr,gflag,cmass,ccm,x,m)
int ifirst;
int n;
long long int key[];
int index[];
int key_level;
int *nnode;
long long int current_key;
struct clist_t clist[];
int *clist_adr;
int gflag;
double *cmass;
double ccm[];
REAL x[][3];
REAL m[];
{
  int key_to_adr();   /* defined further down in this file */
  int ncell[8],ifirstc[8],gflagc[8];
  int ic,i,j,k,prevflag,zeroflag;
  double ccmtmp[3];

  key_level -= 3;
  if(key_level<0){printf("key_level error\n");exit(1);}
  for(ic=0;ic<8;ic++) ncell[ic]=ifirstc[ic]=0;

  /* Count the particles per octant.  The keys are sorted, so each octant is
     one contiguous run and its first position is where the digit changes. */
  prevflag=-1;
  for(i=ifirst;i<(n+ifirst);i++){
    int flag = (int)((key[i]>>key_level)&0x7);

    ncell[flag]++;
    if(flag!=prevflag) ifirstc[flag]=i;
    prevflag = flag;
  }

  /* Create a table entry for every non-empty child. */
  for(ic=0;ic<8;ic++){
    long long int tmp_key;
    int adr,slot,collided;

    if(ncell[ic]==0) continue;

    tmp_key = ((current_key<<3)|ic);
    adr = (int)((clistmask) & tmp_key);

    /* primary slot if it is free, otherwise the next overflow entry */
    collided = (clist[adr].key != 0);
    slot = collided ? (*clist_adr) : adr;

    if(gflag==0){
      gflagc[ic] = (ncell[ic]<=ng) ? 1 : 0;
    }else{
      gflagc[ic] = 2;
    }

    clist[slot].key = tmp_key;
    clist[slot].ifirst = ifirstc[ic];
    clist[slot].key_level = key_level;
    clist[slot].n = ncell[ic];
    clist[slot].next = -1;
    clist[slot].l_theta = L_theta*pow(2.0,-(63.0-key_level)/3.0);
    clist[slot].length = clist[slot].l_theta*theta;
    clist[slot].zeroflag = 0xff;
    clist[slot].gflag = gflagc[ic];

    if(collided){
      /* append the new entry to the end of the primary slot's chain */
      int tail = adr;

      while(clist[tail].next != -1) tail = clist[tail].next;
      clist[tail].next = slot;
      (*clist_adr)++;
    }
    (*nnode)++;
  }

  /* Fill in mass and centre of mass of every child and accumulate the
     totals that are handed back to the caller. */
  ccmtmp[0] = ccmtmp[1] = ccmtmp[2] = (*cmass) = 0.0;
  for(ic=0;ic<8;ic++){
    long long int tmp_key;
    int tmpadr;

    if(ncell[ic]==0) continue;

    tmp_key = (current_key<<3) | ic;
    tmpadr = key_to_adr(tmp_key,clist);

    if(ncell[ic]>nleaf){
      /* too many particles for a leaf: subdivide */
      double ccmass,cccm[3];

      clist[tmpadr].zeroflag
        = tree_construct(ifirstc[ic],ncell[ic],key,index,key_level,nnode,tmp_key,clist,clist_adr,gflagc[ic],&ccmass,cccm,x,m);

      *cmass += ccmass;
      for(k=0;k<3;k++) ccmtmp[k] += cccm[k];

      clist[tmpadr].mass = ccmass;
      for(k=0;k<3;k++) clist[tmpadr].cm[k] = cccm[k]/ccmass;
    }else{
      /* leaf: sum directly over its particles */
      double totalm=0,cm[3]={0,0,0},totalminv;

      for(j=ifirstc[ic];j<(ifirstc[ic]+ncell[ic]);j++){
        int ii = index[j];

        totalm += m[ii];
        for(k=0;k<3;k++) cm[k] += m[ii]*x[ii][k];
      }
      clist[tmpadr].mass = totalm;

      (*cmass) += totalm;
      for(k=0;k<3;k++) ccmtmp[k] += cm[k];

      totalminv = 1.0/totalm;
      for(k=0;k<3;k++) clist[tmpadr].cm[k] = cm[k]*totalminv;
    }
  }
  for(k=0;k<3;k++) ccm[k] = ccmtmp[k];

  zeroflag = 0;
  for(ic=0;ic<8;ic++){
    if(ncell[ic]==0) zeroflag |= (1<<ic);
  }
  return zeroflag;
}

/*
 * Compute the geometric centre of the cell identified by `key`.
 *
 *   key       : cell key (a leading marker bit followed by 3-bit octant
 *               digits)
 *   key_level : number of bit positions the key must be shifted left to put
 *               its marker bit at bit 63; must be in 0..63
 *   length    : edge length of the cell
 *   coc       : out, the three centre coordinates
 *   xmax      : half-width of the root box, which spans [-xmax, xmax) on
 *               every axis
 *
 * After the shift, bits 3i+2, 3i+1 and 3i (i = 0..20) are de-interleaved
 * into 21-bit grid coordinates for axes 0, 1 and 2.  Each grid coordinate is
 * mapped to the lower corner of the cell and half the edge length is added.
 *
 * The shift is done on an unsigned copy of the key: shifting the marker bit
 * into the sign bit of a signed long long is undefined behaviour.
 */
void center_of_cell(key,key_level,length,coc,xmax)
long long int key;
int key_level;
double length;
double coc[3];
double xmax;
{
  const double tmpscale = 1.0/2097152.0;   /* 2^-21: one grid step as a fraction of the box */
  unsigned long long int fullkey;
  unsigned long long int grid[3] = {0,0,0};
  int i,k;

  fullkey = ((unsigned long long int)key) << key_level;

  for(i=0;i<21;i++){
    grid[2] |= ((fullkey>>(3*i))&1)<<i;
    grid[1] |= ((fullkey>>(3*i+1))&1)<<i;
    grid[0] |= ((fullkey>>(3*i+2))&1)<<i;
  }
  for(k=0;k<3;k++) coc[k] = (double)grid[k]*tmpscale*(2.0*xmax)-xmax + length*0.5;
}

/*
 * Look up the table index of the cell with the given key.
 *
 * The search starts at the primary slot (clistmask & key) and follows the
 * `next` links of that slot's chain until an entry with a matching key is
 * found.
 *
 * A key that is not in the table is an internal error: the original code
 * would follow next == -1 (or the uninitialised link of an empty slot) and
 * read outside the table.  This version reports the key on stderr and
 * terminates the program instead, in line with the exit(1) used by
 * tree_construct for its own consistency check.
 *
 * Returns the index into clist[] of the matching entry.
 */
int key_to_adr(key,clist)
long long int key;
struct clist_t clist[];
{
  int adr;

  adr = (int)((clistmask) & key);
  while(clist[adr].key != key){
    /* key 0 marks an unused primary slot, whose link was never initialised */
    if(clist[adr].key == 0 || clist[adr].next < 0){
      fprintf(stderr,"key_to_adr: key %llx not found in cell table\n",
              (unsigned long long int)key);
      exit(1);
    }
    adr = clist[adr].next;
  }
  return adr;
}


static double lt=0,st=0;



void make_interaction_list(icel_key,coc,clength,current_key,clist,njlist,index,jlist,x,m)
long long int icel_key;
double coc[3];
double clength;
long long int current_key;
struct clist_t clist[];
int *njlist;
int index[];
struct jlist_t *jlist;
REAL x[][3];
REAL m[];
{
    int adr,ic,isp,i,ii,iii;
    double adx,ady,adz,fabs(),r2=0.0,l_theta2,sqrt(),log();

    adr = key_to_adr(current_key,clist);  

    adx = fabs(coc[0]-clist[adr].cm[0])-clength*0.5;
    ady = fabs(coc[1]-clist[adr].cm[1])-clength*0.5;
    adz = fabs(coc[2]-clist[adr].cm[2])-clength*0.5;
    if(adx>0) r2 += adx*adx;
    if(ady>0) r2 += ady*ady;
    if(adz>0) r2 += adz*adz;

    l_theta2 = clist[adr].l_theta*clist[adr].l_theta;

/*printf("key %lx adr %x l_theta %g\n",current_key,adr,clist[adr].l_theta);*/

/*  printf("adr %d key %lx r %g l_theta %g cm %g %g %g coc %g %g %g zero %x\n",adr,current_key,sqrt(r2),sqrt(l_theta2),
	 clist[adr].cm[0],clist[adr].cm[1],clist[adr].cm[2],coc[0],coc[1],coc[2],clist[adr].zeroflag);*/

/*
isp = 6-log(sqrt(l_theta2))/log(2.0);
for(i=0;i<isp;i++)printf("  ");
*/
    if(r2>l_theta2){

      iii = *njlist;
/*      (*jlist).index[iii] = -1;*/
      (*jlist).x[iii][0] = clist[adr].cm[0];
      (*jlist).x[iii][1] = clist[adr].cm[1];
      (*jlist).x[iii][2] = clist[adr].cm[2];
      (*jlist).mass[iii] = clist[adr].mass;
      (*njlist)++;
/*
printf("key %lx r %g l/theta %g njlist %d: well separated\n",current_key,sqrt(r2),sqrt(l_theta2),*njlist); 
*/
    }else{
/*
printf("key %lx r %g l/theta %g njlist %d:\n",current_key,sqrt(r2),sqrt(l_theta2),*njlist); 
*/
      if(clist[adr].n>nleaf){
        for(ic=0;ic<8;ic++){
	  int zeroflag;
	  zeroflag = (clist[adr].zeroflag>>ic)&1;
	  if(zeroflag != 1){ 
	    long long int tmpkey;
	    int tmpn,tmpadr;
            tmpkey = (current_key<<3)|ic;
	    tmpadr = key_to_adr(tmpkey,clist);
	    tmpn = clist[tmpadr].n;
/*
for(i=0;i<isp+1;i++)printf("  ");
printf("ic %d key %lx n %d icellkey %lx\n",ic,tmpkey,tmpn,icel_key);
*/
	    if(icel_key!=tmpkey){
	      make_interaction_list(icel_key,coc,clength,tmpkey,clist,njlist,index,jlist,x,m);
	    }else{
	      for(i=clist[tmpadr].ifirst;i<(clist[tmpadr].ifirst+tmpn);i++){
                ii = index[i];
		iii = *njlist;
/*                (*jlist).index[iii] = index[i];*/
                (*jlist).x[iii][0] = x[ii][0];
                (*jlist).x[iii][1] = x[ii][1];
                (*jlist).x[iii][2] = x[ii][2];
                (*jlist).mass[iii] = m[ii];
                (*njlist)++;
	      }
	    }
	  }
        }
      }else{
	for(i=clist[adr].ifirst;i<(clist[adr].ifirst+clist[adr].n);i++){
	  ii = index[i];
	  iii = *njlist;
/*          (*jlist).index[iii] = index[i];*/
          (*jlist).x[iii][0] = x[ii][0];
          (*jlist).x[iii][1] = x[ii][1];
          (*jlist).x[iii][2] = x[ii][2];
          (*jlist).mass[iii] = m[ii];
	  (*njlist)++;
	}
      }
    }
}


#if 0
void check_ijlist(ii,ilist,nilist,jlist,njlist)
int ii;
int ilist[];
int nilist;
struct jlist_t jlist[];
int njlist;
{
  int i,j,iflag,jflag;

  iflag = 1;
  for(i=0;i<nilist;i++){
    jflag = 0;
    for(j=0;j<njlist;j++){
      if(ilist[i]==jlist[j].index){
/*	printf("i %d ilist %d j %d jlist %d\n",i,ilist[i],j,jlist[j].index);*/
	jflag = 1;
      }
    }
    if(jflag==0)printf("i %d ilist %d j %d jlist %d\n",i,ilist[i],j,jlist[j].index);
    iflag = iflag & jflag;
  }
  printf("ii %d iflag %x\n",ii,iflag);

  if(iflag==0){
    for(i=0;i<nilist;i++)printf("iflag i %d %d\n",i,ilist[i]);
    printf("\n");  
    for(i=0;i<njlist;i++)printf("jflag j %d %d\n",i,jlist[i].index);
    printf("\n");  
  }

}
#endif

/*
static  double (*xj)[3] = NULL;
static  double *mj = NULL;
*/

/*
 * Disabled (compiled-out) version of calculate_force_using_tree.
 *
 * It hands the positions and masses stored in *jlist to the g5_* interface
 * as the source set, then evaluates the targets listed in
 * ilist[0..nilist) in batches of `npipes` and stores the results in a[] and
 * pot[] (the potential is stored with its sign flipped).
 *
 * NOTE(review): nbody_force_gravity in this file calls
 * calculate_force_using_tree with a different argument list, so the
 * definition actually in use must come from another file.
 */
#if 0
void calculate_force_using_tree(x,m,a,pot,ilist,nilist,jlist,njlist)
REAL x[][3];
REAL m[];
REAL a[][3];
REAL pot[];
int ilist[];
int nilist;
struct jlist_t *jlist;
int njlist;
{
  int i,j,k,npipes,ii,jj,nn;

  /* per-batch staging buffers; NOTE(review): sized for at most 200 targets
     per batch, npipes is not checked against that */
  static double xi[200][3];
  static double atmp[200][3];
  static double ptmp[200];

/*  static double xj[NJMAX][3];*/
/*  static double mj[NJMAX];*/

/*
  mj = (double *)realloc(mj,sizeof(double)*njlist);
  xj = (double (*)[3])realloc(xj,sizeof(double)*njlist*3);
*/
  
/*
  double **xjj,**xj;
  xjj = (double **)malloc(sizeof(double *)*njlist);
  for(j=0;j<njlist;j++) xjj[j] = (double *)malloc(sizeof(double)*3);
  xj = xjj;
*/

  npipes = g5_get_number_of_pipelines();

/*
  for(j=0;j<njlist;j++){
    jj = jlist[j].index; 
    if(jj == -1){
      for(k=0;k<3;k++) xj[j][k] = jlist[j].x[k];
      mj[j] = jlist[j].mass;
    }else{
      for(k=0;k<3;k++) xj[j][k] = x[jj][k];
      mj[j] = m[jj];
    }
  }
*/

/*  for(j=0;j<njlist;j++) printf("j %d xj %g %g %g\n",j,xj[j][0],xj[j][1],xj[j][2]);*/

/*
  g5_set_xj(0,njlist,xj);
   g5_set_mj(0,njlist,mj);
*/
  /* register the njlist sources */
  g5_set_xj(0,njlist,(*jlist).x);
  g5_set_mj(0,njlist,(*jlist).mass);

  g5_set_n(njlist);

  /* evaluate the targets, npipes at a time (the last batch may be shorter) */
  for(i=0;i<nilist;i+=npipes){
    nn = npipes;
    if((i+npipes)>nilist) nn = nilist-i;

    for(ii=0;ii<nn;ii++){
      for(k=0;k<3;k++) xi[ii][k] = x[ilist[i+ii]][k];
    }
    g5_calculate_force_on_x(xi,atmp,ptmp,nn);
    /* scatter the batch results back to the particles they belong to */
    for(ii=0;ii<nn;ii++){
      int iii;
      iii = ilist[i+ii];
      for(k=0;k<3;k++) a[iii][k] = atmp[ii][k];
      pot[iii] = -ptmp[ii];
    }
  }

}

#endif

/* static int *ilist;*/

/*
 * Compute gravitational accelerations and potentials for n particles with
 * the hashed oct-tree in this file.
 *
 *   n    : number of particles
 *   eps  : softening length, passed on to calculate_force_using_tree
 *   m, x : particle masses and positions
 *   a    : out, accelerations (filled by calculate_force_using_tree)
 *   pot  : out, potentials    (filled by calculate_force_using_tree)
 *
 * Steps:
 *   1. size and allocate the cell table and the per-particle key/index
 *      arrays;
 *   2. enclose all particles in a cube [-maxx, maxx)^3 with maxx a power of
 *      two, quantise every coordinate to 21 bits and interleave the bits
 *      into a 63-bit key topped by a marker bit;
 *   3. sort the keys (and the particle index array with them);
 *   4. build the tree with tree_construct;
 *   5. collect every cell flagged as a group cell into walklist[];
 *   6. let calculate_force_using_tree do the list construction and the
 *      force evaluation.
 *
 * Progress and timing information is printed to stdout; the more detailed
 * lines only on the first call (logout_initflag).
 *
 * Changes relative to the original implementation:
 *   - allocation failures are reported and terminate the program instead of
 *     being dereferenced;
 *   - the cell table has a minimum size, and an overrun of the table by
 *     tree_construct is detected and reported;
 *   - walklist[] is bounds-checked;
 *   - "n/nwalk" in the statistics line no longer divides by zero;
 *   - the marker bit is formed without a signed shift overflow;
 *   - a particle set whose coordinates are all zero no longer feeds 0 to
 *     log().
 *
 * NOTE(review): the total mass and centre of mass returned by
 * tree_construct for the root cell are not stored in the root entry (its cm
 * is set to zero and its mass is left unset), as in the original code.
 */
void nbody_force_gravity(n, eps, m, x, a, pot)
int n;
REAL eps;
REAL m[];
REAL x[][3];
REAL a[][3];
REAL pot[];
{
  enum { WALKLIST_MAX = 1000000, CLIST_MIN = 64 };
  static int walklist[WALKLIST_MAX];

  int i,j,k;
  double maxx,power2,xscale,tmpx;
  long long int tmpi[3];
  int key_level;
  long long int current_key;
  int clist_adr,nnode,nclist;
  int nwalk=0;
  double cmass,ccm[3],stbegin,dni;
  int clistmax;
  long long int *key;
  int *index;
  struct clist_t *clist;
  long long int tmpone;

  get_wcputime(&lt,&st);
  stbegin = st;  

/************ table sizing and allocation ****************/

  clistmax = 250000 * (30.0/nleaf) * (n/1.0e6);
  /* the heuristic above collapses for small n: keep room for the root, its
     children and a usable hash mask */
  if(clistmax < CLIST_MIN) clistmax = CLIST_MIN;

  /* primary hash area: a power of two no larger than half the table; the
     remainder is the overflow area used for collisions */
  clistmask = (int)(log((double)clistmax)/log(2.0));
  clistmask = (1<<(clistmask-1))-1  ;

  clist = (struct clist_t *)malloc(sizeof(struct clist_t)*clistmax);
  key = (long long int *)malloc(sizeof(long long int)*(n>0 ? n : 1));
  index = (int *)malloc(sizeof(int)*(n>0 ? n : 1));
  if(clist==NULL || key==NULL || index==NULL){
    fprintf(stderr,"nbody_force_gravity: out of memory (n %d clistmax %d)\n",n,clistmax);
    exit(1);
  }

   if(logout_initflag==1){
      printf("\n********** tree code stat. **************\n");
      printf("clist max %d adrmask %x %d\n",clistmax,clistmask,clistmask);  
   }

  /* key 0 marks an unused primary slot */
  for(i=0;i<(clistmask+1);i++) clist[i].key=0;
  for(i=0;i<n;i++) index[i] = i;

   get_wcputime(&lt,&st);
   if(logout_initflag==1){
     printf("cpu %g: before key %g\n",lt,st);
   }

/************ making key ****************/

  /* largest absolute coordinate over all particles and axes */
  maxx=0;
  for(i=0;i<n;i++){
    for(k=0;k<3;k++){
      tmpx = x[i][k]*x[i][k];
      if(maxx<tmpx) maxx = tmpx;
    }
  }
  maxx = sqrt(maxx);

  printf("max %g ",maxx);
  if(maxx > 0.0){
    /* round up to the next power of two strictly above maxx; the +50/-50
       makes the integer cast round towards minus infinity for |x| < 1 */
    power2 = (int)(log(maxx)/log(2.0)+1.0 + 50.0)-50.0;
    maxx = pow(2.0, power2);
  }else{
    /* every coordinate is zero (or n is 0): any positive box size will do */
    maxx = 1.0;
  }

  printf("%g ",maxx);

   xscale = ((double)(1<<21))/(2.0*maxx);

  /* marker bit 63, formed on an unsigned value so that no signed shift
     overflows */
  tmpone = (long long int)(1ULL<<63);
  for(i=0;i<n;i++){
    for(k=0;k<3;k++){
      tmpi[k] = (int)((x[i][k]+maxx)*xscale);
    }
    key[i] = tmpone;
    /* interleave the 21 bits of the three grid coordinates: bit j of axis
       0/1/2 goes to key bit 3j+2 / 3j+1 / 3j */
    for(j=0;j<21;j++){
      key[i] |= ((long long int)((((tmpi[0]>>j)&1)<<2)|(((tmpi[1]>>j)&1)<<1)|((tmpi[2]>>j)&1)))<<(j*3);
    }
  }
   get_wcputime(&lt,&st);
   if(logout_initflag==1){
     printf("\ncpu %g: making key\n",lt);
   }

  /* sorts key[] in place and permutes index[] along with it */
  kawaiqsort(index, key, 0, n-1);

   get_wcputime(&lt,&st);
   if(logout_initflag==1){
     printf("cpu %g: sorting\n",lt);
   }

/************ tree construction ****************/

  L_theta = 2*maxx/theta;
  current_key = 1;  
  key_level = 63;
  if(logout_initflag==1){
     printf("theta %g L/theta %g nleaf %d ncrit %d\n",theta,L_theta,nleaf,ng); 
  }
  clist_adr = (clistmask) + 1;   /* first overflow entry */
  nnode = 0;
  
  /* root cell: key 1, stored at index 1 */
  clist[current_key].key = current_key;
  clist[current_key].key_level = key_level;
  clist[current_key].next = -1;
  clist[current_key].n = n;
  clist[current_key].ifirst = 0;
  clist[current_key].l_theta = L_theta;
  clist[current_key].length = L_theta*theta;
  clist[current_key].cm[0] = 0.0;
  clist[current_key].cm[1] = 0.0;
  clist[current_key].cm[2] = 0.0;
  nnode++;

  clist[current_key].zeroflag = 
       tree_construct(0,n,key,index,key_level,&nnode,current_key,clist,&clist_adr,0,&cmass,ccm,x,m);

  nclist = clist_adr;
  if(nclist > clistmax){
    /* tree_construct cannot see the table size; if it ran past the end the
       heap is already damaged, so stop rather than compute with it */
    fprintf(stderr,"nbody_force_gravity: cell table overflow (%d entries used, %d allocated)\n",
            nclist,clistmax);
    exit(1);
  }

  if(logout_initflag==1){
    printf("nnode %d clist_adr %d\n",nnode,nclist);
  }

  /* collect the group cells (gflag 1); unused slots (key 0) and the root
     (key 1) are skipped before gflag is looked at */
  for(i=0;i<nclist;i++){  
    if(clist[i].key!=0 && clist[i].key!=1 && clist[i].gflag==1){
      if(nwalk >= WALKLIST_MAX){
        fprintf(stderr,"nbody_force_gravity: more than %d group cells\n",(int)WALKLIST_MAX);
        exit(1);
      }
      walklist[nwalk] = i;
      nwalk++;
    }
  }
  if(logout_initflag==1){
    printf("nwalk %d ng %d\n",nwalk,(nwalk>0) ? n/nwalk : 0);

    get_wcputime(&lt,&st);
    printf("cpu %g: tree construction \n",lt);
  }

/************ list construction and force calculation ****************/

  calculate_force_using_tree(n,x,m,a,pot,eps,clist,nwalk,walklist,index,maxx,&st,&dni);

   free(clist);
   free(index);
   free(key);

   get_wcputime(&lt,&st);
   printf("total cputime %g: %g Gflops\n",st-stbegin,38*dni/(st-stbegin)/1e9);

   if(logout_initflag==1) logout_initflag = 0 ;

}

