#define USEGRAPE6
c      
c     code for nbody : sticky8.F 
c   (4nd-order predictor-corrector scheme
c	 and Individual time step,
c        copied from sticky8.c)
c
      program s8
c
      include 'sticky8.inc'
      real*8 m(nmax)
      real*8 x0(3,nmax),x1(3,nmax)
      real*8 v0(3,nmax),v1(3,nmax)
      real*8 a0(3,nmax),a1(3,nmax)
      real*8 adot0(3,nmax),adot1(3,nmax)
      real*8 pot(nmax),eps(nmax)
      real*8 dti(nmax),ti(nmax)
      integer index(nmax)
c     
      character*80 filename
      real*8 epsinv,douttime,endtime,deouttime,etas,eta
      real*8 outtime,eouttime
      integer n,dim,i,k,initflag,ncor,ii
      real*8 time,initene,nextt
      real*8  dt2half,dt3over6,dt
      real*8  atmp(3), adottmp(3)
      real*8 x0tmp(3),v0tmp(3),a0tmp(3),adot0tmp(3)
      real*8 x1tmp(3),v1tmp(3),a1tmp(3),adot1tmp(3)        
      real*8 speed,st,lt
      integer nstep
      integer isame,nsame,idi,mode
c
      integer ni
      integer index2(npipe)
      real*8 xi(3,npipe),vi(3,npipe),h2i(npipe),eps2i(npipe)
      real*8 foldi(3,npipe),joldi(3,npipe),phioldi(npipe)
      real*8 aby2(3),a1by6(3),a2by18(3)
      real*8 tmpa(3,npipe),tmpadot(3,npipe),tmppot(npipe)
      real*8 over2,over6
c
c
c     
c     read parameter file
c
c     1.0/eps: softening parameter (9999=> eps=0)
c     douttime: interval of output
c     endtime: end time
c     deoutime: interval of energy output
c     filename: name of input file 
c     etas: parameter for initial timestep determination
c     eta: parameter for timestep determination
c
      read(5,*) epsinv,douttime,endtime,deouttime,filename,etas,eta
      write(6,*) 'reading from ',filename
c     
c     read NEMO ascii file
c
      open(10,file=filename,status='old')
      read(10,*) n
      read(10,*) dim
      read(10,*) time
      read(10,*) (m(i),i=1,n)
      read(10,*) ((x0(k,i),k=1,dim),i=1,n)
      read(10,*) ((v0(k,i),k=1,dim),i=1,n)            
      close(10)
c
      do i=1,n
        if (epsinv.eq.9999.0) then  
          eps(i) = 0.0
        else
          eps(i) = 1.0/epsinv 
        end if
        ti(i) = time
      end do         
c
      outtime = douttime + time
      eouttime = deouttime + time
      write(6,*) 'eps=', eps(1)
      write(6,*) 'douttime=', douttime, '  endtime=', endtime
      write(6,*) 'deouttime=', deouttime, '  filename=', filename
      write(6,*) 'etas=', etas, '  eta=', eta
c        
c           Initial setting 
c
      do i=1,n
        do k=1,dim
          a0(k,i) = 1.0
          adot0(k,i) = 100.0
        end do
        pot(i) = -1.0
      end do        
c     
#ifdef USEGRAPE6
      call holdgrape
      call forcegrape6(x0,v0,m,ti,eps,a0,adot0,pot,n)
      call forcegrape6(x0,v0,m,ti,eps,a0,adot0,pot,n)
#else
      call forcehost(x0,v0,m,ti,eps,a0,adot0,pot,n)      
#endif
      initflag=1
      call energy(pot,x0,v0,m,n,initene,initflag,time)
      call initialtimestep(a0,adot0,dti,n,etas)
c       
      do i=1,n
        index(i) = i
      end do
#ifdef USEGRAPE6
      call setparticleongrape(x0,v0,a0,adot0,m,ti,dti,n)
#endif
      nsame = n
      ncor = 0
      call getcputime(st,lt)
      nts = 0
      nstep = 0 
      over2 = 1.0/2.0
      over6 = 1.0/6.0
c
c           Main loop
c      
      do while(time.lt.endtime) 
c
c           Select updated particles
c
        call sorttimestep(nsame,dti,index)
        time = ti(index(1)) + dti(index(1))
        nts = nts + 1
c       
        isame = 1
        nextt = time
	do while((time.eq.nextt).and.(isame.le.n))
	  isame = isame + 1
	  nextt = ti(index(isame)) + dti(index(isame));
        end do
	nsame = isame - 1
c
c           Predictor 
c
#ifdef USEGRAPE6
c
	do i=1,nsame
          idi = index(i)
	  dt = time - ti(idi)
	  dt2half = 0.5*dt*dt
	  dt3over6 = 1.0/3.0*dt*dt2half
	  do k=1,3
            x1(k,idi) = x0(k,idi) + dt*v0(k,idi) + dt2half*a0(k,idi)
     *                  + dt3over6*adot0(k,idi)
            v1(k,idi) = v0(k,idi) + dt*a0(k,idi) + dt2half*adot0(k,idi)
          end do   
	end do
c
#else
c
        do i=1,n
	  dt = time - ti(i)
	  dt2half = 0.5*dt*dt
	  dt3over6 = 1.0/3.0*dt*dt2half
	  do k=1,3
            x1(k,i) = x0(k,i) + dt*v0(k,i) + dt2half*a0(k,i)
     *                  + dt3over6*adot0(k,i)
            v1(k,i) = v0(k,i) + dt*a0(k,i) + dt2half*adot0(k,i)
          end do   
        end do
c
#endif
c
c	     Force calculation
c
#ifdef USEGRAPE6
c
        call g6_set_ti(clusterid, time)
        do i=1,nsame,npipe
	  ni = npipe
	  if((i+ni).gt.nsame) then
            ni = nsame - i + 1
          end if
c
          do ii=1,ni
	    idi = index(i+ii-1)
            index2(ii) = idi
	    do k=1,3
              xi(k,ii) = x1(k,idi)
	      vi(k,ii) = v1(k,idi)
	      foldi(k,ii) = a0(k,idi)
	      joldi(k,ii) = adot0(k,idi)
	    end do
	    h2i(ii) = h2;
	    phioldi(ii) = pot(idi)
            eps2i(ii) = eps(idi)*eps(idi)
          end do
c
          call g6calc_firsthalf0(clusterid,n,ni,index2,xi,vi,
     *                   foldi,joldi,phioldi,eps2i,h2i,mode)          
	  call g6calc_lasthalf(clusterid,n,ni,index2,xi,vi,
     *                   eps(1),h2i,tmpa,tmpadot,tmppot)
c                        
          do ii=1,ni
	    idi = index(i+ii-1)
	    do k=1,3
	      a1(k,idi) = tmpa(k,ii)
	      adot1(k,idi) = tmpadot(k,ii)
	    end do
	    pot(idi) = tmppot(ii);
	  end do 
	end do
c
#else  
c
        do i=1,nsame
          ii = index(i)
          call forceonithparticle(ii,x1,v1,m,eps,atmp,adottmp,pot(ii),n)
          do k=1,3
	    a1(k,ii) = atmp(k)
	    adot1(k,ii) = adottmp(k)
          end do
	end do
c         
#endif
c
c             Corrector  				
c
        do i=1,nsame
	  ii = index(i)
          do k=1,3
	    x0tmp(k) = x0(k,ii)
	    v0tmp(k) = v0(k,ii)
	    a0tmp(k) = a0(k,ii)
	    adot0tmp(k) = adot0(k,ii)
	    x1tmp(k) = x1(k,ii)
	    v1tmp(k) = v1(k,ii)
	    a1tmp(k) = a1(k,ii)
	    adot1tmp(k) = adot1(k,ii)
          end do
          call correct(x1tmp,v1tmp,x0tmp,v0tmp,a0tmp,adot0tmp,a1tmp,
     *                   adot1tmp,dti(ii),time,eta)
          do k=1,3
	    x1(k,ii) = x1tmp(k)
	    v1(k,ii) = v1tmp(k)
          end do
c
          ti(ii) = time
          do k=1,3
	    x0(k,ii) = x1(k,ii)
	    v0(k,ii) = v1(k,ii)
	    a0(k,ii) = a1(k,ii)
	    adot0(k,ii) = adot1(k,ii)
	  end do
c           
#ifdef USEGRAPE6
	  do k=1,3
	    x0tmp(k) = x0(k,ii)
	    v0tmp(k) = v0(k,ii)
            aby2(k) = over2*a0(k,ii)
            a1by6(k) = over6*adot0(k,ii)
            a2by18(k) = 0.0
	  end do
          call g6_set_j_particle(clusterid, ii-1, ii, ti(ii),
     *           dti(ii), m(ii), a2by18, a1by6, aby2, v0tmp, x0tmp)
#endif
c
        end do           
        ncor = nsame
c       
c             Output misc. 
c
        nstep = nstep + ncor
        if(time.ge.eouttime) then 
  	  call energy(pot,x1,v1,m,n,initene,initflag,time)
	  eouttime = eouttime + deouttime
          write(6,*) '   ncor = ',ncor
          write(6,*) '   nts = ',nts,' nstep = ', nstep
	  call getcputime(st,lt)
	  write(6,*) '   cputime ',st,lt
          speed = 60.0*n*nstep/st/1000000000.0
          write(6,*) '   speed ',speed,' Gflops ',nstep/st,' nstep/s'
	  nts = 0
	  nstep = 0 
cc          call lagrad(pot,x1,n,time)
        end if
        ncor = 0
c
      end do
c       
c          End of Main Loop
c         
#ifdef USEGRAPE6
      call free_grape
#endif
      stop
      end
c
c--------------------------------------------------------------------------
c
      subroutine correct(x1,v1,x0,v0,a0,adot0,a1,adot1,dt,time,eta)
c
      include 'sticky8.inc'
      real*8 x0(3),v0(3),a0(3),adot0(3)
      real*8 x1(3),v1(3),a1(3),adot1(3)
      real*8 dt,time,eta
c
      integer k,power,intmod
      real*8 dt3over6,dt4over24,dt5over120
      real*8 dtinv,dt2inv,dt3inv,nextdt
      real*8 a0mia1,ad04plad12,ad0plad1,a2(3),a3(3)
      real*8 a1abs,adot1abs,a2dot1abs,a3dot1abs,a2dot1(3)
      real*8 realmod
c     
      dt3over6 = dt*dt*dt/6.0
      dt4over24 = dt3over6*dt/4.0
      dt5over120 = dt4over24*dt/5.0
      dtinv = 1.0/dt
      dt2inv = dtinv*dtinv
      dt3inv = dt2inv*dtinv
      do k=1,3
        a0mia1 = a0(k)-a1(k)
        ad04plad12 = 4.0*adot0(k) + 2.0*adot1(k)
	ad0plad1 = adot0(k) + adot1(k)
	a2(k) = -6.0*a0mia1*dt2inv - ad04plad12*dtinv
	a3(k) = 12.0*a0mia1*dt3inv + 6.0*ad0plad1*dt2inv
	x1(k) = x1(k) + dt4over24*a2(k) + dt5over120*a3(k)
	v1(k) = v1(k) + dt3over6*a2(k) + dt4over24*a3(k)
      end do
c
      a1abs = sqrt(a1(1)*a1(1)+a1(2)*a1(2)+a1(3)*a1(3))
      adot1abs = sqrt(adot1(1)*adot1(1)+adot1(2)*adot1(2)
     *                +adot1(3)*adot1(3))
      do k=1,3
        a2dot1(k) = a2(k) + dt*a3(k)
      end do
      a2dot1abs = sqrt(a2dot1(1)*a2dot1(1)+a2dot1(2)*a2dot1(2)
     *                  +a2dot1(3)*a2dot1(3))
      a3dot1abs = sqrt(a3(1)*a3(1)+a3(2)*a3(2)+a3(3)*a3(3))
      nextdt=sqrt(eta*(a1abs*a2dot1abs+adot1abs*adot1abs)
     *		/(adot1abs*a3dot1abs+a2dot1abs*a2dot1abs))
      if(nextdt.lt.dt) then
        if (nextdt.gt.1.0D-8) then 
	  power = log(nextdt)/log(2.0)-1
          dt = 2.0**power
        end if 
      end if
      if(nextdt.gt.(2.0*dt)) then 
        intmod = time / (2.0*dt)
        realmod = time / (2.0*dt)        
        if((realmod-intmod).eq.0) then 
          if((2.0*dt).le.timestepmax) then 
	    dt = 2.0 * dt
          end if
        end if  
      end if
c     
      return
      end
c     
c--------------------------------------------------------------------------
c
      subroutine forceonithparticle(i,x,v,m,eps,a,adot,pot,n)
c
      include 'sticky8.inc'
      integer i,n
      real*8 x(3,nmax),v(3,nmax),m(nmax),eps(nmax)
      real*8 a(3),adot(3),pot
c
      integer j,k
      real*8 r2,r3inv,r2inv,rinv,eps2,xdotv
      real*8 r5inv,xdotvr5inv,r3invdx,r3invdvetc
      real*8 dx(3),dv(3)
c	
      do k=1,3
        a(k) = 0.0
	adot(k) = 0.0
      end do
      pot = 0.0
c
      eps2 = eps(i)*eps(i)
      do j=1,n
        if(j.ne.i) then
	  r2 = eps2
	  xdotv = 0.0
	  do k=1,3 
            dx(k) = x(k,j) - x(k,i)
	    dv(k) = v(k,j) - v(k,i)
	    r2 = r2 + dx(k) * dx(k)
	    xdotv = xdotv + dx(k)*dv(k)
	  end do
          r2inv = 1.0/r2
          rinv = sqrt(r2inv)
          r3inv = r2inv*rinv
          r5inv = r2inv*r2inv*rinv
	  xdotvr5inv = 3.0*xdotv*r5inv
          do k=1,3
	    r3invdx = r3inv * dx(k)
	    a(k) = a(k) + m(j) * r3invdx
	    r3invdvetc = r3inv * dv(k) - xdotvr5inv * dx(k) 
	    adot(k) = adot(k) + m(j) * r3invdvetc
	  end do
	  pot = pot - m(j)*rinv
        end if
      end do
c     
      return
      end
c
c--------------------------------------------------------------------------
c
      subroutine sorttimestep(n,dti,index)
c
      include 'sticky8.inc'
      real*8 dti(nmax)
      integer n,index(nmax)
      integer itmp
c     
      do i=1,n-1
        do j=i+1,n
          if(dti(index(i)).gt.dti(index(j))) then
            itmp = index(i)
            index(i) = index(j)
            index(j) = itmp
         end if             
        end do           
      end do
c     
      return
      end
c      
c--------------------------------------------------------------------------
c
      subroutine forcehost(x,v,m,ti,eps,a,adot,pot,n)
c
      include 'sticky8.inc'
      real*8 x(3,nmax),v(3,nmax),m(nmax),ti(nmax)
      real*8 eps(nmax),a(3,nmax),adot(3,nmax),pot(nmax)
      integer n
c
      integer i,j,d,k
      real*8 r2,r3inv,r2inv,rinv,eps2,xdotv
      real*8 r5inv,xdotvr5inv,r3invdx
      real*8 r3invdvetc, dx(3),dv(3)
c	
      do i=1,n
        do k=1,3
          a(k,i) = 0.0
          adot(k,i) = 0.0
	end do
        pot(i) = 0.0
      end do
c
      do i=1,n
         eps2 = eps(i)*eps(i)
         do j=1,n
          if(i.ne.j) then  
            r2 = eps2
	    xdotv = 0.0
	    do k=1,3
	      dx(k) = x(k,j) - x(k,i)
	      dv(k) = v(k,j) - v(k,i)
	      r2 = r2 + dx(k)* dx(k)
	      xdotv = xdotv + dx(k)*dv(k)
	    end do
            r2inv = 1.0/r2
            rinv = sqrt(r2inv)
            r3inv = r2inv*rinv
            r5inv = r2inv*r2inv*rinv
	    xdotvr5inv = 3.0*xdotv*r5inv
            do k=1,3
	      r3invdx = r3inv * dx(k)
	      a(k,i) = a(k,i) + m(j)* r3invdx
	      r3invdvetc = r3inv * dv(k) - xdotvr5inv * dx(k)
	      adot(k,i) = adot(k,i) + m(j) * r3invdvetc
            end do   
	    pot(i) = pot(i) - m(j)*rinv
         end if
        end do
      end do
c
      return
      end
c
c--------------------------------------------------------------------------      
c
      subroutine energy(pot,x,v,m,n,initene,initflag,time)
c
      include 'sticky8.inc'
      real*8 pot(nmax),x(3,nmax),v(3,nmax),m(nmax),initene,time
      integer n,initflag
c
      real*8 totalpot,totalkin, error
      integer i	
c
      totalpot = 0.0
      totalkin = 0.0         
c
      do i=1,n
 	totalpot = totalpot + 0.5*m(i)*pot(i)
	totalkin = totalkin
     *        + 0.5*m(i)*(v(1,i)*v(1,i)+v(2,i)*v(2,i)+v(3,i)*v(3,i))
      end do
c
      write(6,*) 'time = ',time
      write(6,1000) totalpot,totalkin      
 1000 format('  pot=',F25.17,' kin= ',F25.17)
      write(6,*) '  total = ',totalpot+totalkin,
     *                '  ratio = ',totalkin/totalpot
      if(initflag.eq.1) then
        initene = totalpot+totalkin
        initflag = 0 
      else
        error = (initene-(totalpot+totalkin))/initene
        write(6,*) '  error = ', error,time  
      end if
c     
      return
      end
c     
c--------------------------------------------------------------------------      
c
      subroutine initialtimestep(a,adot,dt,n,etas)
c
      include 'sticky8.inc'
      real*8 a(3,nmax),adot(3,nmax),dt(nmax),etas
      integer n
c
      real*8 a2,adot2
      integer power,i
c
      do i=1,n
         a2 = a(1,i)*a(1,i)+a(2,i)*a(2,i)+a(3,i)*a(3,i)
	adot2 = adot(1,i)*adot(1,i)+adot(2,i)*adot(2,i)
     *              +adot(3,i)*adot(3,i)
	if(adot2.eq.0.0) then
	  dt(i) = etas
	else
	  dt(i) = etas*sqrt(a2/adot2)
        end if
        power = log(dt(i))/log(2.0)  
        dt(i) = 2.0**(power-1)
	if(dt(i).gt.timestepmax) dt(i) = timestepmax
      end do
c     
      return
      end
c      
c--------------------------------------------------------------------------
#ifdef USEGRAPE6
c     
      subroutine holdgrape
c
      include 'sticky8.inc'
      integer tunit,xunit
c
      tunit = 51
      xunit = 50
c
      call g6_set_tunit(tunit)
      call g6_set_xunit(xunit)
      call g6_open(clusterid)
c
      return
      end
c      
#endif
c--------------------------------------------------------------------------
#ifdef USEGRAPE6
c     
      subroutine free_grape
c
      include 'sticky8.inc'
c     
      call g6_close(clusterid)
c
      return
      end
c      
#endif
c--------------------------------------------------------------------------
#ifdef USEGRAPE6
c     
      subroutine setparticleongrape(x,v,a,adot,m,t,dt,n)
c
      include 'sticky8.inc'
      real*8 x(3,nmax),v(3,nmax),a(3,nmax),adot(3,nmax)
      real*8 m(nmax),t(nmax),dt(nmax)
      integer n
      integer i,k
      real*8 over2,over6
      real*8 a2by18(3), a1by6(3), aby2(3)
      real*8 tmpx(3), tmpv(3)
c     
      over2 = 1.0/2.0
      over6 = 1.0/6.0

      do i=1,n
	do k=1,3
	  aby2(k) = over2 * a(k,i)
	  a1by6(k) = over6 * adot(k,i)
	  a2by18(k) = 0.0
          tmpx(k) = x(k,i)
          tmpv(k) = v(k,i)          
        end do
        call g6_set_j_particle(clusterid,i-1,i,t(i),
     *           dt(i),m(i), a2by18, a1by6, aby2, tmpv, tmpx)
      end do
c
      return
      end
c      
#endif
c--------------------------------------------------------------------------
#ifdef USEGRAPE6
c     
      subroutine forcegrape6(x,v,m,t,eps,a,adot,pot,n)
c
      include 'sticky8.inc'
      real*8 x(3,nmax),v(3,nmax),m(nmax),t(nmax)
      real*8 eps(nmax),a(3,nmax),adot(3,nmax),pot(nmax)
      integer n
c     
      integer i,j,k,ni,ii,mode
      real*8 a2by18(3), a1by6(3), aby2(3)      
      real*8 tmpv(3), tmpx(3)
      real*8 xi(3,npipe),vi(3,npipe),foldi(3,npipe)
      real*8 joldi(3,npipe),h2i(npipe),phioldi(npipe),eps2i(npipe)
      integer index2(npipe)
      real*8 tmpa(3,npipe),tmpadot(3,npipe),tmppot(npipe)
      real*8 dtj
c     
      dtj = 1.0;
      do k=1,3
        a2by18(k) = 0.0
        a1by6(k) = 0.0
        aby2(k) = 0.0
      end do
c
      do i=1,n
        do k=1,3
          tmpx(k) = x(k,i)
          tmpv(k) = v(k,i)          
        end do
        call g6_set_j_particle(clusterid, i-1, i, t(i), dtj, m(i),
     *             a2by18, a1by6, aby2, tmpv, tmpx);
      end do
c
      call g6_set_ti(clusterid, t(1))
c
      do i=1,n,npipe
        ni = npipe
        if((i+ni).gt.n) then
          ni = n - i + 1
        end if 
c      
        do ii=1,ni
          idi = i+ii-1
          index2(ii) = idi
          do k=1,3
            xi(k,ii) = x(k,idi)
            vi(k,ii) = v(k,idi)
            foldi(k,ii) = a(k,idi)
            joldi(k,ii) = adot(k,idi)
          end do
          phioldi(ii) = pot(idi)
          eps2i(ii) = eps(idi)*eps(idi)
          h2i(ii) = 0.0
        end do          
c         
        call g6calc_firsthalf0(clusterid,n,ni,index2,xi,vi,
     *                 foldi,joldi,phioldi,eps2i,h2i,mode)        
        call g6calc_lasthalf(clusterid,n,ni,index2,xi,vi,eps(1),
     *                  h2i,tmpa,tmpadot,tmppot)
c
        do ii=1,ni
          idi = i+ii-1
          do k=1,3
            a(k,idi) = tmpa(k,ii)
            adot(k,idi) = tmpadot(k,ii)
          end do
          pot(idi) = tmppot(ii)
        end do
      end do
c      
      return
      end
c      
#endif
c--------------------------------------------------------------------------
