/*
 * pg2g6emu.c: pg2g6 emulator user library.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <errno.h>
#include <assert.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <pg2g6util.h>

/*
 * ONEHALF: constant added to the scaled coordinate just before the
 * double -> UINT64 cast in convert_xj() and convert_xi().
 * With 0.5 the truncating cast rounds non-negative values to nearest.
 * NOTE(review): the 0.0 variant presumably relies on the Intel compiler's
 * -rcd switch changing how the cast itself rounds -- confirm against the
 * compiler documentation.
 */
#ifdef ICC_RCD
#define ONEHALF (0.0) // Intel CC with -rcd switch
#else
#define ONEHALF (0.5) // standard C
#endif

/*
 * WARN(lv, fmt, ...): print a printf-style message to stderr when 'lv' does
 * not exceed the current warn_level.
 *
 * The body is wrapped in do { } while (0) so that 'WARN(...);' is exactly one
 * statement. The previous form expanded to 'if (...) fprintf(...);' followed
 * by the caller's own ';', which broke 'if (x) WARN(...); else ...' and could
 * capture a following 'else'.
 */
#define WARN(lv, fmt, args...) \
    do { \
        if ((lv) <= warn_level) fprintf(stderr, fmt, ## args); \
    } while (0)
static int warn_level = 2; // warning message output level. the higher the more verbose.

// number of i-particle slots; reported by pg2g6_get_number_of_pipelinesMC().
#define NPIPE 1
// number of j-particle slots; reported by pg2g6_get_jmemsizeMC().
#define JMEMSIZE 2048

// number of j-particles visited per i-particle in processor(); set by pg2g6_set_nMC().
static UINT32 Nbodies;

/*
 * raw bit data
 *
 * Every value below is held in its converted (bit-pattern) form, as
 * produced by the convert_*() functions, one UINT64 per scalar.
 */
// COEFF variables (written by pg2g6_set_coeffMC)
static UINT64 Iti;

// IP variables (written by pg2g6_set_ipMC), indexed by i-particle slot
static UINT64 Ixi[NPIPE][3];
static UINT64 Ivi[NPIPE][3];
static UINT64 Iepsi2[NPIPE];
static UINT64 Iindexi[NPIPE];

// JP variables (written by pg2g6_set_jpMC), indexed by j-particle slot
static UINT64 Imj[JMEMSIZE];
static UINT64 Ixj[JMEMSIZE][3];
static UINT64 Ivj[JMEMSIZE][3];
static UINT64 Itj[JMEMSIZE];
static UINT64 Iacc0by2[JMEMSIZE][3];
static UINT64 Ijerk0by6[JMEMSIZE][3];
static UINT64 Iindexj[JMEMSIZE];

// FO variables (accumulated by pipeline(), read back by pg2g6_get_foutMC)
static UINT64 Iacc[NPIPE][3];
static UINT64 Ijerk[NPIPE][3];


/*
 * local function prototypes
 */
static void processor(int ni);
static void pipeline(UINT64 ti, 
    UINT64 vi_0, UINT64 vi_1, UINT64 vi_2, UINT64 xi_0, UINT64 xi_1, UINT64 xi_2, UINT64 indexi, UINT64 epsi2, 
    UINT64 jerk0by6_0, UINT64 jerk0by6_1, UINT64 jerk0by6_2, UINT64 mj, UINT64 xj_0, UINT64 vj_0, UINT64 acc0by2_0, UINT64 xj_1, UINT64 vj_1, UINT64 acc0by2_1, UINT64 xj_2, UINT64 vj_2, UINT64 acc0by2_2, UINT64 indexj, UINT64 tj, 
    UINT64 *acc_0p, UINT64 *acc_1p, UINT64 *acc_2p, UINT64 *jerk_0p, UINT64 *jerk_1p, UINT64 *jerk_2p, 
    int run_begin);

// COEFF conversion
static inline UINT64 convert_ti(int devid, double src);

// JP conversion
static inline UINT64 convert_mj(int devid, double src);
static inline UINT64 convert_xj(int devid, double src);
static inline UINT64 convert_vj(int devid, double src);
static inline UINT64 convert_tj(int devid, double src);
static inline UINT64 convert_acc0by2(int devid, double src);
static inline UINT64 convert_jerk0by6(int devid, double src);
static inline UINT64 convert_indexj(int devid, int src);

// IP conversion
static inline UINT64 convert_xi(int devid, double src);
static inline UINT64 convert_vi(int devid, double src);
static inline UINT64 convert_epsi2(int devid, double src);
static inline UINT64 convert_indexi(int devid, int src);

// FO conversion
static inline double convert_acc(int devid, UINT64 src);
static inline double convert_jerk(int devid, UINT64 src);


/*
 * Convert the time 'ti' to its raw bit representation (via convert_ti, which
 * applies the per-device 'ti' scale) and store it in Iti for later use by
 * processor().
 */
void
pg2g6_set_coeffMC(int devid, double ti)
{
    Iti = convert_ti(devid, ti);
}

void
pg2g6_set_jpMC(int devid, int adr, int nj, double *mj, double (*xj)[3], double (*vj)[3], double *tj, double (*acc0by2)[3], double (*jerk0by6)[3], int *indexj)
{
    int j, k;

    for (j = 0; j < nj; j++) {
        Imj[j] = convert_mj(devid, mj[j]);
        for (k = 0; k < 3; k++) {
            Ixj[j][k] = convert_xj(devid, xj[j][k]);
        }
        for (k = 0; k < 3; k++) {
            Ivj[j][k] = convert_vj(devid, vj[j][k]);
        }
        Itj[j] = convert_tj(devid, tj[j]);
        for (k = 0; k < 3; k++) {
            Iacc0by2[j][k] = convert_acc0by2(devid, acc0by2[j][k]);
        }
        for (k = 0; k < 3; k++) {
            Ijerk0by6[j][k] = convert_jerk0by6(devid, jerk0by6[j][k]);
        }
        Iindexj[j] = convert_indexj(devid, indexj[j]);

    }
}

void
pg2g6_set_ipMC(int devid, int ni, double (*xi)[3], double (*vi)[3], double *epsi2, int *indexi)
{
    int i, k;

    for (i = 0; i < ni; i++) {
        for (k = 0; k < 3; k++) {
            Ixi[i][k] = convert_xi(devid, xi[i][k]);
        }
        for (k = 0; k < 3; k++) {
            Ivi[i][k] = convert_vi(devid, vi[i][k]);
        }
        Iepsi2[i] = convert_epsi2(devid, epsi2[i]);
        Iindexi[i] = convert_indexi(devid, indexi[i]);

    }
}

void
pg2g6_get_foutMC(int devid, int ni, double (*acc)[3], double (*jerk)[3])
{
    int i, k;

    processor(ni);
    for (i = 0; i < ni; i++) {
        for (k = 0; k < 3; k++) {
            acc[i][k] = convert_acc(devid, Iacc[i][k]);
        }
        for (k = 0; k < 3; k++) {
            jerk[i][k] = convert_jerk(devid, Ijerk[i][k]);
        }

    }
}

/* Open device 'devid'. Intentionally empty in this emulator. */
void
pg2g6_openMC(int devid)
{
    (void)devid; // no resource to acquire.
}

/* Close device 'devid'. Intentionally empty in this emulator. */
void
pg2g6_closeMC(int devid)
{
    (void)devid; // no resource to release.
}

/*
 * Start a calculation on device 'devid'. Intentionally empty: in this file
 * the work is done by processor(), which pg2g6_get_foutMC() invokes.
 */
void
pg2g6_runMC(int devid)
{
    (void)devid; // nothing to trigger.
}

void
pg2g6_set_nMC(int devid, int n)
{
    Nbodies = n;
}

/* Return the number of i-particle slots (NPIPE). 'devid' is not used. */
int
pg2g6_get_number_of_pipelinesMC(int devid)
{
    (void)devid;
    return NPIPE;
}

/* Return the number of j-particle slots (JMEMSIZE). 'devid' is not used. */
int
pg2g6_get_jmemsizeMC(int devid)
{
    (void)devid;
    return JMEMSIZE;
}

/*
 * Fill '*devinfo' with the device list: exactly one device, whose id is 0.
 * Only the 'ndevice' field and element 0 of 'deviceid' are written.
 */
void
pg2g6_device_info(pg2g6_device_info_t *devinfo)
{
    devinfo->ndevice = 1;
    devinfo->deviceid[0] = 0;
}

/* Single-device convenience wrapper: pg2g6_openMC() on device 0. */
void
pg2g6_open(void)
{
    pg2g6_openMC(0);
}

/* Single-device convenience wrapper: pg2g6_closeMC() on device 0. */
void
pg2g6_close(void)
{
    pg2g6_closeMC(0);
}

/* Single-device convenience wrapper: pg2g6_set_coeffMC() on device 0. */
void
pg2g6_set_coeff(double ti)
{
    pg2g6_set_coeffMC(0, ti);
}

/*
 * Single-device convenience wrapper: pg2g6_set_jpMC() on device 0.
 * All array arguments are forwarded from their first element.
 */
void
pg2g6_set_jp(int adr, int nj, double *mj, double (*xj)[3], double (*vj)[3], double *tj, double (*acc0by2)[3], double (*jerk0by6)[3], int *indexj)
{
    pg2g6_set_jpMC(0, adr, nj, mj, xj, vj, tj, acc0by2, jerk0by6, indexj);
}

/* Single-device convenience wrapper: pg2g6_set_ipMC() on device 0. */
void
pg2g6_set_ip(int ni, double (*xi)[3], double (*vi)[3], double *epsi2, int *indexi)
{
    pg2g6_set_ipMC(0, ni, xi, vi, epsi2, indexi);
}

/* Single-device convenience wrapper: pg2g6_runMC() on device 0. */
void
pg2g6_run(void)
{
    pg2g6_runMC(0);
}

/* Single-device convenience wrapper: pg2g6_set_nMC() on device 0. */
void
pg2g6_set_n(int nj)
{
    pg2g6_set_nMC(0, nj);
}

/* Single-device convenience wrapper: pg2g6_get_foutMC() on device 0. */
void
pg2g6_get_fout(int ni, double (*acc)[3], double (*jerk)[3])
{
    pg2g6_get_foutMC(0, ni, acc, jerk);
}

/* Single-device convenience wrapper: pg2g6_get_number_of_pipelinesMC() on device 0. */
int
pg2g6_get_number_of_pipelines(void)
{
    return pg2g6_get_number_of_pipelinesMC(0);
}

/*
 * Single-device convenience wrapper: return the j-memory size of device 0.
 *
 * The result of pg2g6_get_jmemsizeMC() must be returned explicitly; falling
 * off the end of a non-void function leaves the caller with an
 * indeterminate value.
 */
int
pg2g6_get_jmemsize(void)
{
    int devid = 0;
    return pg2g6_get_jmemsizeMC(devid);
}

/*
 * Compute acc/jerk for 'ni' i-particles on device 0.
 *
 * The i-particles are handed to the device in consecutive batches of at most
 * pg2g6_get_number_of_pipelines() entries. Each batch goes through
 * set_ip -> run -> get_fout, and its results land at the matching offset of
 * 'acc' and 'jerk'.
 */
void
pg2g6_calculate_fout_on_ip(double (*xi)[3], double (*vi)[3], double *epsi2, int *indexi, double (*acc)[3], double (*jerk)[3], int ni)
{
    const int npipe = pg2g6_get_number_of_pipelines();
    int first;

    for (first = 0; first < ni; first += npipe) {
        // the last batch may be shorter than a full one.
        const int remaining = ni - first;
        const int count = (remaining < npipe) ? remaining : npipe;

        pg2g6_set_ip(count, xi + first, vi + first, epsi2 + first, indexi + first);
        pg2g6_run();
        pg2g6_get_fout(count, acc + first, jerk + first);
    }
}

/*
 * scaling utilities for 'ti' of type float63.52:
 */

// per-device factor multiplied onto 'ti' in convert_ti(); indexed by devid.
static double Ti_scale[NHIB];

/* Set the 'ti' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_tiMC(int devid, double scale)
{
    Ti_scale[devid] = scale;
}

/* Set the 'ti' scale factor of device 0. */
inline void
pg2g6_set_scale_ti(double scale)
{
    pg2g6_set_scale_tiMC(0, scale);
}

/* Return the 'ti' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_tiMC(int devid)
{
    return Ti_scale[devid];
}

/* Return the 'ti' scale factor of device 0. */
inline double
pg2g6_get_scale_ti(void)
{
    return pg2g6_get_scale_tiMC(0);
}


/*
 * scaling utilities for 'mj' of type float34.23:
 */

// per-device factor multiplied onto 'mj' in convert_mj(); indexed by devid.
static double Mj_scale[NHIB];

/* Set the 'mj' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_mjMC(int devid, double scale)
{
    Mj_scale[devid] = scale;
}

/* Set the 'mj' scale factor of device 0. */
inline void
pg2g6_set_scale_mj(double scale)
{
    pg2g6_set_scale_mjMC(0, scale);
}

/* Return the 'mj' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_mjMC(int devid)
{
    return Mj_scale[devid];
}

/* Return the 'mj' scale factor of device 0. */
inline double
pg2g6_get_scale_mj(void)
{
    return pg2g6_get_scale_mjMC(0);
}

/*
 * scaling utilities for 'xj' of type int64:
 */

// per-device mapping of 'xj' onto the 64-bit integer range; indexed by devid.
static double Xj_scale[NHIB];   // 2^64 / (max - min)
static double Xj_offset[NHIB];  // value subtracted before scaling (= min)
static double Xj_min[NHIB];     // lower bound as given to set_range
static double Xj_max[NHIB];     // upper bound as given to set_range

/*
 * Define the 'xj' coordinate range [min, max] of device 'devid' and derive
 * the scale and offset that convert_xj() uses.
 */
void
pg2g6_set_range_xjMC(int devid, double min, double max)
{
    double span;

    span = max - min;
    Xj_min[devid] = min;
    Xj_max[devid] = max;
    Xj_offset[devid] = min;
    Xj_scale[devid] = pow(2.0, 64.0) / span;
}

/* Define the 'xj' coordinate range of device 0. */
void
pg2g6_set_range_xj(double min, double max)
{
    pg2g6_set_range_xjMC(0, min, max);
}

/* Read back the 'xj' range of device 'devid' into '*min' and '*max'. */
void
pg2g6_get_range_xjMC(int devid, double *min, double *max)
{
    *max = Xj_max[devid];
    *min = Xj_min[devid];
}

/* Read back the 'xj' range of device 0 into '*min' and '*max'. */
void
pg2g6_get_range_xj(double *min, double *max)
{
    pg2g6_get_range_xjMC(0, min, max);
}

/* Return the 'xj' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_xjMC(int devid)
{
    return Xj_scale[devid];
}

/* Return the 'xj' scale factor of device 0. */
inline double
pg2g6_get_scale_xj(void)
{
    return pg2g6_get_scale_xjMC(0);
}

/* Return the 'xj' offset of device 'devid'. */
inline double
pg2g6_get_offset_xjMC(int devid)
{
    return Xj_offset[devid];
}

/* Return the 'xj' offset of device 0. */
inline double
pg2g6_get_offset_xj(void)
{
    return pg2g6_get_offset_xjMC(0);
}


/*
 * scaling utilities for 'vj' of type float28.17:
 */

// per-device factor multiplied onto 'vj' in convert_vj(); indexed by devid.
static double Vj_scale[NHIB];

/* Set the 'vj' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_vjMC(int devid, double scale)
{
    Vj_scale[devid] = scale;
}

/* Set the 'vj' scale factor of device 0. */
inline void
pg2g6_set_scale_vj(double scale)
{
    pg2g6_set_scale_vjMC(0, scale);
}

/* Return the 'vj' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_vjMC(int devid)
{
    return Vj_scale[devid];
}

/* Return the 'vj' scale factor of device 0. */
inline double
pg2g6_get_scale_vj(void)
{
    return pg2g6_get_scale_vjMC(0);
}

/*
 * scaling utilities for 'tj' of type float63.52:
 */

// per-device factor multiplied onto 'tj' in convert_tj(); indexed by devid.
static double Tj_scale[NHIB];

/* Set the 'tj' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_tjMC(int devid, double scale)
{
    Tj_scale[devid] = scale;
}

/* Set the 'tj' scale factor of device 0. */
inline void
pg2g6_set_scale_tj(double scale)
{
    pg2g6_set_scale_tjMC(0, scale);
}

/* Return the 'tj' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_tjMC(int devid)
{
    return Tj_scale[devid];
}

/* Return the 'tj' scale factor of device 0. */
inline double
pg2g6_get_scale_tj(void)
{
    return pg2g6_get_scale_tjMC(0);
}

/*
 * scaling utilities for 'acc0by2' of type float28.17:
 */

// per-device factor multiplied onto 'acc0by2' in convert_acc0by2(); indexed by devid.
static double Acc0by2_scale[NHIB];

/* Set the 'acc0by2' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_acc0by2MC(int devid, double scale)
{
    Acc0by2_scale[devid] = scale;
}

/* Set the 'acc0by2' scale factor of device 0. */
inline void
pg2g6_set_scale_acc0by2(double scale)
{
    pg2g6_set_scale_acc0by2MC(0, scale);
}

/* Return the 'acc0by2' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_acc0by2MC(int devid)
{
    return Acc0by2_scale[devid];
}

/* Return the 'acc0by2' scale factor of device 0. */
inline double
pg2g6_get_scale_acc0by2(void)
{
    return pg2g6_get_scale_acc0by2MC(0);
}

/*
 * scaling utilities for 'jerk0by6' of type float28.17:
 */

// per-device factor multiplied onto 'jerk0by6' in convert_jerk0by6(); indexed by devid.
static double Jerk0by6_scale[NHIB];

/* Set the 'jerk0by6' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_jerk0by6MC(int devid, double scale)
{
    Jerk0by6_scale[devid] = scale;
}

/* Set the 'jerk0by6' scale factor of device 0. */
inline void
pg2g6_set_scale_jerk0by6(double scale)
{
    pg2g6_set_scale_jerk0by6MC(0, scale);
}

/* Return the 'jerk0by6' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_jerk0by6MC(int devid)
{
    return Jerk0by6_scale[devid];
}

/* Return the 'jerk0by6' scale factor of device 0. */
inline double
pg2g6_get_scale_jerk0by6(void)
{
    return pg2g6_get_scale_jerk0by6MC(0);
}


/*
 * scaling utilities for 'xi' of type int64:
 */

// per-device mapping of 'xi' onto the 64-bit integer range; indexed by devid.
static double Xi_scale[NHIB];   // 2^64 / (max - min)
static double Xi_offset[NHIB];  // value subtracted before scaling (= min)
static double Xi_min[NHIB];     // lower bound as given to set_range
static double Xi_max[NHIB];     // upper bound as given to set_range

/*
 * Define the 'xi' coordinate range [min, max] of device 'devid' and derive
 * the scale and offset that convert_xi() uses.
 */
void
pg2g6_set_range_xiMC(int devid, double min, double max)
{
    double span;

    span = max - min;
    Xi_min[devid] = min;
    Xi_max[devid] = max;
    Xi_offset[devid] = min;
    Xi_scale[devid] = pow(2.0, 64.0) / span;
}

/* Define the 'xi' coordinate range of device 0. */
void
pg2g6_set_range_xi(double min, double max)
{
    pg2g6_set_range_xiMC(0, min, max);
}

/* Read back the 'xi' range of device 'devid' into '*min' and '*max'. */
void
pg2g6_get_range_xiMC(int devid, double *min, double *max)
{
    *max = Xi_max[devid];
    *min = Xi_min[devid];
}

/* Read back the 'xi' range of device 0 into '*min' and '*max'. */
void
pg2g6_get_range_xi(double *min, double *max)
{
    pg2g6_get_range_xiMC(0, min, max);
}

/* Return the 'xi' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_xiMC(int devid)
{
    return Xi_scale[devid];
}

/* Return the 'xi' scale factor of device 0. */
inline double
pg2g6_get_scale_xi(void)
{
    return pg2g6_get_scale_xiMC(0);
}

/* Return the 'xi' offset of device 'devid'. */
inline double
pg2g6_get_offset_xiMC(int devid)
{
    return Xi_offset[devid];
}

/* Return the 'xi' offset of device 0. */
inline double
pg2g6_get_offset_xi(void)
{
    return pg2g6_get_offset_xiMC(0);
}


/*
 * scaling utilities for 'vi' of type float28.17:
 */

// per-device factor multiplied onto 'vi' in convert_vi(); indexed by devid.
static double Vi_scale[NHIB];

/* Set the 'vi' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_viMC(int devid, double scale)
{
    Vi_scale[devid] = scale;
}

/* Set the 'vi' scale factor of device 0. */
inline void
pg2g6_set_scale_vi(double scale)
{
    pg2g6_set_scale_viMC(0, scale);
}

/* Return the 'vi' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_viMC(int devid)
{
    return Vi_scale[devid];
}

/* Return the 'vi' scale factor of device 0. */
inline double
pg2g6_get_scale_vi(void)
{
    return pg2g6_get_scale_viMC(0);
}

/*
 * scaling utilities for 'epsi2' of type float34.23:
 */

// per-device factor multiplied onto 'epsi2' in convert_epsi2(); indexed by devid.
static double Epsi2_scale[NHIB];

/* Set the 'epsi2' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_epsi2MC(int devid, double scale)
{
    Epsi2_scale[devid] = scale;
}

/* Set the 'epsi2' scale factor of device 0. */
inline void
pg2g6_set_scale_epsi2(double scale)
{
    pg2g6_set_scale_epsi2MC(0, scale);
}

/* Return the 'epsi2' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_epsi2MC(int devid)
{
    return Epsi2_scale[devid];
}

/* Return the 'epsi2' scale factor of device 0. */
inline double
pg2g6_get_scale_epsi2(void)
{
    return pg2g6_get_scale_epsi2MC(0);
}


/*
 * scaling utilities for 'acc' of type int64:
 */

// per-device factor multiplied onto the raw 'acc' result in convert_acc(); indexed by devid.
static double Acc_scale[NHIB];

/* Set the 'acc' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_accMC(int devid, double scale)
{
    Acc_scale[devid] = scale;
}

/* Set the 'acc' scale factor of device 0. */
inline void
pg2g6_set_scale_acc(double scale)
{
    pg2g6_set_scale_accMC(0, scale);
}

/* Return the 'acc' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_accMC(int devid)
{
    return Acc_scale[devid];
}

/* Return the 'acc' scale factor of device 0. */
inline double
pg2g6_get_scale_acc(void)
{
    return pg2g6_get_scale_accMC(0);
}

/*
 * scaling utilities for 'jerk' of type int48:
 */

// per-device factor multiplied onto the raw 'jerk' result in convert_jerk(); indexed by devid.
static double Jerk_scale[NHIB];

/* Set the 'jerk' scale factor of device 'devid'. */
inline void
pg2g6_set_scale_jerkMC(int devid, double scale)
{
    Jerk_scale[devid] = scale;
}

/* Set the 'jerk' scale factor of device 0. */
inline void
pg2g6_set_scale_jerk(double scale)
{
    pg2g6_set_scale_jerkMC(0, scale);
}

/* Return the 'jerk' scale factor of device 'devid'. */
inline double
pg2g6_get_scale_jerkMC(int devid)
{
    return Jerk_scale[devid];
}

/* Return the 'jerk' scale factor of device 0. */
inline double
pg2g6_get_scale_jerk(void)
{
    return pg2g6_get_scale_jerkMC(0);
}



/*
 * Scale 'src' by the 'ti' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (63, 52) format arguments.
 */
static inline UINT64
convert_ti(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Ti_scale[devid];

    WARN(3, "convert 'ti' from double to float63.52.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 63, 52);
    return bits;
}


/*
 * Scale 'src' by the 'mj' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (34, 23) format arguments.
 */
static inline UINT64
convert_mj(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Mj_scale[devid];

    WARN(3, "convert 'mj' from double to float34.23.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 34, 23);
    return bits;
}

/*
 * Map the coordinate 'src' to an unsigned 64-bit integer: subtract the 'xj'
 * offset of device 'devid', multiply by its scale, add ONEHALF and truncate.
 */
static inline UINT64
convert_xj(int devid, double src)
{
    WARN(3, "convert 'xj' from double to int64.\n");

    double origin = pg2g6_get_offset_xjMC(devid);
    double factor = pg2g6_get_scale_xjMC(devid);

    return (UINT64)((src - origin) * factor + ONEHALF);
}

/*
 * Scale 'src' by the 'vj' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (28, 17) format arguments.
 */
static inline UINT64
convert_vj(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Vj_scale[devid];

    WARN(3, "convert 'vj' from double to float28.17.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 28, 17);
    return bits;
}

/*
 * Scale 'src' by the 'tj' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (63, 52) format arguments.
 */
static inline UINT64
convert_tj(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Tj_scale[devid];

    WARN(3, "convert 'tj' from double to float63.52.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 63, 52);
    return bits;
}

/*
 * Scale 'src' by the 'acc0by2' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (28, 17) format arguments.
 */
static inline UINT64
convert_acc0by2(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Acc0by2_scale[devid];

    WARN(3, "convert 'acc0by2' from double to float28.17.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 28, 17);
    return bits;
}

/*
 * Scale 'src' by the 'jerk0by6' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (28, 17) format arguments.
 */
static inline UINT64
convert_jerk0by6(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Jerk0by6_scale[devid];

    WARN(3, "convert 'jerk0by6' from double to float28.17.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 28, 17);
    return bits;
}

/*
 * Return the j-particle index 'src' as a UINT64 (plain integer cast).
 * 'devid' is not used.
 */
static inline UINT64
convert_indexj(int devid, int src)
{
    WARN(3, "convert 'indexj' from int to int32.\n");
    return (UINT64)src;
}


/*
 * Map the coordinate 'src' to an unsigned 64-bit integer: subtract the 'xi'
 * offset of device 'devid', multiply by its scale, add ONEHALF and truncate.
 */
static inline UINT64
convert_xi(int devid, double src)
{
    WARN(3, "convert 'xi' from double to int64.\n");

    double origin = pg2g6_get_offset_xiMC(devid);
    double factor = pg2g6_get_scale_xiMC(devid);

    return (UINT64)((src - origin) * factor + ONEHALF);
}

/*
 * Scale 'src' by the 'vi' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (28, 17) format arguments.
 */
static inline UINT64
convert_vi(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Vi_scale[devid];

    WARN(3, "convert 'vi' from double to float28.17.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 28, 17);
    return bits;
}

/*
 * Scale 'src' by the 'epsi2' factor of device 'devid' and encode it with
 * pg_conv_cdouble_to_float() using the (34, 23) format arguments.
 */
static inline UINT64
convert_epsi2(int devid, double src)
{
    UINT64 bits;
    const double scaled = src * Epsi2_scale[devid];

    WARN(3, "convert 'epsi2' from double to float34.23.\n");
    pg_conv_cdouble_to_float(scaled, &bits, 34, 23);
    return bits;
}

/*
 * Return the i-particle index 'src' as a UINT64 (plain integer cast).
 * 'devid' is not used.
 */
static inline UINT64
convert_indexi(int devid, int src)
{
    WARN(3, "convert 'indexi' from int to int32.\n");
    return (UINT64)src;
}


/*
 * Convert a raw 'acc' accumulator to double: reinterpret 'src' as a signed
 * 64-bit integer and multiply by the 'acc' scale of device 'devid'.
 */
static inline double
convert_acc(int devid, UINT64 src)
{
    WARN(3, "convert 'acc' from int64 to double.\n");
    return Acc_scale[devid] * (INT64)src;
}

/*
 * Convert a raw 'jerk' accumulator to double: interpret the low 48 bits of
 * 'src' as a two's-complement integer and multiply by the 'jerk' scale of
 * device 'devid'.
 *
 * The accumulator is a 48-bit quantity (pipeline() passes width 48 to
 * pg_inc_int), so the sign lives in bit 47. Casting the whole word to INT64
 * only gives the right value if the upper 16 bits already repeat bit 47.
 * Sign-extending from bit 47 here is a no-op in that case and yields the
 * correct negative value otherwise.
 * NOTE(review): whether pg_inc_int leaves the upper 16 bits sign-extended,
 * cleared or carrying overflow is not visible in this file -- confirm
 * against its implementation.
 */
static inline double
convert_jerk(int devid, UINT64 src)
{
    const UINT64 sign_bit = (UINT64)1 << 47;
    const UINT64 low48 = (sign_bit << 1) - 1;
    INT64 value;

    WARN(3, "convert 'jerk' from int48 to double.\n");
    value = (INT64)(src & low48);
    if (src & sign_bit) {
        value -= (INT64)1 << 48; // map [2^47, 2^48) onto [-2^47, 0).
    }
    return Jerk_scale[devid] * value;
}



/*
 *
 * local functions
 *
 */

/*
 * processor-unit emulator
 */
/*
 * For each of the first 'ni' i-particles, feed every j-particle in
 * [0, Nbodies) through pipeline(). The results accumulate in Iacc[i] and
 * Ijerk[i]. The run_begin argument is 1 for the first j-particle of each
 * i-particle and 0 for all later ones.
 */
static void
processor(int ni)
{
    int ip, jp;

    for (ip = 0; ip < ni; ip++) {
        for (jp = 0; jp < Nbodies; jp++) {
            pipeline(Iti,
                Ivi[ip][0], Ivi[ip][1], Ivi[ip][2],
                Ixi[ip][0], Ixi[ip][1], Ixi[ip][2],
                Iindexi[ip], Iepsi2[ip],
                Ijerk0by6[jp][0], Ijerk0by6[jp][1], Ijerk0by6[jp][2],
                Imj[jp],
                Ixj[jp][0], Ivj[jp][0], Iacc0by2[jp][0],
                Ixj[jp][1], Ivj[jp][1], Iacc0by2[jp][1],
                Ixj[jp][2], Ivj[jp][2], Iacc0by2[jp][2],
                Iindexj[jp], Itj[jp],
                &Iacc[ip][0], &Iacc[ip][1], &Iacc[ip][2],
                &Ijerk[ip][0], &Ijerk[ip][1], &Ijerk[ip][2],
                (jp == 0));
        }
    }
}

/*
 * pipeline emulator
 */
/*
 * Evaluate one (i-particle, j-particle) pair through a fixed sequence of
 * pg_* primitive calls and add the six results to the accumulators.
 *
 * All values are raw bit patterns held in UINT64; the numeric arguments
 * that follow each operand in the pg_* calls (e.g. 28, 17 or 64, 0) are the
 * format parameters passed along with it.
 *
 *   ti                       current time.
 *   vi_*, xi_*, indexi,
 *   epsi2                    i-particle data.
 *   jerk0by6_*, mj, xj_*,
 *   vj_*, acc0by2_*,
 *   indexj, tj               j-particle data.
 *   acc_*p, jerk_*p          accumulators updated through pg_inc_int().
 *   run_begin                forwarded unchanged to every pg_inc_int() call.
 *                            NOTE(review): presumably makes pg_inc_int start
 *                            from zero instead of adding -- confirm in pgpgutil.
 *
 * The statement order follows the data-flow graph; variable names ending in
 * D<n> are the outputs of pg_delay() applied to the corresponding signal.
 */
static void
pipeline(UINT64 ti, 
    UINT64 vi_0, UINT64 vi_1, UINT64 vi_2, UINT64 xi_0, UINT64 xi_1, UINT64 xi_2, UINT64 indexi, UINT64 epsi2, 
    UINT64 jerk0by6_0, UINT64 jerk0by6_1, UINT64 jerk0by6_2, UINT64 mj, UINT64 xj_0, UINT64 vj_0, UINT64 acc0by2_0, UINT64 xj_1, UINT64 vj_1, UINT64 acc0by2_1, UINT64 xj_2, UINT64 vj_2, UINT64 acc0by2_2, UINT64 indexj, UINT64 tj, 
    UINT64 *acc_0p, UINT64 *acc_1p, UINT64 *acc_2p, UINT64 *jerk_0p, UINT64 *jerk_1p, UINT64 *jerk_2p, 
    int run_begin)
{
    int i, j;
    UINT64 acc0D7_0, acc0D7_1, acc0D7_2, acc0_0, acc0_1, acc0_2, acc0by2D11_0, acc0by2D11_1;
    UINT64 acc0by2D11_2, dt, dt2, dt3, dtD3, dvD2_0, dvD2_1, dvD2_2;
    UINT64 dvD38_0, dvD38_1, dvD38_2, dv_0, dv_1, dv_2, dxD36_0, dxD36_1;
    UINT64 dxD36_2, dx_0, dx_1, dx_2, j1_0, j1_1, j1_2, j2D3_0;
    UINT64 j2D3_1, j2D3_2, j2_0, j2_1, j2_2, jerk0by2D8_0, jerk0by2D8_1, jerk0by2D8_2;
    UINT64 jerk0by2_0, jerk0by2_1, jerk0by2_2, jerk0by6D14_0, jerk0by6D14_1, jerk0by6D14_2, mass, massD62;
    UINT64 mjD1, mjr3inv, mjr5inv, node1294, node1295_0, node1295_1, node1295_2, node1297_0;
    UINT64 node1297_1, node1297_2, node1298_0, node1298_1, node1298_2, node1327_0, node1327_1, node1327_2;
    UINT64 node1474_0, node1474_1, node1474_2, node1475, node1476, node1477, node1477D6, node1478;
    UINT64 node1493, node1493D59, node1494, node1509, node1509D62, node1510, node1525D36_0, node1525D36_1;
    UINT64 node1525D36_2, node1525_0, node1525_1, node1525_2, node1526, node1960_0, node1960_1, node1960_2;
    UINT64 node1961_0, node1961_1, node1961_2, node1963_0, node1963_1, node1963_2, node1964_0, node1964_1;
    UINT64 node1964_2, node42, node555_0, node555_1, node555_2, node557_0, node557_1, node557_2;
    UINT64 node558_0, node558_1, node558_2, node591_0, node591_1, node591_2, node593_0, node593_1;
    UINT64 node593_2, node594_0, node594_1, node594_2, node627_0, node627_1, node627_2, node629_0;
    UINT64 node629_1, node629_2, node630_0, node630_1, node630_2, node631D1_0, node631D1_1, node631D1_2;
    UINT64 node631_0, node631_1, node631_2, node632D1_0, node632D1_1, node632D1_2, node632_0, node632_1;
    UINT64 node632_2, node651_0, node651_1, node651_2, node652D3_0, node652D3_1, node652D3_2, node652_0;
    UINT64 node652_1, node652_2, node653_0, node653_1, node653_2, node660, node860_0, node860_1;
    UINT64 node860_2, node861_0, node861_1, node861_2, node889, node890, node891, node891D6;
    UINT64 node892, node893, node979, r2, r2D9, r3inv, r5inv, s;
    UINT64 sD18, vj1_0, vj1_1, vj1_2, vjD11_0, vjD11_1, vjD11_2, vjD8_0;
    UINT64 vjD8_1, vjD8_2, xj1_0, xj1_1, xj1_2, xjD15_0, xjD15_1, xjD15_2;


    // acc_0
    // 'mass' is selected between the constant 0 and the delayed mj by the
    // result of comparing indexj with indexi (mode "E").
    // NOTE(review): presumably this zeroes the mass when i and j are the
    // same particle -- confirm the pg_selector argument order.
    pg_comp_int(indexj, 32, 0, indexi, 32, 0, &node660, 1, 0, "E");
    pg_delay(mj, &mjD1);
    pg_selector(node660, 1, 0, (UINT64)0x0LL, 34, 23, mjD1, 34, 23, &mass, 34, 23);
    pg_delay(mass, &massD62);
    pg_delay(xj_0, &xjD15_0);
    // dt = ti - tj, narrowed from the (63, 52) to the (28, 17) format.
    pg_sub_float_d8(ti, 63, 52, tj, 63, 52, &node42, 63, 52);
    pg_conv_float_to_float(node42, 63, 52, &dt, 28, 17);
    // xj1_0 = xj_0 + int(dt * vj_0) + int(dt2 * acc0by2_0) + int(dt3 * jerk0by6_0),
    // each product being passed through pg_shift_float before the integer conversion.
    pg_delay(vj_0, &vjD8_0);
    pg_mul_float(dt, 28, 17, vjD8_0, 28, 17, &node555_0, 28, 17);
    pg_shift_float(node555_0, 28, 17, (UINT64)0x33LL, 10, 0, &node557_0, 28, 17, 1);
    pg_conv_float_to_int(node557_0, 28, 17, &node558_0, 64, 0);
    pg_add_int(xjD15_0, 64, 0, node558_0, 64, 0, &node631_0, 64, 0);
    pg_delay(node631_0, &node631D1_0);
    pg_mul_float(dt, 28, 17, dt, 28, 17, &dt2, 28, 17);
    pg_delay(acc0by2_0, &acc0by2D11_0);
    pg_mul_float(dt2, 28, 17, acc0by2D11_0, 28, 17, &node591_0, 28, 17);
    pg_shift_float(node591_0, 28, 17, (UINT64)0x33LL, 10, 0, &node593_0, 28, 17, 1);
    pg_conv_float_to_int(node593_0, 28, 17, &node594_0, 64, 0);
    pg_add_int(node631D1_0, 64, 0, node594_0, 64, 0, &node632_0, 64, 0);
    pg_delay(node632_0, &node632D1_0);
    pg_delay(dt, &dtD3);
    pg_mul_float(dt2, 28, 17, dtD3, 28, 17, &dt3, 28, 17);
    pg_delay(jerk0by6_0, &jerk0by6D14_0);
    pg_mul_float(dt3, 28, 17, jerk0by6D14_0, 28, 17, &node627_0, 28, 17);
    pg_shift_float(node627_0, 28, 17, (UINT64)0x33LL, 10, 0, &node629_0, 28, 17, 1);
    pg_conv_float_to_int(node629_0, 28, 17, &node630_0, 64, 0);
    pg_add_int(node632D1_0, 64, 0, node630_0, 64, 0, &xj1_0, 64, 0);
    // dx_0 = float(xj1_0 - xi_0), shifted back; node889 = dx_0 * dx_0.
    pg_sub_int(xj1_0, 64, 0, xi_0, 64, 0, &node860_0, 64, 0);
    pg_conv_int_to_float_d5(node860_0, 64, 0, &node861_0, 34, 23);
    pg_shift_float(node861_0, 34, 23, (UINT64)0x33LL, 10, 0, &dx_0, 34, 23, -1);
    pg_mul_float(dx_0, 34, 23, dx_0, 34, 23, &node889, 34, 23);
    // same sequence for component 1.
    pg_delay(xj_1, &xjD15_1);
    pg_delay(vj_1, &vjD8_1);
    pg_mul_float(dt, 28, 17, vjD8_1, 28, 17, &node555_1, 28, 17);
    pg_shift_float(node555_1, 28, 17, (UINT64)0x33LL, 10, 0, &node557_1, 28, 17, 1);
    pg_conv_float_to_int(node557_1, 28, 17, &node558_1, 64, 0);
    pg_add_int(xjD15_1, 64, 0, node558_1, 64, 0, &node631_1, 64, 0);
    pg_delay(node631_1, &node631D1_1);
    pg_delay(acc0by2_1, &acc0by2D11_1);
    pg_mul_float(dt2, 28, 17, acc0by2D11_1, 28, 17, &node591_1, 28, 17);
    pg_shift_float(node591_1, 28, 17, (UINT64)0x33LL, 10, 0, &node593_1, 28, 17, 1);
    pg_conv_float_to_int(node593_1, 28, 17, &node594_1, 64, 0);
    pg_add_int(node631D1_1, 64, 0, node594_1, 64, 0, &node632_1, 64, 0);
    pg_delay(node632_1, &node632D1_1);
    pg_delay(jerk0by6_1, &jerk0by6D14_1);
    pg_mul_float(dt3, 28, 17, jerk0by6D14_1, 28, 17, &node627_1, 28, 17);
    pg_shift_float(node627_1, 28, 17, (UINT64)0x33LL, 10, 0, &node629_1, 28, 17, 1);
    pg_conv_float_to_int(node629_1, 28, 17, &node630_1, 64, 0);
    pg_add_int(node632D1_1, 64, 0, node630_1, 64, 0, &xj1_1, 64, 0);
    pg_sub_int(xj1_1, 64, 0, xi_1, 64, 0, &node860_1, 64, 0);
    pg_conv_int_to_float_d5(node860_1, 64, 0, &node861_1, 34, 23);
    pg_shift_float(node861_1, 34, 23, (UINT64)0x33LL, 10, 0, &dx_1, 34, 23, -1);
    pg_mul_float(dx_1, 34, 23, dx_1, 34, 23, &node890, 34, 23);
    pg_add_float_d6(node889, 34, 23, node890, 34, 23, &node892, 34, 23);
    // same sequence for component 2.
    pg_delay(xj_2, &xjD15_2);
    pg_delay(vj_2, &vjD8_2);
    pg_mul_float(dt, 28, 17, vjD8_2, 28, 17, &node555_2, 28, 17);
    pg_shift_float(node555_2, 28, 17, (UINT64)0x33LL, 10, 0, &node557_2, 28, 17, 1);
    pg_conv_float_to_int(node557_2, 28, 17, &node558_2, 64, 0);
    pg_add_int(xjD15_2, 64, 0, node558_2, 64, 0, &node631_2, 64, 0);
    pg_delay(node631_2, &node631D1_2);
    pg_delay(acc0by2_2, &acc0by2D11_2);
    pg_mul_float(dt2, 28, 17, acc0by2D11_2, 28, 17, &node591_2, 28, 17);
    pg_shift_float(node591_2, 28, 17, (UINT64)0x33LL, 10, 0, &node593_2, 28, 17, 1);
    pg_conv_float_to_int(node593_2, 28, 17, &node594_2, 64, 0);
    pg_add_int(node631D1_2, 64, 0, node594_2, 64, 0, &node632_2, 64, 0);
    pg_delay(node632_2, &node632D1_2);
    pg_delay(jerk0by6_2, &jerk0by6D14_2);
    pg_mul_float(dt3, 28, 17, jerk0by6D14_2, 28, 17, &node627_2, 28, 17);
    pg_shift_float(node627_2, 28, 17, (UINT64)0x33LL, 10, 0, &node629_2, 28, 17, 1);
    pg_conv_float_to_int(node629_2, 28, 17, &node630_2, 64, 0);
    pg_add_int(node632D1_2, 64, 0, node630_2, 64, 0, &xj1_2, 64, 0);
    pg_sub_int(xj1_2, 64, 0, xi_2, 64, 0, &node860_2, 64, 0);
    pg_conv_int_to_float_d5(node860_2, 64, 0, &node861_2, 34, 23);
    pg_shift_float(node861_2, 34, 23, (UINT64)0x33LL, 10, 0, &dx_2, 34, 23, -1);
    pg_mul_float(dx_2, 34, 23, dx_2, 34, 23, &node891, 34, 23);
    pg_delay(node891, &node891D6);
    // r2 = dx_0^2 + dx_1^2 + dx_2^2 + epsi2.
    pg_add_float_d6(node892, 34, 23, node891D6, 34, 23, &node893, 34, 23);
    pg_add_float_d6(node893, 34, 23, epsi2, 34, 23, &r2, 34, 23);
    // NOTE(review): the (-5, 2) arguments presumably request r2^(-5/2) -- confirm in pgpgutil.
    pg_pow_float(r2, &node979, 34, 23, -5, 2, 9);
    pg_conv_float_to_float(node979, 34, 23, &r5inv, 34, 23);
    pg_delay(r2, &r2D9);
    // r3inv = r5inv * r2; node1294 = mass * r3inv is shared by all three acc components.
    pg_mul_float(r5inv, 34, 23, r2D9, 34, 23, &r3inv, 34, 23);
    pg_mul_float(massD62, 34, 23, r3inv, 34, 23, &node1294, 34, 23);
    pg_delay(dx_0, &dxD36_0);
    pg_mul_float(node1294, 34, 23, dxD36_0, 34, 23, &node1295_0, 34, 23);
    pg_shift_float(node1295_0, 34, 23, (UINT64)0x30LL, 10, 0, &node1297_0, 34, 23, 1);
    pg_conv_float_to_int(node1297_0, 34, 23, &node1298_0, 64, 0);
    pg_inc_int(node1298_0, 64, 0, acc_0p, 64, 0, run_begin);

    // acc_1
    pg_delay(dx_1, &dxD36_1);
    pg_mul_float(node1294, 34, 23, dxD36_1, 34, 23, &node1295_1, 34, 23);
    pg_shift_float(node1295_1, 34, 23, (UINT64)0x30LL, 10, 0, &node1297_1, 34, 23, 1);
    pg_conv_float_to_int(node1297_1, 34, 23, &node1298_1, 64, 0);
    pg_inc_int(node1298_1, 64, 0, acc_1p, 64, 0, run_begin);

    // acc_2
    pg_delay(dx_2, &dxD36_2);
    pg_mul_float(node1294, 34, 23, dxD36_2, 34, 23, &node1295_2, 34, 23);
    pg_shift_float(node1295_2, 34, 23, (UINT64)0x30LL, 10, 0, &node1297_2, 34, 23, 1);
    pg_conv_float_to_int(node1297_2, 34, 23, &node1298_2, 64, 0);
    pg_inc_int(node1298_2, 64, 0, acc_2p, 64, 0, run_begin);

    // jerk_0
    // mjr3inv = mass * r3inv, both narrowed to the (28, 17) format.
    pg_conv_float_to_float(mass, 34, 23, &node1509, 28, 17);
    pg_delay(node1509, &node1509D62);
    pg_conv_float_to_float(r3inv, 34, 23, &node1510, 28, 17);
    pg_mul_float(node1509D62, 28, 17, node1510, 28, 17, &mjr3inv, 28, 17);
    // vj1_0 = vj_0 + dt * acc0_0 + dt2 * jerk0by2_0; dv_0 = vj1_0 - vi_0.
    // NOTE(review): the constant 0x4030000 presumably encodes 3.0 in the
    // (28, 17) format, turning jerk0by6 into jerk0by2 -- confirm.
    pg_delay(vj_0, &vjD11_0);
    pg_shift_float(acc0by2_0, 28, 17, (UINT64)0x1LL, 10, 0, &acc0_0, 28, 17, 1);
    pg_delay(acc0_0, &acc0D7_0);
    pg_mul_float(dt, 28, 17, acc0D7_0, 28, 17, &node651_0, 28, 17);
    pg_add_float_d6(vjD11_0, 28, 17, node651_0, 28, 17, &node653_0, 28, 17);
    pg_mul_float(jerk0by6_0, 28, 17, (UINT64)0x4030000LL, 28, 17, &jerk0by2_0, 28, 17);
    pg_delay(jerk0by2_0, &jerk0by2D8_0);
    pg_mul_float(dt2, 28, 17, jerk0by2D8_0, 28, 17, &node652_0, 28, 17);
    pg_delay(node652_0, &node652D3_0);
    pg_add_float_d6(node653_0, 28, 17, node652D3_0, 28, 17, &vj1_0, 28, 17);
    pg_sub_float_d6(vj1_0, 28, 17, vi_0, 28, 17, &node1327_0, 28, 17);
    pg_conv_float_to_float(node1327_0, 28, 17, &dv_0, 28, 17);
    pg_delay(dv_0, &dvD38_0);
    pg_mul_float(mjr3inv, 28, 17, dvD38_0, 28, 17, &j2_0, 28, 17);
    pg_delay(j2_0, &j2D3_0);
    pg_delay(dv_0, &dvD2_0);
    pg_conv_float_to_float(dx_0, 34, 23, &node1474_0, 28, 17);
    pg_mul_float(dvD2_0, 28, 17, node1474_0, 28, 17, &node1475, 28, 17);
    pg_delay(vj_1, &vjD11_1);
    pg_shift_float(acc0by2_1, 28, 17, (UINT64)0x1LL, 10, 0, &acc0_1, 28, 17, 1);
    pg_delay(acc0_1, &acc0D7_1);
    pg_mul_float(dt, 28, 17, acc0D7_1, 28, 17, &node651_1, 28, 17);
    pg_add_float_d6(vjD11_1, 28, 17, node651_1, 28, 17, &node653_1, 28, 17);
    pg_mul_float(jerk0by6_1, 28, 17, (UINT64)0x4030000LL, 28, 17, &jerk0by2_1, 28, 17);
    pg_delay(jerk0by2_1, &jerk0by2D8_1);
    pg_mul_float(dt2, 28, 17, jerk0by2D8_1, 28, 17, &node652_1, 28, 17);
    pg_delay(node652_1, &node652D3_1);
    pg_add_float_d6(node653_1, 28, 17, node652D3_1, 28, 17, &vj1_1, 28, 17);
    pg_sub_float_d6(vj1_1, 28, 17, vi_1, 28, 17, &node1327_1, 28, 17);
    pg_conv_float_to_float(node1327_1, 28, 17, &dv_1, 28, 17);
    pg_delay(dv_1, &dvD2_1);
    pg_conv_float_to_float(dx_1, 34, 23, &node1474_1, 28, 17);
    pg_mul_float(dvD2_1, 28, 17, node1474_1, 28, 17, &node1476, 28, 17);
    pg_add_float_d6(node1475, 28, 17, node1476, 28, 17, &node1478, 28, 17);
    pg_delay(vj_2, &vjD11_2);
    pg_shift_float(acc0by2_2, 28, 17, (UINT64)0x1LL, 10, 0, &acc0_2, 28, 17, 1);
    pg_delay(acc0_2, &acc0D7_2);
    pg_mul_float(dt, 28, 17, acc0D7_2, 28, 17, &node651_2, 28, 17);
    pg_add_float_d6(vjD11_2, 28, 17, node651_2, 28, 17, &node653_2, 28, 17);
    pg_mul_float(jerk0by6_2, 28, 17, (UINT64)0x4030000LL, 28, 17, &jerk0by2_2, 28, 17);
    pg_delay(jerk0by2_2, &jerk0by2D8_2);
    pg_mul_float(dt2, 28, 17, jerk0by2D8_2, 28, 17, &node652_2, 28, 17);
    pg_delay(node652_2, &node652D3_2);
    pg_add_float_d6(node653_2, 28, 17, node652D3_2, 28, 17, &vj1_2, 28, 17);
    pg_sub_float_d6(vj1_2, 28, 17, vi_2, 28, 17, &node1327_2, 28, 17);
    pg_conv_float_to_float(node1327_2, 28, 17, &dv_2, 28, 17);
    pg_delay(dv_2, &dvD2_2);
    pg_conv_float_to_float(dx_2, 34, 23, &node1474_2, 28, 17);
    pg_mul_float(dvD2_2, 28, 17, node1474_2, 28, 17, &node1477, 28, 17);
    pg_delay(node1477, &node1477D6);
    // s = dv_0*dx_0 + dv_1*dx_1 + dv_2*dx_2.
    pg_add_float_d6(node1478, 28, 17, node1477D6, 28, 17, &s, 28, 17);
    pg_delay(s, &sD18);
    // mjr5inv = mass * r5inv; node1526 = s * mjr5inv is shared by all three jerk components.
    pg_conv_float_to_float(mass, 34, 23, &node1493, 28, 17);
    pg_delay(node1493, &node1493D59);
    pg_conv_float_to_float(r5inv, 34, 23, &node1494, 28, 17);
    pg_mul_float(node1493D59, 28, 17, node1494, 28, 17, &mjr5inv, 28, 17);
    pg_mul_float(sD18, 28, 17, mjr5inv, 28, 17, &node1526, 28, 17);
    pg_conv_float_to_float(dx_0, 34, 23, &node1525_0, 28, 17);
    pg_delay(node1525_0, &node1525D36_0);
    // jerk component = j2 - j1 * 0x4030000, shifted and converted to a 48-bit integer.
    pg_mul_float(node1526, 28, 17, node1525D36_0, 28, 17, &j1_0, 28, 17);
    pg_mul_float(j1_0, 28, 17, (UINT64)0x4030000LL, 28, 17, &node1960_0, 28, 17);
    pg_sub_float_d6(j2D3_0, 28, 17, node1960_0, 28, 17, &node1961_0, 28, 17);
    pg_shift_float(node1961_0, 28, 17, (UINT64)0x20LL, 10, 0, &node1963_0, 28, 17, 1);
    pg_conv_float_to_int(node1963_0, 28, 17, &node1964_0, 48, 0);
    pg_inc_int(node1964_0, 48, 0, jerk_0p, 48, 0, run_begin);

    // jerk_1
    pg_delay(dv_1, &dvD38_1);
    pg_mul_float(mjr3inv, 28, 17, dvD38_1, 28, 17, &j2_1, 28, 17);
    pg_delay(j2_1, &j2D3_1);
    pg_conv_float_to_float(dx_1, 34, 23, &node1525_1, 28, 17);
    pg_delay(node1525_1, &node1525D36_1);
    pg_mul_float(node1526, 28, 17, node1525D36_1, 28, 17, &j1_1, 28, 17);
    pg_mul_float(j1_1, 28, 17, (UINT64)0x4030000LL, 28, 17, &node1960_1, 28, 17);
    pg_sub_float_d6(j2D3_1, 28, 17, node1960_1, 28, 17, &node1961_1, 28, 17);
    pg_shift_float(node1961_1, 28, 17, (UINT64)0x20LL, 10, 0, &node1963_1, 28, 17, 1);
    pg_conv_float_to_int(node1963_1, 28, 17, &node1964_1, 48, 0);
    pg_inc_int(node1964_1, 48, 0, jerk_1p, 48, 0, run_begin);

    // jerk_2
    pg_delay(dv_2, &dvD38_2);
    pg_mul_float(mjr3inv, 28, 17, dvD38_2, 28, 17, &j2_2, 28, 17);
    pg_delay(j2_2, &j2D3_2);
    pg_conv_float_to_float(dx_2, 34, 23, &node1525_2, 28, 17);
    pg_delay(node1525_2, &node1525D36_2);
    pg_mul_float(node1526, 28, 17, node1525D36_2, 28, 17, &j1_2, 28, 17);
    pg_mul_float(j1_2, 28, 17, (UINT64)0x4030000LL, 28, 17, &node1960_2, 28, 17);
    pg_sub_float_d6(j2D3_2, 28, 17, node1960_2, 28, 17, &node1961_2, 28, 17);
    pg_shift_float(node1961_2, 28, 17, (UINT64)0x20LL, 10, 0, &node1963_2, 28, 17, 1);
    pg_conv_float_to_int(node1963_2, 28, 17, &node1964_2, 48, 0);
    pg_inc_int(node1964_2, 48, 0, jerk_2p, 48, 0, run_begin);


}

#ifdef MAINFUNC

// One entry of the test-program table: the routine to run and the
// description printed for it by showusage().
typedef struct {
    void (*func)();   // test routine.
    char *usage;      // one-line description of the routine.
} TestMode;

// test programs selectable from the command line (defined below).
static void test_pipeline(void);
static void generate_testvector(void);

/*
 * table of test programs. the array index is the <test_program_ID>
 * accepted by main() and listed by showusage().
 */
static TestMode testmode[] = {
    { test_pipeline,       "test pipeline emulator." },
    { generate_testvector, "generate .tbl file taking input test vector from stdin" },
};

/*
 * print the command-line synopsis followed by the numbered list of
 * test programs registered in testmode[] to stderr.
 */
static void
showusage(char *programname)
{
    const int mode_count = sizeof(testmode)/sizeof(testmode[0]);
    int id = 0;

    fprintf(stderr, "usage: %s <test_program_ID> [warning_level]\n", programname);
    while (id < mode_count) {
        fprintf(stderr, "  %2d) %s\n", id, testmode[id].usage);
        id++;
    }
}

/*
 * set the file-scope warn_level, the threshold the WARN() macro compares
 * its level argument against (messages with lv <= warn_level are printed).
 */
static void
set_warn_level(int level)
{
    warn_level = level;
}

/*
 * single-shot pipeline test.
 *
 * prompts on stdout, reads 24 raw input words from stdin (each written as
 * 0x followed by up to 16 hex digits, in the order listed by the prompt),
 * echoes them, runs pipeline() once with run_begin = 1 and prints the six
 * resulting acc/jerk words.
 *
 * terminates the program with exit status 1 if the input cannot be parsed
 * completely, so that pipeline() is never fed uninitialized words.
 */
static void
test_pipeline(void)
{
    enum { NINPUT = 24 }; // number of conversions in the scanf() below.
    int nread;

    // COEFF input
    UINT64 ti;
    // IP inputs
    UINT64 vi_0, vi_1, vi_2;
    UINT64 xi_0, xi_1, xi_2;
    UINT64 indexi, epsi2;
    // JP inputs
    UINT64 jerk0by6_0, jerk0by6_1, jerk0by6_2;
    UINT64 mj;
    UINT64 xj_0, vj_0, acc0by2_0;
    UINT64 xj_1, vj_1, acc0by2_1;
    UINT64 xj_2, vj_2, acc0by2_2;
    UINT64 indexj, tj;
    // FO outputs
    UINT64 acc_0, acc_1, acc_2;
    UINT64 jerk_0, jerk_1, jerk_2;

    printf("input ti vi_0 vi_1 vi_2 xi_0 xi_1 xi_2 indexi epsi2 jerk0by6_0 jerk0by6_1 jerk0by6_2 mj xj_0 vj_0 acc0by2_0 xj_1 vj_1 acc0by2_1 xj_2 vj_2 acc0by2_2 indexj tj : \n");

    nread = scanf(" 0x%016llx"                       // ti
                  " 0x%016llx 0x%016llx 0x%016llx"   // vi_0 .. vi_2
                  " 0x%016llx 0x%016llx 0x%016llx"   // xi_0 .. xi_2
                  " 0x%016llx 0x%016llx"             // indexi, epsi2
                  " 0x%016llx 0x%016llx 0x%016llx"   // jerk0by6_0 .. jerk0by6_2
                  " 0x%016llx"                       // mj
                  " 0x%016llx 0x%016llx 0x%016llx"   // xj_0, vj_0, acc0by2_0
                  " 0x%016llx 0x%016llx 0x%016llx"   // xj_1, vj_1, acc0by2_1
                  " 0x%016llx 0x%016llx 0x%016llx"   // xj_2, vj_2, acc0by2_2
                  " 0x%016llx 0x%016llx",            // indexj, tj
        &ti, &vi_0, &vi_1, &vi_2, &xi_0, &xi_1, &xi_2, &indexi, &epsi2,
        &jerk0by6_0, &jerk0by6_1, &jerk0by6_2, &mj,
        &xj_0, &vj_0, &acc0by2_0, &xj_1, &vj_1, &acc0by2_1, &xj_2, &vj_2, &acc0by2_2,
        &indexj, &tj);

    if (nread != NINPUT) {
        // scanf() returns EOF (negative) or the number of words it managed to convert.
        fprintf(stderr, "test_pipeline: expected %d input words but parsed %d.\n", NINPUT, nread);
        exit(1);
    }

    printf("inputs:\n"
        "          ti: 0x%016llx\n"
        "        vi_0: 0x%016llx\n"
        "        vi_1: 0x%016llx\n"
        "        vi_2: 0x%016llx\n"
        "        xi_0: 0x%016llx\n"
        "        xi_1: 0x%016llx\n"
        "        xi_2: 0x%016llx\n"
        "      indexi: 0x%016llx\n"
        "       epsi2: 0x%016llx\n"
        "    jerk0by6_0: 0x%016llx\n"
        "    jerk0by6_1: 0x%016llx\n"
        "    jerk0by6_2: 0x%016llx\n"
        "          mj: 0x%016llx\n"
        "        xj_0: 0x%016llx\n"
        "        vj_0: 0x%016llx\n"
        "    acc0by2_0: 0x%016llx\n"
        "        xj_1: 0x%016llx\n"
        "        vj_1: 0x%016llx\n"
        "    acc0by2_1: 0x%016llx\n"
        "        xj_2: 0x%016llx\n"
        "        vj_2: 0x%016llx\n"
        "    acc0by2_2: 0x%016llx\n"
        "      indexj: 0x%016llx\n"
        "          tj: 0x%016llx\n"
        , ti, vi_0, vi_1, vi_2, xi_0, xi_1, xi_2, indexi, epsi2, jerk0by6_0, jerk0by6_1, jerk0by6_2, mj, xj_0, vj_0, acc0by2_0, xj_1, vj_1, acc0by2_1, xj_2, vj_2, acc0by2_2, indexj, tj);

    // run_begin = 1: this is the first (and only) pipeline step of the run.
    pipeline(ti,
        vi_0, vi_1, vi_2, xi_0, xi_1, xi_2, indexi, epsi2,
        jerk0by6_0, jerk0by6_1, jerk0by6_2, mj, xj_0, vj_0, acc0by2_0, xj_1, vj_1, acc0by2_1, xj_2, vj_2, acc0by2_2, indexj, tj,
        &acc_0, &acc_1, &acc_2, &jerk_0, &jerk_1, &jerk_2, 1);

    printf("outputs:\n"
        "       acc_0: 0x%016llx\n"
        "       acc_1: 0x%016llx\n"
        "       acc_2: 0x%016llx\n"
        "      jerk_0: 0x%016llx\n"
        "      jerk_1: 0x%016llx\n"
        "      jerk_2: 0x%016llx\n"
        , acc_0, acc_1, acc_2, jerk_0, jerk_1, jerk_2);
}


/*
 * number of hexadecimal digits used to print an nbit-wide word,
 * i.e. nbit / 4 rounded up for nbit >= 1.
 */
static int
ndigit(int nbit)
{
    const int bits_per_digit = 4; // one hex digit carries 4 bits.
    int full_groups = (nbit - 1) / bits_per_digit;

    return full_groups + 1;
}

/*
 * write n 'X' characters to stdout (nothing when n <= 0).
 * generate_testvector() uses this as the placeholder for output
 * columns that have no value yet.
 */
static void
print_X(int n)
{
    for (; n > 0; n--) {
        putchar('X');
    }
}

#define NVEC (1024)
/*
 * generate a test-vector table.
 *
 * reads one vector per line from stdin: a decimal run flag followed by 24
 * raw input words (parsed with UINT64XFMT) in the column order
 *   ti, JP words (jerk0by6_*, mj, xj/vj/acc0by2 per axis, indexj, tj),
 *   IP words (vi_*, xi_*, indexi, epsi2).
 * for every line whose run flag is 1 the pipeline emulator is stepped once,
 * accumulating into the acc/jerk outputs; a 0 -> 1 transition of the run
 * flag is passed to pipeline() as run_begin = 1.
 *
 * the table written to stdout has a header line, then one row per vector:
 * the run flag, the input words, " = ", and the output words. outputs are
 * printed `delay` rows after the inputs that produced them; rows before
 * that show X placeholders.
 *
 * at most NVEC - delay lines are consumed.
 */
static void
generate_testvector(void)
{
    // description of one table column.
    struct column {
        const char *name; // label printed in the header line.
        int nbit;         // bit width; determines the number of hex digits printed.
        UINT64 *data;     // per-vector values.
    };
    enum { NFIELD = 25 }; // run flag + 24 input words per line.

    // row offset between inputs and the outputs printed next to them.
    // NOTE(review): presumably the hardware pipeline latency in clocks -- confirm.
    const int delay = 85;

    int i, nvec, nfield, run_begin;
    size_t c;
    static char buf[2046];
    static int run[NVEC];
    static UINT64 ti[NVEC], jerk0by6_0[NVEC], jerk0by6_1[NVEC], jerk0by6_2[NVEC], mj[NVEC], xj_0[NVEC], vj_0[NVEC], acc0by2_0[NVEC], xj_1[NVEC], vj_1[NVEC], acc0by2_1[NVEC], xj_2[NVEC], vj_2[NVEC], acc0by2_2[NVEC], indexj[NVEC], tj[NVEC], vi_0[NVEC], vi_1[NVEC], vi_2[NVEC], xi_0[NVEC], xi_1[NVEC], xi_2[NVEC], indexi[NVEC], epsi2[NVEC], acc_0[NVEC], acc_1[NVEC], acc_2[NVEC], jerk_0[NVEC], jerk_1[NVEC], jerk_2[NVEC];

    // input columns, in the order they are read and printed.
    static const struct column incol[] = {
        { "ti",         63, ti },
        { "jerk0by6_0", 28, jerk0by6_0 },
        { "jerk0by6_1", 28, jerk0by6_1 },
        { "jerk0by6_2", 28, jerk0by6_2 },
        { "mj",         34, mj },
        { "xj_0",       64, xj_0 },
        { "vj_0",       28, vj_0 },
        { "acc0by2_0",  28, acc0by2_0 },
        { "xj_1",       64, xj_1 },
        { "vj_1",       28, vj_1 },
        { "acc0by2_1",  28, acc0by2_1 },
        { "xj_2",       64, xj_2 },
        { "vj_2",       28, vj_2 },
        { "acc0by2_2",  28, acc0by2_2 },
        { "indexj",     64, indexj },
        { "tj",         63, tj },
        { "vi_0",       28, vi_0 },
        { "vi_1",       28, vi_1 },
        { "vi_2",       28, vi_2 },
        { "xi_0",       64, xi_0 },
        { "xi_1",       64, xi_1 },
        { "xi_2",       64, xi_2 },
        { "indexi",     64, indexi },
        { "epsi2",      34, epsi2 },
    };
    // output columns, in the order they are printed.
    static const struct column outcol[] = {
        { "acc_0",  64, acc_0 },
        { "acc_1",  64, acc_1 },
        { "acc_2",  64, acc_2 },
        { "jerk_0", 48, jerk_0 },
        { "jerk_1", 48, jerk_1 },
        { "jerk_2", 48, jerk_2 },
    };
    const size_t nincol = sizeof(incol) / sizeof(incol[0]);
    const size_t noutcol = sizeof(outcol) / sizeof(outcol[0]);

    /*
     * read and evaluate the vectors.
     * the loop is driven by the fgets() result; testing feof() before the
     * read would process the last line a second time once the read fails.
     */
    nvec = 0;
    while (nvec + delay < NVEC && fgets(buf, sizeof(buf), stdin) != NULL) {
        nfield = sscanf(buf,
               "%d "
               UINT64XFMT " "                                // ti
               UINT64XFMT " " UINT64XFMT " " UINT64XFMT " "  // jerk0by6_0 .. jerk0by6_2
               UINT64XFMT " "                                // mj
               UINT64XFMT " " UINT64XFMT " " UINT64XFMT " "  // xj_0, vj_0, acc0by2_0
               UINT64XFMT " " UINT64XFMT " " UINT64XFMT " "  // xj_1, vj_1, acc0by2_1
               UINT64XFMT " " UINT64XFMT " " UINT64XFMT " "  // xj_2, vj_2, acc0by2_2
               UINT64XFMT " " UINT64XFMT " "                 // indexj, tj
               UINT64XFMT " " UINT64XFMT " " UINT64XFMT " "  // vi_0 .. vi_2
               UINT64XFMT " " UINT64XFMT " " UINT64XFMT " "  // xi_0 .. xi_2
               UINT64XFMT " " UINT64XFMT " ",                // indexi, epsi2
               &run[nvec], &ti[nvec], &jerk0by6_0[nvec], &jerk0by6_1[nvec], &jerk0by6_2[nvec], &mj[nvec], &xj_0[nvec], &vj_0[nvec], &acc0by2_0[nvec], &xj_1[nvec], &vj_1[nvec], &acc0by2_1[nvec], &xj_2[nvec], &vj_2[nvec], &acc0by2_2[nvec], &indexj[nvec], &tj[nvec], &vi_0[nvec], &vi_1[nvec], &vi_2[nvec], &xi_0[nvec], &xi_1[nvec], &xi_2[nvec], &indexi[nvec], &epsi2[nvec]);

        if (nfield != NFIELD) {
            // the line still counts as a vector; fields that were not parsed keep their zero-initialized value.
            WARN(1, "generate_testvector: input line %d: parsed %d of %d fields.\n", nvec + 1, nfield, NFIELD);
        }

        if (nvec == 0) { // do not set output at 1st clk.
            nvec++;
            continue;
        }

        if (run[nvec] == 1) { // run the pipeline.
            run_begin = (run[nvec - 1] == 0) ? 1 : 0; // rising edge of run.

            // arguments follow the pipeline() prototype: ti, IP words, JP words, outputs.
            // note that this differs from the column order of the table (JP words first).
            pipeline(ti[nvec],
                     vi_0[nvec], vi_1[nvec], vi_2[nvec], xi_0[nvec], xi_1[nvec], xi_2[nvec], indexi[nvec], epsi2[nvec],
                     jerk0by6_0[nvec], jerk0by6_1[nvec], jerk0by6_2[nvec], mj[nvec], xj_0[nvec], vj_0[nvec], acc0by2_0[nvec], xj_1[nvec], vj_1[nvec], acc0by2_1[nvec], xj_2[nvec], vj_2[nvec], acc0by2_2[nvec], indexj[nvec], tj[nvec],
                     &acc_0[nvec], &acc_1[nvec], &acc_2[nvec], &jerk_0[nvec], &jerk_1[nvec], &jerk_2[nvec],
                     run_begin);
        }

        // carry the running outputs over to the next vector so that pipeline() keeps accumulating into them.
        for (c = 0; c < noutcol; c++) {
            outcol[c].data[nvec + 1] = outcol[c].data[nvec];
        }

        nvec++;
    }

    /*
     * header line. each label is left-justified in a field as wide as
     * the hex representation of its column.
     */
    printf("# run ");
    for (c = 0; c < nincol; c++) {
        printf("%-*s ", ndigit(incol[c].nbit), incol[c].name);
    }
    printf(" = ");
    for (c = 0; c < noutcol; c++) {
        printf("%-*s ", ndigit(outcol[c].nbit), outcol[c].name);
    }
    printf("\n");

    /*
     * one row per vector.
     */
    for (i = 0; i < nvec; i++) {
        printf("  %d   ", run[i]);

        for (c = 0; c < nincol; c++) {
            printf("%0*llx ", ndigit(incol[c].nbit), (unsigned long long)incol[c].data[i]);
        }

        printf(" = ");

        for (c = 0; c < noutcol; c++) {
            if (i < delay) { // no output available yet.
                print_X(ndigit(outcol[c].nbit));
                printf(" ");
            }
            else {
                UINT64 word = outcol[c].data[i - delay];

                if (outcol[c].nbit < 64) { // keep only the bits that belong to the column.
                    word &= ((UINT64)1 << outcol[c].nbit) - 1;
                }
                printf("%0*llx ", ndigit(outcol[c].nbit), (unsigned long long)word);
            }
        }
        printf("\n");
    }
}

/*
 * parse a complete decimal integer argument.
 * returns 1 and stores the result in *value on success; returns 0 and leaves
 * *value untouched if text is empty, has trailing characters, or does not
 * fit in an int.
 */
static int
parse_int_arg(const char *text, int *value)
{
    char *end;
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) {
        return 0;
    }
    if ((long)(int)parsed != parsed) { // out of int range.
        return 0;
    }
    *value = (int)parsed;
    return 1;
}

/*
 * test driver entry point.
 *   argv[1]: index into testmode[] selecting the test program (required).
 *   argv[2]: warning level passed to set_warn_level() (optional, default 0).
 * prints the usage and exits with status 1 if an argument is missing,
 * is not a valid integer, or the test program ID is out of range.
 */
int
main(int argc, char **argv)
{
    const int nmodes = (int)(sizeof(testmode)/sizeof(testmode[0]));
    int mode;
    int wlevel = 0;

    if (argc < 2) {
        showusage(argv[0]);
        exit(1);
    }

    // a non-numeric ID is rejected instead of being silently treated as mode 0.
    if (!parse_int_arg(argv[1], &mode) || mode < 0 || nmodes <= mode) {
        showusage(argv[0]);
        exit(1);
    }

    if (argc > 2 && !parse_int_arg(argv[2], &wlevel)) {
        showusage(argv[0]);
        exit(1);
    }
    set_warn_level(wlevel);

    testmode[mode].func();

    return 0;
}

#endif // MAINFUNC
