#if defined(_WIN32)

#pragma warning(disable:4200)  //
#pragma warning(disable:4201)  // nameless struct/union
#pragma warning(disable:4214)  // bit field types other than int
#pragma warning(default:4200)
#pragma warning(default:4201)
#pragma warning(default:4214)

#include <DriverSpecs.h>
__user_code 
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "devioctl.h"
#include "strsafe.h"
#include <setupapi.h>
#include <basetyps.h>
#include "hibdrv.h"
#include "hibutil.h"

#else // !_WIN32  i.e., for normal OSes.

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <dirent.h>
#include "hibdrv.h"
#include "hibutil.h"

#endif // _WIN32

/* global variable(s) */
/* error object with external linkage; it is only defined here. */
Hiberr hiberr;

/* HIB local definition(s) & variable(s) */
/* per-device state table. every function in this file accesses it as hib[devid]. */
static Hib hib[NHIBMAX];

/*
 * Hardware dependent parameters for GRAPE-DR with KFCR GPCIe.
 *
 * On Windows the table is only declared here and is filled in at run time
 * by sub_openMC(); elsewhere it is initialized statically below.
 */
#if defined(_WIN32)
static HibResource hibResourceGDRG;
#else // !_WIN32

static HibResource hibResourceGDRG  = {

    // buffer and memory space sizes.
    .dmabuf_bytes       = HIB_DMABUF_BYTES,
    .local_mem_bytes    = 32 * 1024,  // BAR0
    .backend_mem_bytes  =  4 * 1024,  // BAR1
    .piowbuf_bytes      = 32 * 1024,  // BAR2

    /*
     * register map of the local register mapped to BAR0.
     *
     * DMA channel0 for read (host->hib),
     * DMA channel1 for write (host<-hib).
     *
     * NOTE(review): this map does not agree everywhere with the values
     * assigned below (dma0lsb/dma0laddr and dma1lsb/dma1laddr are swapped
     * relative to the map, the kick-off/abort/done bit positions differ).
     * The initializers are what the code actually uses; confirm against the
     * hardware specification which of the two is up to date.
     *
     * register map:
     *
     * 000h DMA0  lower 32bits of start address in PCI space.
     * 004h DMA0  higher 32bits of start address in PCI space.
     * 008h DMA0  start address in hib local space.
     * 00ch DMA0  transfer size in byte.
     * 010h DMA0  command/parameter
     *              0      : 0:fifo mode  1:ram mode
     *              1      : not used.
     *              3:2    : read latency. 00:1clk  01:2clk  10:3clk  11:4clk
     *              7:4    : be for DW transfer. not used for burst transfer.
     *              8      : direction. '0' for MemRd burst. '1' for MemWr burst.
     *              10:9   : not used.
     *              13:11  : TC. tied to "000".
     *              15:14  : ATTR. tied to "00".
     *              16     : kick off a transfer.
     *              17     : abort current transfer.
     * 014h DMA0  status
     *              0      : asserted when current transfer is done.
     * 040h DMA1  lower 32bits of start address in PCI space.
     * 044h DMA1  higher 32bits of start address in PCI space.
     * 048h DMA1  start address in hib local space.
     * 04ch DMA1  transfer size in byte, write to this register starts a new transfer.
     * 050h DMA1  command/parameter
     * 054h DMA1  status
     *
     * 120h command
     *                   31 : reset backend.                           (wo)
     *                   26 : piow buf1 is write locked.               (ro)
     *                   25 : piow buf0 is write locked.               (ro)
     *                   24 : swap the two piow buffers.               (wo)
     *     PIOWBUF_DEPTH..0 : piow buf0/1 write count (in byte unit).  (r/w)
     *
     * 128h board function test mode, piow done.
     * 130h KFCR board info
     * 138h ASMI
     * 140h PLLCONF
     * 148h Remote Update
     */
    .dma0lsb     = 0x008,
    .dma0msb     = 0x004,
    .dma0laddr   = 0x000,
    .dma0size    = 0x00c,
    .dma0cmd     = 0x010,
    .dma0misc    = 0x014,

    .dma1lsb     = 0x048,
    .dma1msb     = 0x044,
    .dma1laddr   = 0x040,
    .dma1size    = 0x04c,
    .dma1cmd     = 0x050,
    .dma1misc    = 0x054,

    .command     = 0x120,
    .testmode    = 0x128,
    .boardinfo   = 0x130,
    .asmi        = 0x138,
    .pllconf     = 0x140,
    .rupdate     = 0x148,
    
    /*
     * HIB local register bit offset
     */
    .dma0cmd_kickoff           = 0x00000001, // write this value to start DMA.
    .dma0cmd_abort_bit         =  1,
    .dma0misc_done_bit         = 13,

    .dma1cmd_kickoff           = 0x00000001,
    .dma1cmd_abort_bit         =  1,
    .dma1misc_done_bit         = 13,

    .command_reset_backend_bit = 31,
    .command_sram1_wlock_bit   = 26,
    .command_sram0_wlock_bit   = 25,

    .dma0misc_swap_sram_bit    =  0,   /* not used */
    .dma0misc_sram_wcnt_bit    =  0,   /* not used */
    .command_dma_reset_bit     =  0,   /* not used */
    .command_swap_sram_bit     = 24,
    .command_sram_wcnt_bit     =  0,
    
    .asmi_error_bit            =  1,
    .asmi_busy_bit             =  0,
    .asmi_cmd_rsid             =  0xfffffd00,
    .asmi_cmd_wp               =  0xfffffe00,
    .asmi_cmd_eb               =  0xffffff00,

    .pllconf_write_bit         =  0,
    .pllconf_reconfig_bit      =  2,
    .pllconf_busy_bit          =  0,

};

/*
 * Hardware dependent parameters for GRAPE-DR with PLDA PCIe-Lite.
 *
 * Same layout as the other HibResource tables: buffer sizes first, then
 * byte offsets of the local registers, then bit positions / command values.
 * Members that are not listed here (e.g. rupdate) are zero-initialized.
 */
static HibResource hibResourceGDRP  = {

    // buffer and memory space sizes.
    .dmabuf_bytes       = HIB_DMABUF_BYTES,
    .local_mem_bytes    = 32 * 1024,  // BAR0
    .backend_mem_bytes  =  4 * 1024,  // BAR1
    .piowbuf_bytes      = 32 * 1024,  // BAR2

    // byte offsets of the DMA0 (host->hib) registers.
    .dma0lsb     = 0x000,
    .dma0msb     = 0x004,
    .dma0laddr   = 0x000, // not used
    .dma0size    = 0x008,
    .dma0cmd     = 0x00c,
    .dma0misc    = 0x028,

    // byte offsets of the DMA1 (host<-hib) registers.
    .dma1lsb     = 0x010,
    .dma1msb     = 0x014,
    .dma1laddr   = 0x000, // not used
    .dma1size    = 0x018,
    .dma1cmd     = 0x01c,
    .dma1misc    = 0x02c,

    // byte offsets of the remaining control registers.
    .command     = 0x20,
    .testmode    = 0x24,
    .boardinfo   = 0x30,
    .asmi        = 0x38,
    .pllconf     = 0x40,
    
    /*
     * HIB local register bit offset
     */
    .dma0cmd_kickoff           = 0x00000001, // write this value to start DMA.
    .dma0misc_done_bit         = 13,
    .dma0misc_swap_sram_bit    = 15,
    .dma0misc_sram_wcnt_bit    = 16,

    .dma1cmd_kickoff           = 0x00000001,
    .dma1misc_done_bit         = 13,
    .command_reset_backend_bit = 30,
    .command_dma_reset_bit     = 31,

    .asmi_error_bit            =  1,
    .asmi_busy_bit             =  0,
    .asmi_cmd_rsid             =  0xfffffd00,
    .asmi_cmd_wp               =  0xfffffe00,
    .asmi_cmd_eb               =  0xffffff00,

    .pllconf_write_bit         =  0,
    .pllconf_reconfig_bit      =  2,
    .pllconf_busy_bit          =  0,

    .dma1cmd_abort_bit         =  0, // not used
    .dma0cmd_abort_bit         =  0, // not used
    .command_sram1_wlock_bit   =  0, // not used
    .command_sram0_wlock_bit   =  0, // not used
    .command_swap_sram_bit     =  0, // not used
    .command_sram_wcnt_bit     =  0, // not used

};

/*
 * Hardware dependent parameters for GRAPE-7 with PLDA PCI-X.
 *
 * The register offsets are the same as in the GRAPE-DR PLDA table; the
 * buffer sizes, the DMA kick-off values and the done / swap / write-count
 * bit positions differ.
 * Members that are not listed here (e.g. rupdate) are zero-initialized.
 */
static HibResource hibResourceG7X  = {

    // buffer and memory space sizes.
    .dmabuf_bytes       = HIB_DMABUF_BYTES,
    .piowbuf_bytes      = 4096,
    .local_mem_bytes    = 8192,
    .backend_mem_bytes  = 8192,

    // byte offsets of the DMA0 (host->hib) registers.
    .dma0lsb     = 0x000,
    .dma0msb     = 0x004,
    .dma0laddr   = 0x000, // not used
    .dma0size    = 0x008,
    .dma0cmd     = 0x00c,
    .dma0misc    = 0x028,

    // byte offsets of the DMA1 (host<-hib) registers.
    .dma1lsb     = 0x010,
    .dma1msb     = 0x014,
    .dma1laddr   = 0x000, // not used
    .dma1size    = 0x018,
    .dma1cmd     = 0x01c,
    .dma1misc    = 0x02c,

    // byte offsets of the remaining control registers.
    .command     = 0x20,
    .testmode    = 0x24,
    .boardinfo   = 0x30,
    .asmi        = 0x38,
    .pllconf     = 0x40,
    
    /*
     * HIB local register bit offset
     */
    .dma0cmd_kickoff           = 0x000000e6, // write this value to start DMA.
    .dma0misc_done_bit         = 28,
    .dma0misc_swap_sram_bit    = 27,
    .dma0misc_sram_wcnt_bit    = 12,

    .dma1cmd_kickoff           = 0x000000f6,
    .dma1misc_done_bit         = 29,
    .command_reset_backend_bit = 30,
    .command_dma_reset_bit     = 31,

    .asmi_error_bit            =  1,
    .asmi_busy_bit             =  0,
    .asmi_cmd_rsid             =  0xfffffd00,
    .asmi_cmd_wp               =  0xfffffe00,
    .asmi_cmd_eb               =  0xffffff00,

    .pllconf_write_bit         =  0,
    .pllconf_reconfig_bit      =  2,
    .pllconf_busy_bit          =  0,

    .dma1cmd_abort_bit         =  0, // not used
    .dma0cmd_abort_bit         =  0, // not used
    .command_sram1_wlock_bit   =  0, // not used
    .command_sram0_wlock_bit   =  0, // not used
    .command_swap_sram_bit     =  0, // not used
    .command_sram_wcnt_bit     =  0, // not used

};
#endif // _WIN32

/* HIB local function(s) */
/* forward declarations of the file-local helpers, so that they can be used before their definitions. */
static Hib* sub_openMC(int devid);
static int identify_hibtype(int devid);
static void init_envs(int devid);
static void lock_hib(int devid);
static int try_lock_hib(int devid);
static void unlock_hib(int devid);
static unsigned int roundup_to_page(unsigned int size);
static void TBmemMap(int devid);
static void TBgetDmaInfo(int devid,
			 unsigned long *dmarpa, UINT64 **dmarva,
			 unsigned long *dmawpa, UINT64 **dmawva);
static int devname_filterfunc(const struct dirent *entry);
static int count_devfile(char *devname);
static void sub_set_test_modeMC(int devid, int mode);

/*
 * platform glue.
 *
 * WARN(lv, fmt, ...) prints to stderr only when lv <= warn_level.
 * NOTE(review): WARN expands to a bare `if (...) fprintf(...);` and
 * sleep(t) carries its own trailing `;`, so neither is safe as the body of
 * an unbraced if/else. Always use them inside braces.
 */
#if defined(_WIN32)
static void usleep(int t);
static HANDLE winOpenDevice(int devid);
static void winMunmap(int devid, int barid);
static BOOL winGetDevicePath(int devid, IN  LPGUID InterfaceGuid,
                             __out_ecount(bufLen) PCHAR devicePath, __in size_t bufLen);
#define sleep(t) Sleep(1000.0 * (t));
#define WARN(lv, fmt, ...) if (lv <= warn_level) fprintf(stderr, fmt, __VA_ARGS__);
#else // _WIN32
#define WARN(lv, fmt, args...) if (lv <= warn_level) fprintf(stderr, fmt, ## args);
#endif // _WIN32

static int warn_level = 2; /* warning message output level. the higher the more verbose.
                              read by the WARN() macro, changed by hib_set_warn_level().
                              0: no warning (may cause wrong result with ../scripts/check.csh)
                              1: minimum
                              2: default
                              3: for debugging purpose
                              4: additionally traces every hib_mem_writeMC() call
                           */

/*
 * returns the number of HIB devices found on this host.
 *
 * the device files are counted only on the first call; the result is
 * cached and returned as it is by later calls.
 * terminates the process with exit status 2 if no device is found or if
 * more than NHIBMAX devices are found.
 */
int
hib_ndevice(void)
{
    // cached device count. 0 means "not counted yet".
    // (the type must be spelled out: implicit int is not valid since C99.)
    static int nhib = 0;

    if (nhib != 0) {
        return nhib;
    }

    // the very first call.
    // count up device files, i.e., /dev/hibdrv[0..NHIBMAX-1].
    nhib = count_devfile(HIB_DEVNAME);
    if (nhib <= 0) {
        WARN(1, "No hib found.\n");
        exit(2);
    }
    else if (nhib == 1) {
        WARN(3, "1 hib found.\n");
    }
    else if (nhib <= NHIBMAX) {
        WARN(3, "%d hibs found.\n", nhib);
    }
    else {
        WARN(1, "Too many hibs (%d) found.\n", nhib);
        exit(2);
    }
    return nhib;
}

/*
 * set the verbosity threshold used by the WARN() macro.
 * messages whose level is less than or equal to lv are printed to stderr.
 */
void
hib_set_warn_level(int lv)
{
    warn_level = lv;
}

/*
 * receive data from hib[devid] using the receive function registered for
 * that device (sub_openMC() installs hib_dmawMC).
 * size and buf are passed through unchanged; the return value is that of
 * the registered function. devid is not range-checked here.
 */
int
hib_recvMC(int devid, int size, UINT64 *buf)
{
    return hib[devid].recvMCfunc(devid, size, buf);
}

/*
 * open HIB
 * returns a pointer to the Hib object opened.
 *
 * Windows: retries winOpenDevice() once per second until it returns a
 * non-NULL handle.
 * others : opens the device file (exit status 2 on failure) and then takes
 * the device lock with lock_hib().
 * in both cases the rest of the set up is done by sub_openMC().
 */
Hib*
hib_openMC(int devid)
{
    init_envs(devid);

#if defined(_WIN32)

    hib[devid].hDev = winOpenDevice(devid);
    if (!hib[devid].hDev) {
        WARN(1, "Someone is using hib[%d]. Sleep...\n", devid);
    }
    // poll until the device can be opened; one dot is printed per retry.
    while (!hib[devid].hDev) {
        sleep(1);
        WARN(1, ".");
        hib[devid].hDev = winOpenDevice(devid);
    }

#else // !_WIN32

    hib[devid].fd = open(hib[devid].devname, O_RDWR);
    if (hib[devid].fd < 0) {
	perror(hib[devid].devname);
	exit(2);
    }

    // presumably blocks while another process holds the lock
    // (hib_try_openMC() is described as the non-blocking variant) -- confirm in lock_hib().
    lock_hib(devid);

#endif // _WIN32

    return sub_openMC(devid);
}

/*
 * try to open HIB.
 * mostly the same as hib_openMC(), but this version does not block
 * when the device is already opened by another process.
 * returns a pointer to the Hib object if open succeeded, otherwise returns NULL.
 *
 * when NULL is returned no resource is left behind: on non-Windows hosts
 * the device file opened for the lock attempt is closed again and
 * hib[devid].fd is reset to -1.
 * a failure of open() itself is still fatal (exit status 2).
 */
Hib*
hib_try_openMC(int devid)
{
    init_envs(devid);

#if defined(_WIN32)

    hib[devid].hDev = winOpenDevice(devid);
    if (!hib[devid].hDev) {
        WARN(3, "Someone is using hib[%d]. Sleep...\n", devid);
        return NULL;
    }

#else // !_WIN32

    hib[devid].fd = open(hib[devid].devname, O_RDWR);
    if (hib[devid].fd < 0) {
        perror(hib[devid].devname);
        exit(2);
    }

    if (try_lock_hib(devid) < 0) { // someone is using this device. giving up.
        // the caller only gets NULL and cannot close the descriptor itself,
        // so release it here; otherwise every failed attempt leaks one fd.
        close(hib[devid].fd);
        hib[devid].fd = -1;
        return NULL;
    }

#endif // _WIN32

    return sub_openMC(devid);
}


/*
 * identify the HIB type of hib[devid].
 *
 * Windows: always reports HIB_GRAPEDRG.
 * others : reads configuration register 00h (device id in the upper 16
 * bits, vendor id in the lower 16 bits) and looks the pair up in hibids[].
 * returns the index of the matching hibids[] entry.
 * prints a message and exits with status 1 if no entry matches.
 */
static int
identify_hibtype(int devid)
{
#if defined(_WIN32)
    return HIB_GRAPEDRG;
#else // !_WIN32
    UINT32 idreg = hib_config_readMC(devid, 0);
    UINT32 vendor = idreg & 0x0000ffff;
    UINT32 device = idreg >> 16;
    int nknown = sizeof(hibids) / sizeof(HibId);
    int k = 0;

    // linear search over the table of supported boards.
    while (k < nknown) {
        if (device == hibids[k].deviceid && vendor == hibids[k].vendorid) {
            return k;
        }
        k++;
    }

    fprintf(stderr, "non-supported device. vendor id: 0x%04x  device id: 0x%04x\n", vendor, device);
    exit(1);
#endif // _WIN32
}

/*
 * common part of hib_openMC() and hib_try_openMC(); called once the device
 * has been opened (and, on non-Windows hosts, locked).
 *
 * - identifies the board type and selects the matching HibResource table
 *   (on Windows the only table, hibResourceGDRG, is filled in here),
 * - maps the board memory (TBmemMap) and the DMA buffers (TBgetDmaInfo),
 * - programs the DMA start address registers,
 * - switches the test mode off and installs the default send/receive
 *   functions (hib_dmarMC / hib_dmawMC).
 *
 * returns a pointer to hib[devid].
 * exits with status 1 if the board type is not handled.
 */
static Hib*
sub_openMC(int devid)
{
#if defined(_WIN32)
    hib[devid].type = identify_hibtype(devid);

    // buffer and memory space sizes.
    hibResourceGDRG.dmabuf_bytes       = HIB_DMABUF_BYTES;

    // NOTE(review): the non-Windows table assigns 32KB/4KB/32KB to
    // local_mem(BAR0)/backend_mem(BAR1)/piowbuf(BAR2); here the same values
    // are bound to different members. values are kept as they were --
    // confirm which assignment is intended.
    hibResourceGDRG.piowbuf_bytes      = 32 * 1024;  // BAR0
    hibResourceGDRG.local_mem_bytes    =  4 * 1024;  // BAR1
    hibResourceGDRG.backend_mem_bytes  = 32 * 1024;  // BAR2

    hibResourceGDRG.dma0lsb     = 0x008;
    hibResourceGDRG.dma0msb     = 0x004;
    hibResourceGDRG.dma0laddr   = 0x000;
    hibResourceGDRG.dma0size    = 0x00c;
    hibResourceGDRG.dma0cmd     = 0x010;
    hibResourceGDRG.dma0misc    = 0x014;

    hibResourceGDRG.dma1lsb     = 0x048;
    hibResourceGDRG.dma1msb     = 0x044;
    hibResourceGDRG.dma1laddr   = 0x040;
    hibResourceGDRG.dma1size    = 0x04c;
    hibResourceGDRG.dma1cmd     = 0x050;
    hibResourceGDRG.dma1misc    = 0x054;

    hibResourceGDRG.command     = 0x120;
    hibResourceGDRG.testmode    = 0x128;
    hibResourceGDRG.boardinfo   = 0x130;
    hibResourceGDRG.asmi        = 0x138;
    hibResourceGDRG.pllconf     = 0x140;
    hibResourceGDRG.rupdate     = 0x148;

    /*
     * HIB local register bit offset
     */
    hibResourceGDRG.dma0cmd_kickoff           = 0x00000001; // write this value to start DMA.
    hibResourceGDRG.dma0cmd_abort_bit         =  1;
    hibResourceGDRG.dma0misc_done_bit         = 13;

    hibResourceGDRG.dma1cmd_kickoff           = 0x00000001;
    hibResourceGDRG.dma1cmd_abort_bit         =  1;
    hibResourceGDRG.dma1misc_done_bit         = 13;

    hibResourceGDRG.command_reset_backend_bit = 31;
    hibResourceGDRG.command_sram1_wlock_bit   = 26;
    hibResourceGDRG.command_sram0_wlock_bit   = 25;

    hibResourceGDRG.dma0misc_swap_sram_bit    =  0;   /* not used */
    hibResourceGDRG.dma0misc_sram_wcnt_bit    =  0;   /* not used */
    hibResourceGDRG.command_dma_reset_bit     =  0;   /* not used */
    hibResourceGDRG.command_swap_sram_bit     = 24;
    hibResourceGDRG.command_sram_wcnt_bit     =  0;

    hibResourceGDRG.asmi_error_bit            =  1;
    hibResourceGDRG.asmi_busy_bit             =  0;
    hibResourceGDRG.asmi_cmd_rsid             =  0xfffffd00;
    hibResourceGDRG.asmi_cmd_wp               =  0xfffffe00;
    hibResourceGDRG.asmi_cmd_eb               =  0xffffff00;

    hibResourceGDRG.pllconf_write_bit         =  0;
    hibResourceGDRG.pllconf_reconfig_bit      =  2;
    hibResourceGDRG.pllconf_busy_bit          =  0;

    /*
     * set hardware-dependent parameters.
     */
    switch(hib[devid].type) {
      case HIB_GRAPEDRG:
        hib[devid].r = &hibResourceGDRG;
        break;

      default:
        fprintf(stderr, "hib_openMC: unknown HIB type (%d).\n", (int)hib[devid].type);
        exit(1);
    }

#else // !_WIN32
    // ioctl request codes and mapping modes understood by the driver.
    hib[devid].write_cfg = HIB_WRITE_CFG;
    hib[devid].read_cfg = HIB_READ_CFG;
    hib[devid].get_dmaw_pa = HIB_GET_DMAW_PA;
    hib[devid].get_dmar_pa = HIB_GET_DMAR_PA;
    hib[devid].set_map_mode = HIB_SET_MAP_MODE;

    hib[devid].map_hibmem = HIB_MAP_HIBMEM;
    hib[devid].map_dmarbuf = HIB_MAP_DMARBUF;
    hib[devid].map_dmawbuf = HIB_MAP_DMAWBUF;
    hib[devid].map_backend = HIB_MAP_BACKEND;
    hib[devid].map_dblbuf = HIB_MAP_DBLBUF;

    // must come after read_cfg is set: identify_hibtype() reads the
    // configuration space through hib_config_readMC().
    hib[devid].type = identify_hibtype(devid);

    /*
     * set hardware-dependent parameters.
     */
    switch(hib[devid].type) {
      case HIB_GRAPEDRG:
        hib[devid].r = &hibResourceGDRG;
        break;

      case HIB_GRAPEDRP:
        hib[devid].r = &hibResourceGDRP;
        break;

      case HIB_GRAPE7X:
        hib[devid].r = &hibResourceG7X;
        break;

      default:
        fprintf(stderr, "hib_openMC: unknown HIB type (%d).\n", (int)hib[devid].type);
        exit(1);
    }
#endif // _WIN32

#if 0
#warning this version of hib_openMC does not initialize memory map nor does DMA engine.
#else

    TBmemMap(devid);

    /*
     * maps DMA buffers
     */
    TBgetDmaInfo(devid,
                 &(hib[devid].dmar_pa), &(hib[devid].dmar_buf),
                 &(hib[devid].dmaw_pa), &(hib[devid].dmaw_buf));
    /*
     * dmar/w_buf: virtual address of DMA read/write buffer
     * dmar/w_pa: its physical address
     *
     * the pointers are converted explicitly: handing a pointer to %lx is
     * undefined. the output format is unchanged.
     */
    WARN(3, "hib[%d].dmar_buf: 0x%016lx pa: %016lx\n",
         devid, (unsigned long)hib[devid].dmar_buf, (unsigned long)hib[devid].dmar_pa);
    WARN(3, "hib[%d].dmaw_buf: 0x%016lx pa: %016lx\n",
         devid, (unsigned long)hib[devid].dmaw_buf, (unsigned long)hib[devid].dmaw_pa);

    /* initialize local registers */
    hib_mem_writeMC(devid, hib[devid].r->dma0lsb, hib[devid].dmar_pa); /* LSB of DMA0 start address in PCI space */
    hib_mem_writeMC(devid, hib[devid].r->dma1lsb, hib[devid].dmaw_pa); /* LSB of DMA1 start address in PCI space */
    hib_mem_writeMC(devid, hib[devid].r->dma0msb, 0x0);
    hib_mem_writeMC(devid, hib[devid].r->dma1msb, 0x0);
    if (hib[devid].type == HIB_GRAPEDRG) {
        hib_mem_writeMC(devid, hib[devid].r->dma0laddr, 0x0); /* start address in local space */
        hib_mem_writeMC(devid, hib[devid].r->dma1laddr, 0x0); /* start address in local space */
        hib_mem_writeMC(devid, hib[devid].r->dma0cmd, 0); // set default param
        hib_mem_writeMC(devid, hib[devid].r->dma1cmd, 0); // set default param
    }

    sub_set_test_modeMC(devid, TESTMODE_NONE);

    // DMA is the default transport in both directions.
    hib[devid].sendMCfunc = hib_dmarMC;
    hib[devid].recvMCfunc = hib_dmawMC;

#endif
    WARN(3, "hib[%d] opened.\n", devid);

    return &(hib[devid]);
}

/*
 * close HIB
 *
 * Windows: unmaps BAR0..2, closes the device handle and marks the entry
 * as closed (hDev = NULL, fd = -1).
 * others : releases the device lock, unmaps the five regions mapped at
 * open time (local registers, backend, PIO write buffer, DMA read/write
 * buffers), closes the device file and sets fd to -1.
 * return values of munmap()/close()/CloseHandle() are not checked.
 */
void
hib_closeMC(int devid)
{
#if defined(_WIN32)

    /* when a mapping request is dispatched via ioctl, 
       old memory mapping is automatically unmapped.
       therefore, the lines below are not really necessary. */
    winMunmap(devid, 0);
    winMunmap(devid, 1);
    winMunmap(devid, 2);

    CloseHandle(hib[devid].hDev);
    hib[devid].hDev = NULL;
    hib[devid].fd = -1;

#else // !_WIN32

    unlock_hib(devid);

#if 0
#warning this version of hib_closeMC does not clean up memory map nor does DMA engine.
#else
    // each length is rounded up to a whole number of pages.
    munmap((void *)hib[devid].local_reg,   roundup_to_page(hib[devid].r->local_mem_bytes));
    munmap((void *)hib[devid].backend,     roundup_to_page(hib[devid].r->backend_mem_bytes));
    munmap((void *)hib[devid].piow_dblbuf, roundup_to_page(hib[devid].r->piowbuf_bytes));
    munmap((void *)hib[devid].dmar_buf,    roundup_to_page(hib[devid].r->dmabuf_bytes));
    munmap((void *)hib[devid].dmaw_buf,    roundup_to_page(hib[devid].r->dmabuf_bytes));
#endif

    close(hib[devid].fd);
    hib[devid].fd = -1; // hib_config_readMC()/hib_config_writeMC() treat fd < 0 as "not opened".

#endif // _WIN32

    WARN(3, "hib[%d] closed.\n", devid);
}

/*
 * write mode to the test mode register of hib[devid] right away,
 * without the settling delay that hib_set_test_modeMC() inserts.
 */
static void
sub_set_test_modeMC(int devid, int mode)
{
    hib_mem_writeMC(devid, hib[devid].r->testmode, mode);
}

/*
 * switch the test mode of hib[devid] to mode.
 * stdout is flushed and a 10 microsecond pause is inserted before the
 * register is written.
 */
void
hib_set_test_modeMC(int devid, int mode)
{
    // wait for a moment before switching the test mode,
    // so that preceding transfers, if any, safely complete.
    fflush(stdout);
    usleep(10);

    sub_set_test_modeMC(devid, mode);
}

/*
 * select the transport used by hib_sendMC() for hib[devid].
 *   SENDFUNC_PIOW : programmed I/O write (hib_piowMC)
 *   SENDFUNC_DMAR : DMA read             (hib_dmarMC)
 * any other value of func leaves the current selection unchanged.
 */
void
hib_set_sendfuncMC(int devid, int func)
{
    if (func == SENDFUNC_PIOW) {
        hib[devid].sendMCfunc = hib_piowMC;
    }
    else if (func == SENDFUNC_DMAR) {
        hib[devid].sendMCfunc = hib_dmarMC;
    }
}

/*
 * send size 8-byte words starting at buf to hib[devid], using the send
 * function currently registered for the device (hib_dmarMC after open;
 * changed with hib_set_sendfuncMC()).
 */
void
hib_sendMC(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
  hib[devid].sendMCfunc(devid, size, buf);
}

#if !defined(_WIN32)

/*
 * x86 SSE helpers used by streamwrite16().
 * LOADDQ and STOREDQ already end with `;`.
 *
 * NOTE(review): STOREDQ writes to mem but passes it as an input operand
 * ("m"), and neither macro lists the xmm register as clobbered. The
 * compiler is therefore not told about the store nor about the register
 * use -- confirm against the GCC extended-asm rules before relying on
 * this code.
 */
#define PREFETCH(mem)     asm volatile("prefetchnta %0"        : : "m"(mem))
#define LOADDQ(mem, reg)  asm volatile("movdqa %0, %"reg       : : "m"(mem));
#define STOREDQ(reg, mem) asm volatile("movntdq %"reg " , %0"  : : "m"(mem));

/*
 * copy size 8-byte words from src to dst, 16 bytes per iteration, going
 * through %xmm0 with an aligned load (movdqa) and a non-temporal store
 * (movntdq). both pointers must be 16-byte aligned (asserted).
 * two words are moved per iteration, so an odd size copies one word more
 * than requested.
 * the only call in the visible part of this file is compiled out in
 * hib_piowMC().
 */
static void
streamwrite16(int size, UINT64 *src, UINT64 *dst) // size in 8-byte word
{
    int j;

    // args need to be 16-byte aligned.
    assert(((unsigned long)src & 0xf) == 0);
    assert(((unsigned long)dst & 0xf) == 0);

    for (j = 0; j < size; j += 2) {
        PREFETCH(src[j+2]); // hint for the next pair of words.
        LOADDQ(src[j], "%xmm0");
        STOREDQ("%xmm0", dst[j]);
    }
}
#endif // !_WIN32

/*
 * send size 8-byte words starting at buf to hib[devid] by programmed I/O.
 *
 * the data is copied into the PIO write buffer (piow_dblbuf) in chunks;
 * after each chunk a register write hands the chunk over to the board:
 *   GRAPE-DRG : `command` register, swap bit | byte count
 *   others    : `dma0misc` register, swap bit | 8-byte-word count
 * nothing is done when size <= 0.
 */
void
hib_piowMC(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
    int i, j;
    int s = (hib[devid].r->piowbuf_bytes>>3);

    // NOTE(review): the buffer-derived chunk size above is overridden right
    // away; the chunk size actually used is fixed at 128 words (1KB).
    s = 128;

    for (i = 0; i < size; i += s) {

      // the last chunk may be shorter.
      if (i + s > size) {
	s = size - i;
      }

#if 1
      for (j = 0; j < s; j++) {
          hib[devid].piow_dblbuf[j] = buf[i + j];
      }
#else
      streamwrite16(s, buf + i, (UINT64 *)hib[devid].piow_dblbuf);
#endif


      if (hib[devid].type == HIB_GRAPEDRG) {
          hib_mem_writeMC(devid, hib[devid].r->command,
                          (1<<(hib[devid].r->command_swap_sram_bit)) |
                          ((s*8)<<(hib[devid].r->command_sram_wcnt_bit))); // s in byte unit.
          // asm volatile("mfence":::"memory");
      }
      else {
          hib_mem_writeMC(devid, hib[devid].r->dma0misc,
                          (1<<(hib[devid].r->dma0misc_swap_sram_bit)) |
                          (s<<(hib[devid].r->dma0misc_sram_wcnt_bit))); // s in 8-byte unit.
      }
    }
}

/*
 * DMA read (host -> hib) using DMA0 channel.
 * starts the transfer and then waits until it has completed.
 * buf must point into the DMA read buffer of the device
 * (checked by hib_start_dmarMC()).
 */
void
hib_dmarMC(int devid, int size, UINT64 *buf) // size in 8-byte word
{
    hib_start_dmarMC(devid, size, buf);
    hib_finish_dmarMC(devid);
}

/*
 * start a DMA read (host -> hib) on DMA0 channel; does not wait for completion.
 *
 * devid : device index.
 * size  : transfer size in 8-byte word. must not exceed the DMA buffer size.
 * buf   : start of the data; must point into hib[devid].dmar_buf.
 *
 * exits with status 2 if size is too large or buf lies outside the DMA
 * read buffer.
 * NOTE(review): only the start offset is range-checked, not offset + size;
 * a transfer starting near the end of the buffer can still run past it.
 */
void
hib_start_dmarMC(int devid, int size, UINT64 *buf) /* size: transfer size in 8-byte word */
{
    long offset;

    if (size > (hib[devid].r->dmabuf_bytes>>3)) {
        fprintf(stderr, "hib_dmarMC(): size too large.\n");
        fprintf(stderr, "size: %d\n", size);
        exit(2);
    }

    offset = buf - hib[devid].dmar_buf; // offset in 8-byte word.
    if (offset < 0 || hib[devid].r->dmabuf_bytes < offset * 8) {
        // offset is a long: print it with %lx (it used to be passed to %llx).
        fprintf(stderr, "hib_start_dmarMC: address offset (0x%016lx) out of range.\n",
                (unsigned long)offset);
        exit(2);
    }

    if (hib[devid].type == HIB_GRAPEDRG) {
        // write two 32-bit registers by a single 64-bit write access.
        // write PCI lower 32-bit address register and transfer size register at once,
        // and then kick off a DMA transfer.
        hib_mem_write64MC(devid, hib[devid].r->dma0lsb,
                          ((UINT64)(size*8) << 32) | (hib[devid].dmar_pa + offset * 8));
    }
    else {
        hib_mem_writeMC(devid, hib[devid].r->dma0lsb, hib[devid].dmar_pa + offset * 8); // lower 32-bit of start address in PCI space.
        hib_mem_writeMC(devid, hib[devid].r->dma0msb, 0x0);                             // higher 32-bit of start address in PCI space.
        hib_mem_writeMC(devid, hib[devid].r->dma0size, size*8);                         // size in byte.
        hib_mem_writeMC(devid, hib[devid].r->dma0cmd, hib[devid].r->dma0cmd_kickoff);
    }
}

/*
 * wait for completion of the DMA read (host -> hib) transaction
 * kicked off by hib_start_dmarMC().
 * polls the DMA0 status register until its done bit is set;
 * there is no timeout.
 */
void
hib_finish_dmarMC(int devid)
{
    volatile unsigned long status;

    for (;;) {
        status = hib_mem_readMC(devid, hib[devid].r->dma0misc);
        if ((status & (1<<hib[devid].r->dma0misc_done_bit)) != 0) {
            break; /* dmadone0 is asserted */
        }
    }
}

/*
 * DMA write (host <- hib) using DMA1 channel.
 * starts the transfer, waits until it has completed and returns the value
 * returned by hib_finish_dmawMC().
 * buf must point into the DMA write buffer of the device
 * (checked by hib_start_dmawMC()).
 */
int
hib_dmawMC(int devid, int size, UINT64 *buf) // size in 8-byte word
{
    hib_start_dmawMC(devid, size, buf);
    return hib_finish_dmawMC(devid);
}

/*
 * marker stored in the last word of the receive area before a DMA write
 * is started. hib_finish_dmawMC() regards the transfer as completed once
 * that word no longer holds the marker.
 */
#define NAN_AT_EOD (0x12345678ULL)
/* address of the marker word of the transfer in flight, per device. */
UINT64 *eobuf[NHIBMAX];

/*
 * start DMA write (host <- hib) using DMA1 channel; does not wait for completion.
 *
 * devid : device index.
 * size  : transfer size in 8-byte word. must be positive and must not
 *         exceed the DMA buffer size.
 * buf   : where the data is to be received; must point into hib[devid].dmaw_buf.
 *
 * exits with status 2 if size or buf is out of range.
 * NOTE(review): only the start offset is range-checked, not offset + size.
 */
void
hib_start_dmawMC(int devid, int size, UINT64 *buf) // size in 8-byte word
{
    long offset;

    // the marker goes to buf[size - 1]; a non-positive size would put it
    // in front of the buffer.
    if (size <= 0) {
        fprintf(stderr, "hib_start_dmawMC: size must be positive.\n");
        fprintf(stderr, "size: %d\n", size);
        exit(2);
    }
    if (size > (hib[devid].r->dmabuf_bytes>>3)) {
        fprintf(stderr, "hib_dmawMC(): size too large.\n");
        fprintf(stderr, "size: %d\n", size);
        exit(2);
    }

    offset = buf - hib[devid].dmaw_buf; // offset in 8-byte word.

    if (offset < 0 || hib[devid].r->dmabuf_bytes < offset * 8) {
        // offset is a long: print it with %lx (it used to be passed to %llx).
        fprintf(stderr, "hib_start_dmawMC: address offset (0x%016lx) out of range.\n",
                (unsigned long)offset);
        exit(2);
    }

    // set a dummy data at the end of buffer.
    buf[size - 1] = NAN_AT_EOD;
    eobuf[devid] = &(buf[size - 1]);

    if (hib[devid].type == HIB_GRAPEDRG) {
        // write two 32-bit registers by a single 64-bit write access.
        // write PCI lower 32-bit address register and transfer size register at once,
        // and then kick off a DMA transfer.
        hib_mem_write64MC(devid, hib[devid].r->dma1lsb,
                          ((UINT64)(size*8) << 32) | (hib[devid].dmaw_pa + offset * 8));
    }
    else {
        hib_mem_writeMC(devid, hib[devid].r->dma1lsb, hib[devid].dmaw_pa + offset * 8); // lower 32-bit of start address in PCI space.
        hib_mem_writeMC(devid, hib[devid].r->dma1msb, 0x0);                             // higher 32-bit of start address in PCI space.
        hib_mem_writeMC(devid, hib[devid].r->dma1size, size*8);                         // size in byte.
        hib_mem_writeMC(devid, hib[devid].r->dma1cmd, hib[devid].r->dma1cmd_kickoff);
    }
}

/*
 * wait completion of the DMA write (host <- hib) transaction
 * kicked off by hib_start_dmawMC()
 */
int
hib_finish_dmawMC(int devid)
{
    volatile unsigned long misc;
    int ret = 0;

    /*
     * wait till the DMA transfer completes
     */
#if 0
    // wait done bit of DMA1MISC register is asserted.
    do {
        misc = hib_mem_readMC(devid, hib[devid].r->dma1misc);
        asm volatile("mfence":::"memory");
    } while ((misc & (1<<hib[devid].r->dma1misc_done_bit)) == 0); /* wait until dmadone1 is asserted */
#else
    // wait until a dummy data at the end of buffer is overwritten by a data received from hib.
    while (*eobuf[devid] == NAN_AT_EOD);
#endif

    return ret;
}

/*
 * check if dmar is done or not yet.
 * reads the DMA0 status register once.
 * returns 1 if its done bit is set, 0 otherwise.
 */
int
hib_dmar_is_completedMC(int devid)
{
    volatile unsigned long status = hib_mem_readMC(devid, hib[devid].r->dma0misc);

    if ((status & (1<<hib[devid].r->dma0misc_done_bit)) != 0) {
        return 1;
    }
    return 0;
}


/*
 * check if dmaw is done or not yet.
 * reads the DMA1 status register once.
 * returns 1 if its done bit is set, 0 otherwise.
 */
int
hib_dmaw_is_completedMC(int devid)
{
    volatile unsigned long status = hib_mem_readMC(devid, hib[devid].r->dma1misc);

    if ((status & (1<<hib[devid].r->dma1misc_done_bit)) != 0) {
        return 1;
    }
    return 0;
}

/*
 * read a 32-bit word from the PCI configuration space of hib[devid].
 *
 * devid : device index. the device must have been opened.
 * addr  : address in the configuration space.
 *
 * returns the value read. if the request to the driver fails a message is
 * printed to stderr and 0 is returned.
 * on non-Windows hosts the process exits with status 1 if the device is
 * not opened.
 */
UINT32
hib_config_readMC(int devid, UINT32 addr)
{
#if defined(_WIN32)

    UINT success;
    int nBytesReturned;
    IOCTL_PACKET ip;
    HANDLE hDev = hib[devid].hDev;

    ip.addr = addr;
    ip.data = 0; // defined return value even if the request fails.
    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_CONFIG_READ,
                              &ip,                        // data to write to the device.
                              sizeof(IOCTL_PACKET),
                              &ip,                        // data to read from the device.
                              sizeof(IOCTL_PACKET),
                              (PULONG) &nBytesReturned,
                              NULL);
    if (success == FALSE) {
        // "%!STATUS" is not a printf conversion; print the error code as a number.
        fprintf(stderr, "hib_config_readMC failed. error code: %lu\n",
                (unsigned long)GetLastError());
    }
    return ip.data;

#else // !_WIN32

    struct long_access ca;
    ca.addr = addr;
    ca.data = 0;

    if (hib[devid].fd < 0) {
        fprintf(stderr, "open hib[%d] first\n", devid);
        exit (1);
    }
    if (-1 == ioctl(hib[devid].fd, hib[devid].read_cfg, &ca)) {
        // report the failure, but keep going as before: ca.data is returned as it is.
        perror("hib_config_readMC");
    }
    return (ca.data);

#endif // _WIN32
}

/*
 * write a 32-bit word to the PCI configuration space of hib[devid].
 *
 * devid : device index. the device must have been opened.
 * addr  : address in the configuration space.
 * value : value to be written.
 *
 * if the request to the driver fails a message is printed to stderr.
 * on non-Windows hosts the process exits with status 1 if the device is
 * not opened.
 */
void
hib_config_writeMC(int devid, UINT32 addr, UINT32 value)
{
#if defined(_WIN32)

    UINT success;
    int nBytesReturned;
    IOCTL_PACKET ip;
    HANDLE hDev = hib[devid].hDev;

    ip.addr = addr;
    ip.data = value;
    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_CONFIG_WRITE,
                              &ip,                        // data to write to the device.
                              sizeof(IOCTL_PACKET),
                              NULL,                       // data to read from the device.
                              0,                          // its size is a byte count, not a pointer.
                              (PULONG) &nBytesReturned,
                              NULL);
    if (success == FALSE) {
        // "%!STATUS" is not a printf conversion; print the error code as a number.
        fprintf(stderr, "hib_config_writeMC failed. error code: %lu\n",
                (unsigned long)GetLastError());
    }

#else // !_WIN32

    struct long_access ca;
    ca.addr = addr;
    ca.data = value;

    if (hib[devid].fd < 0) {
        fprintf(stderr, "open hib[%d] first\n", devid);
        exit (1);
    }
    if (-1 == ioctl(hib[devid].fd, hib[devid].write_cfg, &ca)) {
        // report the failure, but keep going as before.
        perror("hib_config_writeMC");
    }

#endif // _WIN32
}

/*
 * write a 32-bit value to the local register of hib[devid] (BAR0).
 *
 * addr  : byte offset of the register; converted to a 32-bit word index (addr>>2).
 * value : value to be written.
 *
 * the access is traced at warn level 4. note that the trace labels the
 * second number "pa:" although it is the value written.
 */
void
hib_mem_writeMC(int devid, UINT32 addr, UINT32 value)
{
    WARN(4, "hib[%d] hib_mem_writeMC: 0x%08x pa: %08x\n",
         devid, addr, value);

    //    asm volatile("mfence":::"memory");
    hib[devid].local_reg[addr>>2] = value; // bar0, 32-bit address
    //    asm volatile("mfence":::"memory");

    //    hib[devid].backend[addr>>3] = value; // bar1, 64-bit address
    //    hib[devid].piow_dblbuf[addr>>3] = value; // bar2, 64-bit address
}

/*
 * read a 32-bit value from the local register of hib[devid] (BAR0).
 * addr is the byte offset of the register; it is converted to a 32-bit
 * word index (addr>>2).
 */
UINT32
hib_mem_readMC(int devid, UINT32 addr)
{
    // bar0, 32-bit address
    // (bar1 and bar2 would be indexed by addr>>3, 64-bit address.)
    UINT32 word = hib[devid].local_reg[addr>>2];

    return word;
}

/*
 * write a 64-bit value to the local register of hib[devid] (BAR0) with a
 * single 64-bit store. it covers the 32-bit register at byte offset addr
 * and the one following it; the DMA start functions use this to set two
 * registers at once.
 * NOTE(review): the cast to (UINT64 *) would drop a volatile qualifier if
 * local_reg is declared volatile, and assumes addr is 8-byte aligned -- confirm.
 */
void
hib_mem_write64MC(int devid, UINT32 addr, UINT64 value)
{
    //    asm volatile("mfence":::"memory");
    *((UINT64 *)&(hib[devid].local_reg[addr>>2])) = value;
    //    asm volatile("mfence":::"memory");
}

/*
 * read a 64-bit value from the local register of hib[devid] (BAR0) with a
 * single 64-bit load. it covers the 32-bit register at byte offset addr
 * and the one following it.
 * NOTE(review): the cast to (UINT64 *) would drop a volatile qualifier if
 * local_reg is declared volatile, and assumes addr is 8-byte aligned -- confirm.
 */
UINT64
hib_mem_read64MC(int devid, UINT32 addr)
{
    UINT64 ret;

    ret =  *((UINT64*)&(hib[devid].local_reg[addr>>2]));

    return (ret);
}

/*
 * obtain the DMA read/write buffers of hib[devid].
 *
 * pa: physical addr of DMA read/write buf alloced in the kernel space
 * va: virtual addr (in the user process context) pa is mapped to
 *
 * Windows: a single request to the driver returns both addresses and the
 * sizes of both buffers. the results are stored directly into hib[devid]
 * (the out parameters are not written), and dmabuf_bytes of the resource
 * table is set to the smaller of the two sizes.
 * others : for each buffer the physical address is obtained by ioctl, the
 * mapping mode is selected and the buffer is mmap()ed; the results are
 * stored through the out parameters. finally the mapping mode is set back
 * to the board memory.
 *
 * any failure is fatal: a message is printed and the process exits with
 * status 1.
 */
static void
TBgetDmaInfo(int devid,
             unsigned long *dmarpa, UINT64 **dmarva,
             unsigned long *dmawpa, UINT64 **dmawva)
{
#if defined(_WIN32)
    UINT success;
    int nBytesReturned;
    IOCTL_DMAPACKET ip;
    HANDLE hDev = hib[devid].hDev;

    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_DMABUF_DESC,
                              NULL,
                              0,                          // no input data.
                              &ip,                        // data to read from the device.
                              sizeof(IOCTL_DMAPACKET),
                              (PULONG) &nBytesReturned,
                              NULL);
    if(success == FALSE) {
        // ip is not valid at this point; going on would hand garbage
        // addresses to the DMA engine. give up, as the non-Windows code does.
        fprintf(stderr, "TBgetDmaInfo failed. error code: %lu\n",
                (unsigned long)GetLastError());
        exit(1);
    }

    hib[devid].dmar_pa = ip.dmarpa;
    hib[devid].dmar_buf = ip.dmarva;
    hib[devid].dmaw_pa = ip.dmawpa;
    hib[devid].dmaw_buf = ip.dmawva;
    hib[devid].r->dmabuf_bytes = (ip.dmarsize > ip.dmawsize ? ip.dmawsize : ip.dmarsize); // set the smaller one.

    WARN(3, "dmar pa:0x%08lx  va:%p  size:0x%08x\n",
         (unsigned long)hib[devid].dmar_pa, (void *)hib[devid].dmar_buf,
         (unsigned int)hib[devid].r->dmabuf_bytes);
    WARN(3, "dmaw pa:0x%08lx  va:%p  size:0x%08x\n",
         (unsigned long)hib[devid].dmaw_pa, (void *)hib[devid].dmaw_buf,
         (unsigned int)hib[devid].r->dmabuf_bytes);

#else // !_WIN32

    int vsize;

    /* map DMA read buffer */
    if (-1 == ioctl(hib[devid].fd, hib[devid].get_dmar_pa, dmarpa)) {
        perror("TBgetDmaInfo() failed");
        exit (1);
    }
    // select what the next mmap() on this descriptor maps.
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_dmarbuf)) {
        perror("TBgetDmaInfo() failed");
        exit (1);
    }
    vsize = roundup_to_page(hib[devid].r->dmabuf_bytes);
    *dmarva = mmap(NULL, vsize,
                   PROT_READ | PROT_WRITE,
                   MAP_SHARED,
                   hib[devid].fd,
                   0);
    if ((void *)(*dmarva) == MAP_FAILED) {
        perror("TBgetDmaInfo");
        exit (1);
    }

    /* map DMA write buffer */
    if (-1 == ioctl(hib[devid].fd, hib[devid].get_dmaw_pa, dmawpa)) {
        perror("TBgetDmaInfo() failed");
        exit (1);
    }
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_dmawbuf)) {
        perror("TBgetDmaInfo() failed");
        exit (1);
    }
    vsize = roundup_to_page(hib[devid].r->dmabuf_bytes);
    *dmawva = mmap(NULL, vsize,
                   PROT_READ | PROT_WRITE,
                   MAP_SHARED,
                   hib[devid].fd,
                   0);
    if ((void *)(*dmawva) == MAP_FAILED) {
        perror("TBgetDmaInfo");
        exit (1);
    }

    /* set the mapping mode back to the board memory. */
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_hibmem)) {
        perror("TBgetDmaInfo() failed");
        exit (1);
    }
#endif // _WIN32
}

#if ! defined(_WIN32)
/*
 * Round size up to a whole number of pages, where the page size is the
 * one reported by getpagesize().
 *
 * The page count is worked out in size_t and the byte count is converted
 * back to unsigned int on return.  Note that size - 1 is evaluated in
 * unsigned arithmetic, so it wraps around for size == 0.
 */
static unsigned int
roundup_to_page(unsigned int size)
{
    const size_t page_bytes = getpagesize();
    const size_t npages = (size - 1) / page_bytes + 1;

    return npages * page_bytes;
}
#endif

/*
 * Map the three memory regions of hib[devid] into this process:
 *
 *   BAR0  HIB local register               -> hib[devid].local_reg
 *   BAR1  HIB backend                      -> hib[devid].backend
 *   BAR2  double buffer for PIO write      -> hib[devid].piow_dblbuf
 *
 * Non-Windows: for each region the map mode is set with ioctl() and the
 * region is then mmap()ed at offset 0 (the device driver takes care of the
 * BAR offset).  The lengths come from hib[devid].r, rounded up to a page.
 * The map mode is put back to map_hibmem at the end.  Any failure
 * terminates the process with exit(1).
 *
 * Windows: each region is mapped with DeviceIoControl(); the address and
 * size reported back are stored into hib[devid] and hib[devid].r.  On
 * failure a message is printed and the function returns early, leaving the
 * remaining regions unmapped.
 */
static void
TBmemMap(int devid)
{
#if defined(_WIN32)
    UINT success;
    int nBytesReturned;
    IOCTL_MMAPPACKET ip;
    HANDLE hDev = hib[devid].hDev;

    /*
     * maps HIB local register (BAR0)
     */
    ip.barid = 0;
    ip.cacheType = 0; // 0:MmNonCached  1:MmCached  2:MmWriteCombined
    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_BAR_MMAP,
                              &ip,                        // data to write to the device.
                              sizeof(IOCTL_MMAPPACKET),
                              &ip,                        // data to read from the device.
                              sizeof(IOCTL_MMAPPACKET),
                              (PULONG) &nBytesReturned,
                              NULL);
    if (success == FALSE) {
        fprintf(stderr, "TBmemMap failed. error code: %lu\n",
                (unsigned long)GetLastError());
        return;
    }
    hib[devid].local_reg = ip.uaddr;
    hib[devid].r->local_mem_bytes = ip.nbyte;
    WARN(3, "hib[%d].local_reg: 0x%016lx\n", devid, hib[devid].local_reg);

    /*
     * maps BAR1 (HIB backend)
     */
    ip.barid = 1;
    ip.cacheType = 0; // 0:MmNonCached  1:MmCached  2:MmWriteCombined
    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_BAR_MMAP,
                              &ip,                        // data to write to the device.
                              sizeof(IOCTL_MMAPPACKET),
                              &ip,                        // data to read from the device.
                              sizeof(IOCTL_MMAPPACKET),
                              (PULONG) &nBytesReturned,
                              NULL);
    if (success == FALSE) {
        fprintf(stderr, "TBmemMap failed. error code: %lu\n",
                (unsigned long)GetLastError());
        return;
    }
    hib[devid].backend = ip.uaddr;
    hib[devid].r->backend_mem_bytes = ip.nbyte;
    WARN(3, "hib[%d].backend: 0x%016lx\n", devid, hib[devid].backend);

    /*
     * maps BAR2 (double buffer for PIO write transfer)
     */
    ip.barid = 2;
    ip.cacheType = 2; // 0:MmNonCached  1:MmCached  2:MmWriteCombined
    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_BAR_MMAP,
                              &ip,                        // data to write to the device.
                              sizeof(IOCTL_MMAPPACKET),
                              &ip,                        // data to read from the device.
                              sizeof(IOCTL_MMAPPACKET),
                              (PULONG) &nBytesReturned,
                              NULL);
    if (success == FALSE) {
        fprintf(stderr, "TBmemMap failed. error code: %lu\n",
                (unsigned long)GetLastError());
        return;
    }
    hib[devid].piow_dblbuf = ip.uaddr;
    hib[devid].r->piowbuf_bytes = ip.nbyte;
    WARN(3, "hib[%d].piow_dblbuf: 0x%016lx\n", devid, hib[devid].piow_dblbuf);

#else // !_WIN32

    size_t vsize;

    /*
     * maps HIB local register (BAR0)
     */
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_hibmem)) {
        perror("TBmemMap() failed");
        exit (1);
    }
    vsize = roundup_to_page(hib[devid].r->local_mem_bytes);
    hib[devid].local_reg = mmap(NULL, vsize,
                                 PROT_READ | PROT_WRITE,
                                 MAP_SHARED,
                                 hib[devid].fd,
                                 0); /* device driver takes care of bar offset */
    if ((void *)(hib[devid].local_reg) == MAP_FAILED) {
        fprintf(stderr, "%s%d\n", hib[devid].devname, devid);
        exit (1);
    }
    WARN(3, "hib[%d].local_reg: 0x%016lx\n", devid, hib[devid].local_reg);


    /*
     * maps BAR1 (HIB backend)
     */
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_backend)) {
        perror("TBmemMap() failed");
        exit (1);
    }
    vsize = roundup_to_page(hib[devid].r->backend_mem_bytes);
    hib[devid].backend = mmap(NULL, vsize,
                              PROT_READ | PROT_WRITE,
                              MAP_SHARED,
                              hib[devid].fd,
                              0); /* device driver takes care of bar offset */
    if ((void *)(hib[devid].backend) == MAP_FAILED) {
        fprintf(stderr, "%s%d\n", hib[devid].devname, devid);
        exit (1);
    }
    WARN(3, "hib[%d].backend: 0x%016lx\n", devid, hib[devid].backend);


    /*
     * maps BAR2 (double buffer for PIO write transfer)
     */
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_dblbuf)) {
        perror("TBmemMap() failed");
        exit (1);
    }
    vsize = roundup_to_page(hib[devid].r->piowbuf_bytes);
    hib[devid].piow_dblbuf = mmap(NULL, vsize,
                                  PROT_READ | PROT_WRITE,
                                  MAP_SHARED,
                                  hib[devid].fd,
                                  0); /* device driver takes care of bar offset */
    /* check the pointer just mapped (the BAR1 pointer was tested here before). */
    if ((void *)(hib[devid].piow_dblbuf) == MAP_FAILED) {
        fprintf(stderr, "%s%d\n", hib[devid].devname, devid);
        exit (1);
    }
    WARN(3, "hib[%d].piow_dblbuf: 0x%016lx\n", devid, hib[devid].piow_dblbuf);


    /* house keeping */
    if (-1 == ioctl(hib[devid].fd, hib[devid].set_map_mode, hib[devid].map_hibmem)) {
        perror("TBmemMap() failed");
        exit (1);
    }
#endif // _WIN32
}

/*
 * One-time initialization of hib[devid].
 *
 * On the first call for a given devid:
 *   - hib[devid].fd is set to -1,
 *   - hib[devid].devname is set to "/dev/<HIB_DEVNAME><devid>",
 *   - if the environment variable HIB_WARNLEVEL holds a non-negative
 *     integer, it is handed to hib_set_warn_level().
 * Later calls for the same devid do nothing.
 *
 * Terminates the process with exit(1) if devid is negative or not smaller
 * than hib_ndevice(), or if the per-device flag array cannot be allocated.
 */
static void
init_envs(int devid)
{
    static int *firstcall = NULL; // firstcall[i] != 0 : hib[i] not initialized yet.
    static int nflags = 0;        // number of elements firstcall holds.
    int i, nhib;
    char *p;

    nhib = hib_ndevice();

    if (devid < 0) {
        fprintf(stderr, "Invalid devid (%d).\n", devid);
        exit(1);
    }
    if (devid >= nhib) {
        fprintf(stderr, "Too large devid (%d).\n", devid);
        exit(1);
    }

    // the very first call, or more devices are reported than before.
    if (nflags < nhib) {
        int *grown = (int *)realloc(firstcall, sizeof(int) * (size_t)nhib);
        if (grown == NULL) {
            perror("init_envs");
            exit(1);
        }
        for (i = nflags; i < nhib; i++) {
            grown[i] = 1;
        }
        firstcall = grown;
        nflags = nhib;
    }

    // first call for a hib[devid] in question.
    if (firstcall[devid]) {
        firstcall[devid] = 0;

        hib[devid].fd = -1;
        sprintf(hib[devid].devname, "/dev/%s%d", HIB_DEVNAME, devid);

        p = getenv("HIB_WARNLEVEL");
        if (p) {
            // skip leading blanks by hand: strtok() would modify the
            // environment string and returns NULL for an empty/blank value.
            while (*p == ' ') {
                p++;
            }
            if (*p != '\0') {
                int lv = atoi(p);
                if (0 <= lv) {
                    hib_set_warn_level(lv);
                }
            }
        }
    }
}

/*
 * Count the HIB devices present on the system.
 *
 * Windows: counts the device interfaces enumerated for
 * GUID_DEVINTERFACE_GPCIEHIB.  Returns FALSE (i.e. 0) if the device
 * information set cannot be obtained.
 *
 * Non-Windows: counts the entries of /dev accepted by
 * devname_filterfunc().  Terminates the process with exit(2) if /dev
 * cannot be scanned.
 *
 * devname is not referenced by either implementation.
 */
static int
count_devfile(char *devname)
{
#if defined(_WIN32)

    int nfound = 0;
    HDEVINFO                         devInfo;
    SP_DEVICE_INTERFACE_DATA         devIfData;
    LPGUID InterfaceGuid = (LPGUID) &GUID_DEVINTERFACE_GPCIEHIB;

    devInfo = SetupDiGetClassDevs(InterfaceGuid,
                                  NULL,
                                  NULL,
                                  (DIGCF_PRESENT | DIGCF_DEVICEINTERFACE));
    if (devInfo == INVALID_HANDLE_VALUE) {
        fprintf(stderr, "SetupDiGetClassDevs failed.\n");
        return FALSE;
    }
    devIfData.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA);

    // ask for the interface at index nfound until the enumeration fails.
    while (SetupDiEnumDeviceInterfaces(devInfo,
                                       NULL,
                                       InterfaceGuid,
                                       nfound,
                                       &devIfData)) {
        nfound++;
    }
    SetupDiDestroyDeviceInfoList(devInfo);
    return nfound;

#else // !_WIN32

    struct dirent **entries;
    int nfound, i;

    nfound = scandir("/dev", &entries, devname_filterfunc, NULL);
    if (nfound < 0) {
        perror("hibutil:count_devfile:scandir");
        exit(2);
    }

    // only the number of matches matters; release what scandir() allocated.
    for (i = 0; i < nfound; i++) {
        free(entries[i]);
    }
    free(entries);

    return nfound;
#endif // _WIN32
}

#if defined(_WIN32)

/*
 * return a file handle on success.
 * return NULL if the device is in use.
 * abort if the device file is not found.
 */
static HANDLE
winOpenDevice(int devid)
{
    HANDLE hDev;
    char devname[1024];

    if (!winGetDevicePath(devid,
                          (LPGUID) &GUID_DEVINTERFACE_GPCIEHIB,
                          devname,
                          sizeof(devname)))
    {
        // no device found.
        fprintf(stderr, "winGetDevicePath failed. abort.\n");
        exit(1);
    }

    WARN(5, "device name: %s\n", devname);

    hDev = CreateFile(devname,
                      GENERIC_WRITE | GENERIC_READ,

                      //                      FILE_SHARE_WRITE | FILE_SHARE_READ,
                      0, // exclusive open

                      NULL,
                      OPEN_EXISTING,
                      FILE_ATTRIBUTE_NORMAL,
                      NULL);

    if (hDev == INVALID_HANDLE_VALUE) {
        WARN(5, "winOpenDevice failed. %d", GetLastError());
        return NULL;
    }

    return hDev;
}

/*
 * Ask the driver to unmap the region identified by barid of hib[devid]
 * (IOCTL_GPCIEHIB_BAR_MUNMAP).
 *
 * Terminates the process with exit(1) if the request fails.
 */
static void
winMunmap(int devid, int barid)
{
    UINT success;
    int nBytesReturned;
    IOCTL_MMAPPACKET ip = {0}; // do not hand uninitialized fields to the driver.
    HANDLE hDev = hib[devid].hDev;

    ip.barid = barid;
    success = DeviceIoControl(hDev,
                              IOCTL_GPCIEHIB_BAR_MUNMAP,
                              &ip,                        // data to write to the device.
                              sizeof(IOCTL_MMAPPACKET),
                              NULL,                       // no data to read from the device.
                              0,
                              (PULONG) &nBytesReturned,
                              NULL);
    if (success == FALSE) {
        fprintf(stderr, "winMunmap failed. error code: %lu\n",
                (unsigned long)GetLastError());
        exit(1);
    }
}

/*
 * Look up the device path of the devid-th device interface registered
 * under InterfaceGuid and copy it to devicePath (at most bufLen
 * characters, including the terminator).
 *
 * Returns TRUE on success.  Returns FALSE if the interface cannot be
 * enumerated, its detail data cannot be obtained or allocated, or the
 * path does not fit into devicePath.
 */
static BOOL
winGetDevicePath(int devid,
                 IN  LPGUID InterfaceGuid,
                 __out_ecount(bufLen) PCHAR devicePath,
                 __in size_t bufLen)
{
    HDEVINFO                         devInfo;
    SP_DEVICE_INTERFACE_DATA         devIfData;
    PSP_DEVICE_INTERFACE_DETAIL_DATA devIfDetailData = NULL;
    ULONG lenRequired = 0;
    BOOL ok = FALSE;

    devInfo = SetupDiGetClassDevs(InterfaceGuid,
                                  NULL,
                                  NULL,
                                  (DIGCF_PRESENT | DIGCF_DEVICEINTERFACE));
    if (devInfo == INVALID_HANDLE_VALUE) {
        fprintf(stderr, "SetupDiGetClassDevs failed.\n");
        return FALSE;
    }

    devIfData.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA);

    if (!SetupDiEnumDeviceInterfaces(devInfo,
                                     NULL,
                                     InterfaceGuid,
                                     devid,
                                     &devIfData)) {
        fprintf(stderr, "SetupDiEnumDeviceInterfaces failed. error code: %lu\n",
                (unsigned long)GetLastError());
        goto cleanup;
    }

    // first call only queries the buffer size needed for the detail data;
    // its return value is not meaningful, lenRequired is.
    SetupDiGetDeviceInterfaceDetail(devInfo,
                                    &devIfData,
                                    NULL,
                                    0,
                                    &lenRequired,
                                    NULL);
    if (lenRequired == 0) {
        fprintf(stderr, "SetupDiGetDeviceInterfaceDetail failed. error code: %lu\n",
                (unsigned long)GetLastError());
        goto cleanup;
    }

    devIfDetailData = (PSP_DEVICE_INTERFACE_DETAIL_DATA)LocalAlloc(LMEM_FIXED, lenRequired);
    if (devIfDetailData == NULL) {
        fprintf(stderr, "LocalAlloc failed..\n");
        goto cleanup;
    }

    // cbSize is the size of the fixed part of the structure, not of the allocation.
    devIfDetailData->cbSize = sizeof(SP_DEVICE_INTERFACE_DETAIL_DATA);
    if (!SetupDiGetDeviceInterfaceDetail(devInfo,
                                         &devIfData,
                                         devIfDetailData,
                                         lenRequired,
                                         &lenRequired,
                                         NULL)) {
        fprintf(stderr, "SetupDiGetDeviceInterfaceDetail failed. error code: %lu\n",
                (unsigned long)GetLastError());
        goto cleanup;
    }

    if (SUCCEEDED(StringCchCopy(devicePath, bufLen, devIfDetailData->DevicePath))) {
        ok = TRUE;
    }

cleanup:
    if (devIfDetailData != NULL) {
        LocalFree(devIfDetailData);
    }
    SetupDiDestroyDeviceInfoList(devInfo);

    return ok;
}

/*
 * Minimal usleep() replacement for Windows built on Sleep().
 *
 * t is given in microseconds and converted to whole milliseconds.
 * A value that rounds down to zero (or is not positive) sleeps for 1 ms.
 */
static void
usleep(int t)
{
    t /= 1000;
    if (t <= 0) {   // was "t = 0": an assignment, which always ended in Sleep(0).
        t = 1;
    }
    Sleep(t);
}

#else // _WIN32

/*
 * Acquire the lockf() lock on the device file of hib[devid], waiting for
 * it if necessary.
 *
 * A non-blocking attempt is made first.  If the lock is held by someone
 * else (EACCES / EAGAIN) a warning is issued and the call then blocks
 * until the lock is granted.  Any other failure terminates the process
 * with exit(2).
 */
static void
lock_hib(int devid)
{
    if (lockf(hib[devid].fd, F_TLOCK, 0) == 0) {
        return; // returns successfully.
    }

    if ((errno != EACCES) && (errno != EAGAIN)) {
        perror("hib_openMC");
        exit(2);
    }
    WARN(1, "Someone is using hib[%d]. Sleep...\n", devid);

    // blocking attempt. do not return without actually holding the lock.
    while (lockf(hib[devid].fd, F_LOCK, 0) != 0) {
        if (errno == EINTR) {
            continue; // interrupted by a signal. wait again.
        }
        perror("hib_openMC");
        exit(2);
    }
}

/*
 * Try to acquire the lockf() lock on the device file of hib[devid]
 * without waiting.
 *
 * Returns 0 if the lock is acquired.
 * Returns -1 if the lock is held by someone else (EACCES / EAGAIN); the
 * pid reported by fcntl(F_GETLK) is stored to hiberr.locked_by_pid, or 0
 * if the holder cannot be determined.
 * Any other failure terminates the process with exit(2).
 */
static int
try_lock_hib(int devid)
{
    struct flock fl = {0};

    if (lockf(hib[devid].fd, F_TLOCK, 0) == 0) {
        return 0; // returns successfully.
    }

    if ((errno != EACCES) && (errno != EAGAIN)) {
        perror("hib_openMC");
        exit(2);
    }

    // F_GETLK reads fl as the description of the lock we would like to
    // place, so it has to be filled in: a write lock on the whole file
    // (l_start = 0, l_len = 0).
    fl.l_type = F_WRLCK;
    fl.l_whence = SEEK_SET;
    if (fcntl(hib[devid].fd, F_GETLK, &fl) == -1 || fl.l_type == F_UNLCK) {
        fl.l_pid = 0; // query failed, or the lock has been released meanwhile.
    }
    WARN(3, "pid:%d is using hib[%d]. Sleep...\n", fl.l_pid, devid);
    hiberr.locked_by_pid = fl.l_pid;
    return -1;
}

/*
 * Release the lockf() lock on the device file of hib[devid].
 * Terminates the process with exit(2) if the lock cannot be released.
 */
static void
unlock_hib(int devid)
{
    int rc = lockf(hib[devid].fd, F_ULOCK, 0);

    if (rc == 0) {
        return;
    }
    perror("unlock_hibMC");
    exit (2);
}

/*
 * Filter for scandir(): accept directory entries named "hibdrv" followed
 * only by decimal digits (the device minor id).
 *
 * Returns 1 for a match (e.g. "hibdrv0", "hibdrv12") and 0 otherwise
 * (e.g. "foohibdrv0", "hibdrvabc").  A bare "hibdrv" with no digits is
 * accepted as well.
 */
static int
devname_filterfunc(const struct dirent *entry)
{
    static const char keyword[] = "hibdrv";
    const size_t keylen = sizeof(keyword) - 1;
    const char *p;

    // the name has to start with the keyword.
    if (strncmp(entry->d_name, keyword, keylen) != 0) {
        return 0;
    }

    // everything after the keyword is the device minor id: digits only.
    // compared by range so that no <ctype.h> call sees a negative char.
    for (p = entry->d_name + keylen; *p != '\0'; p++) {
        if (*p < '0' || '9' < *p) {
            return 0; // "hibdrv" followed by nondigit (e.g. hibdrvabc).
        }
    }

    return 1;
}

#endif // !_WIN32
