CSRs I/O refactoring
This commit is contained in:
@@ -76,21 +76,20 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
||||
return err;
|
||||
}
|
||||
|
||||
int vx_csr_get_l(vx_device_h device, int core_id, int addr, int addr_h, uint64_t* value) {
|
||||
int ret = 0;
|
||||
unsigned value_lo, value_hi;
|
||||
ret |= vx_csr_get(device, core_id, addr, &value_lo);
|
||||
ret |= vx_csr_get(device, core_id, addr_h, &value_hi);
|
||||
*value = (uint64_t(value_hi) << 32) | value_lo;
|
||||
return ret;
|
||||
/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) {
|
||||
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
|
||||
return value_lo;
|
||||
}*/
|
||||
|
||||
static uint64_t get_csr_64(const uint32_t* buffer, int addr) {
|
||||
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
|
||||
uint32_t value_hi = buffer[addr - CSR_MPM_BASE + 32];
|
||||
return (uint64_t(value_hi) << 32) | value_lo;
|
||||
}
|
||||
|
||||
extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
int ret = 0;
|
||||
|
||||
unsigned num_cores;
|
||||
vx_csr_get(device, 0, CSR_NC, &num_cores);
|
||||
|
||||
uint64_t instrs = 0;
|
||||
uint64_t cycles = 0;
|
||||
|
||||
@@ -127,12 +126,23 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
uint64_t mem_stalls = 0;
|
||||
uint64_t mem_lat = 0;
|
||||
#endif
|
||||
|
||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||
|
||||
uint64_t instrs_per_core, cycles_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_INSTRET, CSR_INSTRET_H, &instrs_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_CYCLE, CSR_CYCLE_H, &cycles_per_core);
|
||||
unsigned num_cores;
|
||||
ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vx_buffer_h staging_buf;
|
||||
ret |= vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf);
|
||||
|
||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||
ret |= vx_copy_from_dev(staging_buf, IO_ADDR_CSR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
|
||||
uint64_t instrs_per_core = get_csr_64(staging_ptr, CSR_MINSTRET);
|
||||
uint64_t cycles_per_core = get_csr_64(staging_ptr, CSR_MCYCLE);
|
||||
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
|
||||
instrs += instrs_per_core;
|
||||
@@ -141,133 +151,110 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
#ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
// ibuffer_stall
|
||||
uint64_t ibuffer_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_IBUF_ST, CSR_MPM_IBUF_ST_H, &ibuffer_stalls_per_core);
|
||||
uint64_t ibuffer_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_IBUF_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: ibuffer stalls=%ld\n", core_id, ibuffer_stalls_per_core);
|
||||
ibuffer_stalls += ibuffer_stalls_per_core;
|
||||
// scoreboard_stall
|
||||
uint64_t scoreboard_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SCRB_ST, CSR_MPM_SCRB_ST_H, &scoreboard_stalls_per_core);
|
||||
uint64_t scoreboard_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_SCRB_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: scoreboard stalls=%ld\n", core_id, scoreboard_stalls_per_core);
|
||||
scoreboard_stalls += scoreboard_stalls_per_core;
|
||||
// alu_stall
|
||||
uint64_t alu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ALU_ST, CSR_MPM_ALU_ST_H, &alu_stalls_per_core);
|
||||
uint64_t alu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ALU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu unit stalls=%ld\n", core_id, alu_stalls_per_core);
|
||||
alu_stalls += alu_stalls_per_core;
|
||||
// lsu_stall
|
||||
uint64_t lsu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_LSU_ST, CSR_MPM_LSU_ST_H, &lsu_stalls_per_core);
|
||||
uint64_t lsu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_LSU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu unit stalls=%ld\n", core_id, lsu_stalls_per_core);
|
||||
lsu_stalls += lsu_stalls_per_core;
|
||||
// csr_stall
|
||||
uint64_t csr_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
||||
uint64_t csr_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_CSR_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||
csr_stalls += csr_stalls_per_core;
|
||||
// fpu_stall
|
||||
uint64_t fpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
||||
uint64_t fpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_FPU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu unit stalls=%ld\n", core_id, fpu_stalls_per_core);
|
||||
fpu_stalls += fpu_stalls_per_core;
|
||||
// gpu_stall
|
||||
uint64_t gpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_GPU_ST, CSR_MPM_GPU_ST_H, &gpu_stalls_per_core);
|
||||
uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
|
||||
gpu_stalls += gpu_stalls_per_core;
|
||||
|
||||
// PERF: Icache
|
||||
// total reads
|
||||
uint64_t icache_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_READS, CSR_MPM_ICACHE_READS_H, &icache_reads_per_core);
|
||||
uint64_t icache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_READS);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reads=%ld\n", core_id, icache_reads_per_core);
|
||||
icache_reads += icache_reads_per_core;
|
||||
// read misses
|
||||
uint64_t icache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MISS_R, CSR_MPM_ICACHE_MISS_R_H, &icache_miss_r_per_core);
|
||||
uint64_t icache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_MISS_R);
|
||||
int icache_read_hit_ratio = (int)((1.0 - (double(icache_miss_r_per_core) / double(icache_reads_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld (hit ratio=%d%%)\n", core_id, icache_miss_r_per_core, icache_read_hit_ratio);
|
||||
icache_read_misses += icache_miss_r_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t icache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_PIPE_ST, CSR_MPM_ICACHE_PIPE_ST_H, &icache_pipe_st_per_core);
|
||||
uint64_t icache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_PIPE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache pipeline stalls=%ld\n", core_id, icache_pipe_st_per_core);
|
||||
icache_pipe_stalls += icache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t icache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_CRSP_ST, CSR_MPM_ICACHE_CRSP_ST_H, &icache_crsp_st_per_core);
|
||||
uint64_t icache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_CRSP_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reponse stalls=%ld\n", core_id, icache_crsp_st_per_core);
|
||||
icache_rsp_stalls += icache_crsp_st_per_core;
|
||||
|
||||
// PERF: Dcache
|
||||
// total reads
|
||||
uint64_t dcache_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_READS, CSR_MPM_DCACHE_READS_H, &dcache_reads_per_core);
|
||||
uint64_t dcache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_READS);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reads=%ld\n", core_id, dcache_reads_per_core);
|
||||
dcache_reads += dcache_reads_per_core;
|
||||
// total write
|
||||
uint64_t dcache_writes_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_WRITES, CSR_MPM_DCACHE_WRITES_H, &dcache_writes_per_core);
|
||||
uint64_t dcache_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_WRITES);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache writes=%ld\n", core_id, dcache_writes_per_core);
|
||||
dcache_writes += dcache_writes_per_core;
|
||||
// read misses
|
||||
uint64_t dcache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_R, CSR_MPM_DCACHE_MISS_R_H, &dcache_miss_r_per_core);
|
||||
uint64_t dcache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_R);
|
||||
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_miss_r_per_core) / double(dcache_reads_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_r_per_core, dcache_read_hit_ratio);
|
||||
dcache_read_misses += dcache_miss_r_per_core;
|
||||
// read misses
|
||||
uint64_t dcache_miss_w_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core);
|
||||
uint64_t dcache_miss_w_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_W);
|
||||
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache write misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
|
||||
dcache_write_misses += dcache_miss_w_per_core;
|
||||
// bank_stalls
|
||||
uint64_t dcache_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_BANK_ST, CSR_MPM_DCACHE_BANK_ST_H, &dcache_bank_st_per_core);
|
||||
uint64_t dcache_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_BANK_ST);
|
||||
int dcache_bank_utilization = (int)((double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core)) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization);
|
||||
dcache_bank_stalls += dcache_bank_st_per_core;
|
||||
// mshr_stalls
|
||||
uint64_t dcache_mshr_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MSHR_ST, CSR_MPM_DCACHE_MSHR_ST_H, &dcache_mshr_st_per_core);
|
||||
uint64_t dcache_mshr_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MSHR_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core);
|
||||
dcache_mshr_stalls += dcache_mshr_st_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t dcache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_PIPE_ST, CSR_MPM_DCACHE_PIPE_ST_H, &dcache_pipe_st_per_core);
|
||||
uint64_t dcache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_PIPE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core);
|
||||
dcache_pipe_stalls += dcache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t dcache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_CRSP_ST, CSR_MPM_DCACHE_CRSP_ST_H, &dcache_crsp_st_per_core);
|
||||
uint64_t dcache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_CRSP_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core);
|
||||
dcache_rsp_stalls += dcache_crsp_st_per_core;
|
||||
|
||||
// PERF: SMEM
|
||||
// total reads
|
||||
uint64_t smem_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_READS, CSR_MPM_SMEM_READS_H, &smem_reads_per_core);
|
||||
uint64_t smem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_READS);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem reads=%ld\n", core_id, smem_reads_per_core);
|
||||
smem_reads += smem_reads_per_core;
|
||||
// total write
|
||||
uint64_t smem_writes_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_WRITES, CSR_MPM_SMEM_WRITES_H, &smem_writes_per_core);
|
||||
uint64_t smem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_WRITES);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem writes=%ld\n", core_id, smem_writes_per_core);
|
||||
smem_writes += smem_writes_per_core;
|
||||
// bank_stalls
|
||||
uint64_t smem_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_BANK_ST, CSR_MPM_SMEM_BANK_ST_H, &smem_bank_st_per_core);
|
||||
uint64_t smem_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_BANK_ST);
|
||||
int smem_bank_utilization = (int)((double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core)) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
|
||||
smem_bank_stalls += smem_bank_st_per_core;
|
||||
|
||||
// PERF: memory
|
||||
uint64_t mem_reads_per_core, mem_writes_per_core, mem_stalls_per_core, mem_lat_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_READS, CSR_MPM_MEM_READS_H, &mem_reads_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_WRITES, CSR_MPM_MEM_WRITES_H, &mem_writes_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_ST, CSR_MPM_MEM_ST_H, &mem_stalls_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_LAT, CSR_MPM_MEM_LAT_H, &mem_lat_per_core);
|
||||
uint64_t mem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_READS);
|
||||
uint64_t mem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_WRITES);
|
||||
uint64_t mem_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_ST);
|
||||
uint64_t mem_lat_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_LAT);
|
||||
int mem_utilization = (int)((double(mem_reads_per_core + mem_writes_per_core) / double(mem_reads_per_core + mem_writes_per_core + mem_stalls_per_core)) * 100);
|
||||
int mem_avg_lat = (int)(double(mem_lat_per_core) / double(mem_reads_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory requests=%ld (reads=%ld, writes=%ld)\n", core_id, (mem_reads_per_core + mem_writes_per_core), mem_reads_per_core, mem_writes_per_core);
|
||||
|
||||
@@ -59,12 +59,6 @@ int vx_start(vx_device_h hdevice);
|
||||
// Wait for device ready with milliseconds timeout
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
// set device constant registers
|
||||
int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value);
|
||||
|
||||
// get device constant registers
|
||||
int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value);
|
||||
|
||||
////////////////////////////// UTILITY FUNCIONS ///////////////////////////////
|
||||
|
||||
// upload kernel bytes to device
|
||||
|
||||
@@ -37,25 +37,20 @@
|
||||
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
|
||||
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
|
||||
#define CMD_RUN AFU_IMAGE_CMD_RUN
|
||||
#define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ
|
||||
#define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE
|
||||
|
||||
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
|
||||
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
|
||||
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
|
||||
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
|
||||
#define MMIO_DEV_CAPS (AFU_IMAGE_MMIO_DEV_CAPS * 4)
|
||||
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
|
||||
#define MMIO_CSR_CORE (AFU_IMAGE_MMIO_CSR_CORE * 4)
|
||||
#define MMIO_CSR_ADDR (AFU_IMAGE_MMIO_CSR_ADDR * 4)
|
||||
#define MMIO_CSR_DATA (AFU_IMAGE_MMIO_CSR_DATA * 4)
|
||||
#define MMIO_CSR_READ (AFU_IMAGE_MMIO_CSR_READ * 4)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
unsigned implementation_id;
|
||||
unsigned version;
|
||||
unsigned num_cores;
|
||||
unsigned num_warps;
|
||||
unsigned num_threads;
|
||||
@@ -89,7 +84,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
*value = device->implementation_id;
|
||||
*value = device->version;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = device->num_cores;
|
||||
@@ -195,21 +190,22 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
|
||||
device->fpga = accel_handle;
|
||||
device->mem_allocation = ALLOC_BASE_ADDR;
|
||||
|
||||
|
||||
{
|
||||
// Load device CAPS
|
||||
int ret = 0;
|
||||
ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id);
|
||||
ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores);
|
||||
ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps);
|
||||
ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads);
|
||||
uint64_t dev_caps;
|
||||
int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps);
|
||||
if (ret != FPGA_OK) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
device->version = (dev_caps >> 0) & 0xffff;
|
||||
device->num_cores = (dev_caps >> 16) & 0xffff;
|
||||
device->num_warps = (dev_caps >> 32) & 0xffff;
|
||||
device->num_threads = (dev_caps >> 48) & 0xffff;
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
|
||||
device->implementation_id, device->num_cores, device->num_warps, device->num_threads);
|
||||
device->version, device->num_cores, device->num_warps, device->num_threads);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -470,52 +466,5 @@ extern int vx_start(vx_device_h hdevice) {
|
||||
// start execution
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// set device constant registers
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// get device constant registers
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||
if (nullptr == hdevice || nullptr == value)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// write CSR value
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
||||
|
||||
// Ensure ready for new command
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
uint64_t value64;
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_READ, &value64));
|
||||
*value = (unsigned)value64;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -7,21 +7,16 @@
|
||||
|
||||
#define AFU_ACCEL_NAME "vortex_afu"
|
||||
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
|
||||
#define AFU_IMAGE_CMD_CSR_READ 4
|
||||
#define AFU_IMAGE_CMD_CSR_WRITE 5
|
||||
#define AFU_IMAGE_CMD_MEM_READ 1
|
||||
#define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
#define AFU_IMAGE_CMD_RUN 3
|
||||
#define AFU_IMAGE_MMIO_CMD_TYPE 10
|
||||
#define AFU_IMAGE_MMIO_CSR_ADDR 26
|
||||
#define AFU_IMAGE_MMIO_CSR_CORE 24
|
||||
#define AFU_IMAGE_MMIO_CSR_DATA 28
|
||||
#define AFU_IMAGE_MMIO_CSR_READ 30
|
||||
#define AFU_IMAGE_MMIO_DATA_SIZE 16
|
||||
#define AFU_IMAGE_MMIO_IO_ADDR 12
|
||||
#define AFU_IMAGE_MMIO_MEM_ADDR 14
|
||||
#define AFU_IMAGE_MMIO_SCOPE_READ 20
|
||||
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
|
||||
#define AFU_IMAGE_MMIO_DEV_CAPS 24
|
||||
#define AFU_IMAGE_MMIO_STATUS 18
|
||||
#define AFU_IMAGE_POWER 0
|
||||
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
|
||||
|
||||
@@ -144,28 +144,6 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int set_csr(int core_id, int addr, unsigned value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.set_csr(core_id, addr, value);
|
||||
while (simulator_.csr_req_active()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_csr(int core_id, int addr, unsigned *value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.get_csr(core_id, addr, value);
|
||||
while (simulator_.csr_req_active()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
size_t mem_allocation_;
|
||||
@@ -330,22 +308,4 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->set_csr(core_id, addr, value);
|
||||
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->get_csr(core_id, addr, value);
|
||||
}
|
||||
@@ -376,22 +376,4 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->set_csr(core_id, addr, value);
|
||||
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->get_csr(core_id, addr, value);
|
||||
}
|
||||
@@ -42,12 +42,4 @@ extern int vx_start(vx_device_h /*hdevice*/) {
|
||||
|
||||
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Reference in New Issue
Block a user