From a60bfc5e01fbb056ae5f4dda7237380c5b86f4dc Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 15 Aug 2021 05:10:46 -0700 Subject: [PATCH] extending tracing feature for advanced debugging --- driver/opae/vlsim/opae_sim.cpp | 32 +++++++++++++++++---- driver/opae/vlsim/opae_sim.h | 3 +- driver/rtlsim/vortex.cpp | 21 +++++++------- hw/dpi/util_dpi.cpp | 22 +++++++++----- hw/dpi/util_dpi.vh | 2 ++ hw/simulate/simulator.cpp | 52 ++++++++++++++++++++++++++++------ hw/simulate/simulator.h | 4 +-- 7 files changed, 100 insertions(+), 36 deletions(-) diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index 8346186d..40890908 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -10,8 +10,12 @@ #define ENABLE_MEM_STALLS -#ifndef TRACE_DELAY -#define TRACE_DELAY 0 +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull #endif #ifndef MEM_LATENCY @@ -30,8 +34,6 @@ #define VERILATOR_RESET_VALUE 2 #endif -uint64_t sim_trace_delay = TRACE_DELAY; - static uint64_t timestamp = 0; double sc_time_stamp() { @@ -55,6 +57,23 @@ static void __aligned_free(void *ptr) { /////////////////////////////////////////////////////////////////////////////// +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +/////////////////////////////////////////////////////////////////////////////// + opae_sim::opae_sim() : stop_(false) , host_buffer_ids_(0) @@ -205,7 +224,7 @@ void opae_sim::step() { void opae_sim::eval() { vortex_afu_->eval(); #ifdef VCD_OUTPUT - if (timestamp >= sim_trace_delay) { + if (sim_trace_enabled()) { trace_->dump(timestamp); } #endif @@ -349,7 +368,7 @@ void 
opae_sim::avs_bus() { } /*printf("%0ld: [sim] MEM Wr Req: bank=%d, addr=%x, data=", timestamp, b, base_addr); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { - printf("%0x", data[(MEM_BLOCK_SIZE-1)-i]); + printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); } printf("\n");*/ } @@ -360,6 +379,7 @@ void opae_sim::avs_bus() { mem_req.cycles_left = MEM_LATENCY; for (auto& rsp : mem_reads_[b]) { if (mem_req.addr == rsp.addr) { + // duplicate requests receive the same cycle delay mem_req.cycles_left = rsp.cycles_left; break; } diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 46b165dd..3d20180a 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -1,7 +1,6 @@ #pragma once -#include "verilated.h" -//#include "verilated_stub.h" +#include <verilated.h> #include "Vvortex_afu_shim.h" #include "Vvortex_afu_shim__Syms.h" diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index 40c80b16..de52ba63 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -21,7 +21,6 @@ inline size_t align_size(size_t size, size_t alignment) { /////////////////////////////////////////////////////////////////////////////// class vx_device; - class vx_buffer { public: vx_buffer(size_t size, vx_device* device) @@ -84,11 +83,11 @@ public: if (dest_addr + asize > ram_.size()) return -1; - /*printf("VXDRV: upload %d bytes from 0x%lx to 0x%lx", size, (uint8_t*)src + src_offset, dest_addr); - if (size <= 1024) { - printf(": "); - for (int i = asize-1; i >= 0; --i) { - printf("%x", *((uint8_t*)src + src_offset + i)); + /*printf("VXDRV: upload %ld bytes from 0x%lx:", size, uintptr_t((uint8_t*)src + src_offset)); + for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) { + printf("\n0x%08lx=", dest_addr + i * CACHE_BLOCK_SIZE); + for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) { + printf("%02x", *((uint8_t*)src + src_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j)); } } printf("\n");*/ @@ -104,11 +103,11 @@ public: ram_.read(src_addr, asize, 
(uint8_t*)dest + dest_offset); - /*printf("VXDRV: download %d bytes from 0x%lx to 0x%lx", size, src_addr, (uint8_t*)dest + dest_offset); - if (size <= 1024) { - printf(": "); - for (int i = asize-1; i >= 0; --i) { - printf("%x", *((uint8_t*)dest + dest_offset + i)); + /*printf("VXDRV: download %ld bytes to 0x%lx:", size, uintptr_t((uint8_t*)dest + dest_offset)); + for (int i = 0; i < (asize / CACHE_BLOCK_SIZE); ++i) { + printf("\n0x%08lx=", src_addr + i * CACHE_BLOCK_SIZE); + for (int j = 0; j < CACHE_BLOCK_SIZE; ++j) { + printf("%02x", *((uint8_t*)dest + dest_offset + i * CACHE_BLOCK_SIZE + CACHE_BLOCK_SIZE - 1 - j)); } } printf("\n");*/ diff --git a/hw/dpi/util_dpi.cpp b/hw/dpi/util_dpi.cpp index cecf9974..ef13e696 100644 --- a/hw/dpi/util_dpi.cpp +++ b/hw/dpi/util_dpi.cpp @@ -15,12 +15,13 @@ extern "C" { int dpi_register(); void dpi_assert(int inst, bool cond, int delay); - void dpi_trace(const char* format, ...); + void dpi_trace(const char* format, ...); + void dpi_trace_start(); + void dpi_trace_stop(); } -double sc_time_stamp(); - -extern uint64_t sim_trace_delay; +bool sim_trace_enabled(); +void sim_trace_enable(bool enable); class ShiftRegister { public: @@ -141,12 +142,19 @@ void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) { } } -void dpi_trace(const char* format, ...) { - uint64_t timestamp = (uint64_t)sc_time_stamp(); - if (timestamp < sim_trace_delay) +void dpi_trace(const char* format, ...) 
{ + if (!sim_trace_enabled()) return; va_list va; va_start(va, format); vprintf(format, va); va_end(va); +} + +void dpi_trace_start() { + sim_trace_enable(true); +} + +void dpi_trace_stop() { + sim_trace_enable(false); } \ No newline at end of file diff --git a/hw/dpi/util_dpi.vh b/hw/dpi/util_dpi.vh index 6e685bd2..07e81259 100644 --- a/hw/dpi/util_dpi.vh +++ b/hw/dpi/util_dpi.vh @@ -8,5 +8,7 @@ import "DPI-C" function int dpi_register(); import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay); import "DPI-C" function void dpi_trace(input string format /*verilator sformat*/); +import "DPI-C" function void dpi_trace_start(); +import "DPI-C" function void dpi_trace_stop(); `endif \ No newline at end of file diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 14020940..ccb37bf2 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -5,8 +5,12 @@ #define ENABLE_MEM_STALLS -#ifndef TRACE_DELAY -#define TRACE_DELAY 0 +#ifndef TRACE_START_TIME +#define TRACE_START_TIME 0ull +#endif + +#ifndef TRACE_STOP_TIME +#define TRACE_STOP_TIME -1ull #endif #ifndef MEM_LATENCY @@ -28,14 +32,31 @@ #define VL_WDATA_GETW(lwp, i, n, w) \ VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w) -uint64_t sim_trace_delay = TRACE_DELAY; - static uint64_t timestamp = 0; double sc_time_stamp() { return timestamp; } +/////////////////////////////////////////////////////////////////////////////// + +static bool trace_enabled = false; +static uint64_t trace_start_time = TRACE_START_TIME; +static uint64_t trace_stop_time = TRACE_STOP_TIME; + +bool sim_trace_enabled() { + if (timestamp >= trace_start_time + && timestamp < trace_stop_time) + return true; + return trace_enabled; +} + +void sim_trace_enable(bool enable) { + trace_enabled = enable; +} + +/////////////////////////////////////////////////////////////////////////////// + Simulator::Simulator() { // force random values for unitialized signals 
Verilated::randReset(VERILATOR_RESET_VALUE); @@ -127,7 +148,7 @@ void Simulator::step() { void Simulator::eval() { vortex_->eval(); #ifdef VCD_OUTPUT - if (timestamp >= sim_trace_delay) { + if (sim_trace_enabled()) { trace_->dump(timestamp); } #endif @@ -169,7 +190,14 @@ void Simulator::eval_mem_bus() { if (!mem_rsp_active_) { if (has_response) { vortex_->mem_rsp_valid = 1; - std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); + std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); + /* + printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); vortex_->mem_rsp_tag = mem_rsp_it->tag; mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); @@ -214,20 +242,28 @@ void Simulator::eval_mem_bus() { } } } else { + /* + printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ for (int i = 0; i < MEM_BLOCK_SIZE; i++) { if ((byteen >> i) & 0x1) { (*ram_)[base_addr + i] = data[i]; } } } - } else { + } else { mem_req_t mem_req; mem_req.tag = vortex_->mem_req_tag; - mem_req.addr = vortex_->mem_req_addr; + mem_req.addr = (vortex_->mem_req_addr * MEM_BLOCK_SIZE); ram_->read(vortex_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.block.data()); mem_req.cycles_left = MEM_LATENCY; for (auto& rsp : mem_rsp_vec_[req_bank]) { if (mem_req.addr == rsp.addr) { + // duplicate requests receive the same cycle delay mem_req.cycles_left = rsp.cycles_left; break; } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index e9fe9389..fe64babe 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -1,8 +1,8 @@ #pragma once 
+#include <verilated.h> #include "VVortex.h" #include "VVortex__Syms.h" -#include "verilated.h" #ifdef VCD_OUTPUT #include <verilated_vcd_c.h> @@ -51,7 +51,7 @@ private: typedef struct { int cycles_left; std::array<uint8_t, MEM_BLOCK_SIZE> block; - uint32_t addr; + uint64_t addr; uint64_t tag; } mem_req_t;