diff --git a/benchmarks/opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm index d8d18df9..644db9d7 100755 Binary files a/benchmarks/opencl/sgemm/sgemm and b/benchmarks/opencl/sgemm/sgemm differ diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index ac38faca..0d1cd965 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -15,13 +15,13 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE -#DBG_FLAGS += $(DBG_PRINT_FLAGS) -#DBG_FLAGS += -DDBG_CORE_REQ_INFO +DBG_FLAGS += $(DBG_PRINT_FLAGS) +DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 +#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 +CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1 #DEBUG=1 #SCOPE=1 @@ -58,7 +58,7 @@ VL_FLAGS += verilator.vlt # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS) + VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS) CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) else VL_FLAGS += -DNDEBUG diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index 9374c7ab..8190820f 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -31,9 +31,9 @@ opae_sim::opae_sim() { #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); + trace_ = new VerilatedFstC(); vortex_afu_->trace(trace_, 99); - trace_->open("trace.vcd"); + trace_->open("trace.fst"); #endif this->reset(); @@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) { *ioaddr = host_buffers_[wsid].ioaddr; } +void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { + std::lock_guard guard(mutex_); + + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; + vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; + this->step(); + vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0; + assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); + *value = vortex_afu_->af2cp_sTxPort_c2_data; +} + void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) { std::lock_guard guard(mutex_); @@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8); this->step(); - assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid); -} - -void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) { - std::lock_guard guard(mutex_); - - vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1; - vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0; - this->step(); - assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid); - assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid); - *value = vortex_afu_->af2cp_sTxPort_c2_data; + vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0; } void opae_sim::flush() { @@ -117,24 +117,41 @@ void opae_sim::flush() { /////////////////////////////////////////////////////////////////////////////// void opae_sim::reset() { - vortex_afu_->reset = 1; - this->step(); - vortex_afu_->reset = 0; + + host_buffers_.clear(); + dram_reads_.clear(); + cci_reads_.clear(); + cci_writes_.clear(); + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; + vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; + vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0; + vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0; + vortex_afu_->avs_readdatavalid = 0; + vortex_afu_->avs_waitrequest = 0; + vortex_afu_->reset = 1; + + vortex_afu_->clk = 0; + this->eval(); + vortex_afu_->clk = 1; + this->eval(); + + vortex_afu_->reset = 0; + // Turn on assertion after reset Verilated::assertOn(true); } void opae_sim::step() { - vortex_afu_->clk = 0; - this->eval(); - - vortex_afu_->clk = 1; - this->eval(); this->sRxPort_bus(); this->sTxPort_bus(); this->avs_bus(); + + vortex_afu_->clk = 0; + this->eval(); + vortex_afu_->clk = 1; + this->eval(); #ifndef NDEBUG fflush(stdout); @@ -149,100 +166,105 @@ void opae_sim::eval() { ++timestamp; } -void opae_sim::sRxPort_bus() { +void opae_sim::sRxPort_bus() { + // check mmio request + bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid + || vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid; + // schedule CCI read responses - int cci_rd_index = -1; - for (int i = 0; i < cci_reads_.size(); i++) { - if (cci_reads_[i].cycles_left > 0) { - cci_reads_[i].cycles_left -= 1; - } - if ((cci_rd_index == -1) - && (cci_reads_[i].cycles_left == 0)) { - cci_rd_index = i; + std::list::iterator cci_rd_it(cci_reads_.end()); + for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_rd_it == ie) && (it->cycles_left == 0)) { + cci_rd_it = it; } } // schedule CCI write responses - int cci_wr_index = -1; - for (int i = 0; i < cci_writes_.size(); i++) { - if (cci_writes_[i].cycles_left > 0) { - cci_writes_[i].cycles_left -= 1; + std::list::iterator cci_wr_it(cci_writes_.end()); + for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) { + if (it->cycles_left > 0) + it->cycles_left -= 1; + if ((cci_wr_it == ie) && (it->cycles_left == 0)) { + cci_wr_it = it; } - if ((cci_wr_index == -1) - && (cci_writes_[i].cycles_left == 0)) { - cci_wr_index = i; - } - } - - // send CCI read response - vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; - if (cci_rd_index != -1) { - vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; - memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE); - vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata; - cci_reads_.erase(cci_reads_.begin() + cci_rd_index); } // send CCI write response vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0; - if (cci_wr_index != -1) { + if (cci_wr_it != cci_writes_.end()) { vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1; - vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata; - cci_writes_.erase(cci_writes_.begin() + cci_wr_index); + vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata; + cci_writes_.erase(cci_wr_it); } - // mmio - vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0; - vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0; + // send CCI read response (ensure mmio disabled) + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0; + if (!mmio_req_enabled + && (cci_rd_it != cci_reads_.end())) { + vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1; + memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE); + vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata; + printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata); + for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) { + printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]); + } + printf("\n"); + fflush(stdout); + cci_reads_.erase(cci_rd_it); + } } void opae_sim::sTxPort_bus() { - // check read queue size - vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE); - - // check write queue size - vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE); - // process read requests - if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) { + if (vortex_afu_->af2cp_sTxPort_c0_valid) { + assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull); cci_rd_req_t cci_req; cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); + cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address; cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE); memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE); - cci_reads_.push_back(cci_req); + printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata); + fflush(stdout); + cci_reads_.emplace_back(cci_req); } // process write requests - if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) { + if (vortex_afu_->af2cp_sTxPort_c1_valid) { + assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull); cci_wr_req_t cci_req; cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD); cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata; auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE); memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE); - cci_writes_.push_back(cci_req); + cci_writes_.emplace_back(cci_req); } + + // check queues overflow + vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1)); + vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1)); } void opae_sim::avs_bus() { // schedule DRAM read responses - int dram_rd_index = -1; - for (int i = 0; i < dram_reads_.size(); i++) { - if (dram_reads_[i].cycles_left > 0) { - dram_reads_[i].cycles_left -= 1; + std::list::iterator dram_rd_it(dram_reads_.end()); + for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) { + if (it->cycles_left > 0) { + it->cycles_left -= 1; } - if ((dram_rd_index == -1) - && (dram_reads_[i].cycles_left == 0)) { - dram_rd_index = i; + if ((it != ie) && (it->cycles_left == 0)) { + dram_rd_it = it; } } // send DRAM response vortex_afu_->avs_readdatavalid = 0; - if (dram_rd_index != -1) { + if (dram_rd_it != dram_reads_.end()) { vortex_afu_->avs_readdatavalid = 1; - memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE); - dram_reads_.erase(dram_reads_.begin() + dram_rd_index); + memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE); + dram_reads_.erase(dram_rd_it); } // handle DRAM stalls @@ -275,7 +297,7 @@ void opae_sim::avs_bus() { dram_req.cycles_left = DRAM_LATENCY; unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data()); - dram_reads_.push_back(dram_req); + dram_reads_.emplace_back(dram_req); } } diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 9a4906eb..58b57757 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -5,7 +5,7 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include @@ -13,7 +13,7 @@ #include #include -#include +#include #include #define CACHE_BLOCK_SIZE 64 @@ -41,18 +41,19 @@ private: typedef struct { int cycles_left; std::array block; - unsigned tag; + uint32_t tag; } dram_rd_req_t; typedef struct { int cycles_left; std::array block; - unsigned mdata; + uint64_t addr; + uint32_t mdata; } cci_rd_req_t; typedef struct { int cycles_left; - unsigned mdata; + uint32_t mdata; } cci_wr_req_t; typedef struct { @@ -76,17 +77,17 @@ private: std::unordered_map host_buffers_; - std::vector dram_reads_; + std::list dram_reads_; - std::vector cci_reads_; + std::list cci_reads_; - std::vector cci_writes_; + std::list cci_writes_; std::mutex mutex_; RAM ram_; Vvortex_afu_shim *vortex_afu_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedFstC *trace_; #endif }; \ No newline at end of file diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index edd26cb4..2bb09c4a 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,6 +1,6 @@ #pragma once -//#define HANG_TIMEOUT 60 +#define HANG_TIMEOUT 60 int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile index 69a5ec80..46cb364a 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -1,7 +1,7 @@ RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain VORTEX_RT_PATH ?= $(wildcard ../../../runtime) -OPTS ?= -n32 +OPTS ?= -n64 VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ diff --git a/hw/opae/README b/hw/opae/README index 84e08e88..e05b1df2 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -76,7 +76,7 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt # compress VCD trace tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd -tar -zcvf trace.vcd.tar.gz trace.vcd +tar -zcvf trace.fst.tar.gz trace.fst run.log tar -zcvf run.log.tar.gz run.log tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index e23c4caf..c019e54c 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -74,103 +74,103 @@ localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; -logic [127:0] afu_id = `AFU_ACCEL_UUID; +localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE); +localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW; -typedef enum logic[3:0] { - STATE_IDLE, - STATE_READ, - STATE_WRITE, - STATE_START, - STATE_RUN, - STATE_CLFLUSH, - STATE_CSR_READ, - STATE_CSR_WRITE -} state_t; - -typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag; -typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data; - -state_t state; +localparam STATE_IDLE = 0; +localparam STATE_READ = 1; +localparam STATE_WRITE = 2; +localparam STATE_START = 3; +localparam STATE_RUN = 4; +localparam STATE_CLFLUSH = 5; +localparam STATE_CSR_READ = 6; +localparam STATE_CSR_WRITE = 7; +localparam STATE_MAX_VALUE = 8; +localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); `ifdef SCOPE `SCOPE_DECL_SIGNALS `endif +wire [127:0] afu_id = `AFU_ACCEL_UUID; + +reg [STATE_WIDTH-1:0] state; + // Vortex ports /////////////////////////////////////////////////////////////// -logic vx_dram_req_valid; -logic vx_dram_req_rw; -logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; -logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; -logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; -logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; -logic vx_dram_req_ready; +wire vx_dram_req_valid; +wire vx_dram_req_rw; +wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; +wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; +wire vx_dram_req_ready; -logic vx_dram_rsp_valid; -logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; -logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; -logic vx_dram_rsp_ready; +wire vx_dram_rsp_valid; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; +wire vx_dram_rsp_ready; -logic vx_snp_req_valid; -logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; -logic vx_snp_req_invalidate = 0; -logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; -logic vx_snp_req_ready; +reg vx_snp_req_valid; +reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; +wire vx_snp_req_invalidate = 0; +reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; +wire vx_snp_req_ready; -logic vx_snp_rsp_valid; +reg vx_snp_rsp_valid; `DEBUG_BEGIN -logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; +reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; `DEBUG_END -logic vx_snp_rsp_ready; +reg vx_snp_rsp_ready; -logic vx_csr_io_req_valid; -logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; -logic [11:0] vx_csr_io_req_addr; -logic vx_csr_io_req_rw; -logic [31:0] vx_csr_io_req_data; -logic vx_csr_io_req_ready; +wire vx_csr_io_req_valid; +wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; +wire [11:0] vx_csr_io_req_addr; +wire vx_csr_io_req_rw; +wire [31:0] vx_csr_io_req_data; +wire vx_csr_io_req_ready; -logic vx_csr_io_rsp_valid; -logic [31:0] vx_csr_io_rsp_data; -logic vx_csr_io_rsp_ready; +wire vx_csr_io_rsp_valid; +wire [31:0] vx_csr_io_rsp_data; +wire vx_csr_io_rsp_ready; -logic vx_reset; -logic vx_busy; +reg vx_reset; +wire vx_busy; // AVS Queues ///////////////////////////////////////////////////////////////// -logic avs_rtq_push; -logic avs_rtq_pop; +wire avs_rtq_push; +wire avs_rtq_pop; `DEBUG_BEGIN -logic avs_rtq_empty; -logic avs_rtq_full; +wire avs_rtq_empty; +wire avs_rtq_full; `DEBUG_BEGIN -logic avs_rdq_push; -logic avs_rdq_pop; +wire avs_rdq_push; +wire avs_rdq_pop; t_local_mem_data avs_rdq_dout; -logic avs_rdq_empty; +wire avs_rdq_empty; `DEBUG_BEGIN -logic avs_rdq_full; +wire avs_rdq_full; `DEBUG_END // CMD variables ////////////////////////////////////////////////////////////// t_ccip_clAddr cmd_io_addr; -logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; -logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size; +reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; +reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; `ifdef SCOPE -logic [63:0] cmd_scope_rdata; -logic [63:0] cmd_scope_wdata; -logic cmd_scope_read; -logic cmd_scope_write; +wire [63:0] cmd_scope_rdata; +wire [63:0] cmd_scope_wdata; +wire cmd_scope_read; +wire cmd_scope_write; `endif -logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; -logic [11:0] cmd_csr_addr; -logic [31:0] cmd_csr_rdata; -logic [31:0] cmd_csr_wdata; +reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; +reg [11:0] cmd_csr_addr; +reg [31:0] cmd_csr_rdata; +reg [31:0] cmd_csr_wdata; // MMIO controller //////////////////////////////////////////////////////////// @@ -193,6 +193,10 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm `DEBUG_BEGIN wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; +wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid; +wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid; +wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull; +wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull; wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address; wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; @@ -212,8 +216,7 @@ initial begin end `endif -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin `ifndef VERILATOR $asserton; // enable assertions @@ -316,7 +319,7 @@ begin MMIO_STATUS: begin mmio_tx.data <= 64'(state); `ifdef DBG_PRINT_OPAE - if (state != state_t'(mmio_tx.data)) begin + if (state != STATE_WIDTH'(mmio_tx.data)) begin $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); end `endif @@ -349,14 +352,13 @@ end // COMMAND FSM //////////////////////////////////////////////////////////////// -logic cmd_read_done; -logic cmd_write_done; -logic cmd_clflush_done; -logic cmd_csr_done; -logic cmd_run_done; +wire cmd_read_done; +wire cmd_write_done; +wire cmd_clflush_done; +wire cmd_csr_done; +wire cmd_run_done; -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin state <= STATE_IDLE; vx_reset <= 0; @@ -479,27 +481,28 @@ end // AVS Controller ///////////////////////////////////////////////////////////// -logic vortex_enabled; -logic cci_rdq_empty; -t_cci_rdq_data cci_rdq_dout; +wire vortex_enabled; +wire cci_rdq_empty; +wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; -logic cci_dram_rd_req_fire; -logic cci_dram_wr_req_fire; -logic vx_dram_rd_req_fire; +wire cci_dram_rd_req_fire; +wire cci_dram_wr_req_fire; +wire vx_dram_rd_req_fire; `DEBUG_BEGIN -logic vx_dram_wr_req_fire; +wire vx_dram_wr_req_fire; `DEBUG_END -logic vx_dram_rd_rsp_fire; +wire vx_dram_rd_rsp_fire; t_local_mem_byte_mask vx_dram_req_byteen_; -logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next; -logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; -logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; +reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads; +wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next; +wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; -logic cci_dram_rd_req_enable, cci_dram_wr_req_enable; -logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; +wire cci_dram_rd_req_enable, cci_dram_wr_req_enable; +wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; -logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); @@ -535,11 +538,10 @@ end else begin assign vx_dram_req_byteen_ = vx_dram_req_byteen; end -always_comb -begin +always @(*) begin case (state) CMD_MEM_READ: avs_address = cci_dram_rd_req_addr; - CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout))); + CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; endcase @@ -550,8 +552,8 @@ begin endcase case (state) - CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)]; - default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset; + CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; + default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset; endcase end @@ -560,8 +562,7 @@ assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable; assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size); -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin mem_bank_select <= 0; @@ -594,7 +595,7 @@ begin end if (cci_dram_wr_req_fire) begin - cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); + cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); @@ -654,7 +655,7 @@ VX_generic_queue #( // AVS data read response queue /////////////////////////////////////////////// -logic cci_wr_req_fire; +wire cci_wr_req_fire; assign avs_rdq_push = avs_readdatavalid; assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire; @@ -676,31 +677,37 @@ VX_generic_queue #( // CCI-P Read Request /////////////////////////////////////////////////////////// -logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next; -logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next; +reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; +wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; +wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; +wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag; +reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr; t_ccip_clAddr cci_rd_req_addr; -t_cci_rdq_tag cci_rd_rsp_ctr; -logic cci_rd_req_fire, cci_rd_rsp_fire; -logic cci_rd_req_enable, cci_rd_req_wait; +wire cci_rd_req_fire, cci_rd_rsp_fire; +reg cci_rd_req_enable, cci_rd_req_wait; -logic cci_rdq_push, cci_rdq_pop; -t_cci_rdq_data cci_rdq_din; +wire cci_rdq_push, cci_rdq_pop; +wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din; -always_comb begin +always @(*) begin af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr; - af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr)); + af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag); end assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull; assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; +assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); +assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); + assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0); assign cci_rdq_pop = cci_dram_wr_req_fire; assign cci_rdq_push = cci_rd_rsp_fire; -assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)}; +assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; assign cci_pending_reads_next = cci_pending_reads + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : @@ -709,8 +716,7 @@ assign cci_pending_reads_next = cci_pending_reads assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; // Send read requests to CCI -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin cci_rd_req_addr <= 0; cci_rd_req_ctr <= 0; @@ -738,21 +744,23 @@ begin if (cci_rd_req_fire) begin cci_rd_req_addr <= cci_rd_req_addr + 1; cci_rd_req_ctr <= cci_rd_req_ctr_next; - if (t_cci_rdq_tag'(cci_rd_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 1; // end current request batch + if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin + cci_rd_req_wait <= 1; // end current request batch + $display("*** %t: CCI Rd Rsp: STOP", $time); end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); + $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next); `endif end if (cci_rd_rsp_fire) begin - cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1); - if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin - cci_rd_req_wait <= 0; // restart new request batch + cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1); + if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin + cci_rd_req_wait <= 0; // restart new request batch + $display("*** %t: CCI Rd Rsp: START", $time); end `ifdef DBG_PRINT_OPAE - $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr); + $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr); `endif end @@ -763,12 +771,11 @@ begin end cci_pending_reads <= cci_pending_reads_next; - end end VX_generic_queue #( - .DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)), + .DATAW(CCI_RD_RQ_DATAW), .SIZE(CCI_RD_QUEUE_SIZE) ) cci_rd_req_queue ( .clk (clk), @@ -782,14 +789,37 @@ VX_generic_queue #( `UNUSED_PIN (size) ); +`DEBUG_BEGIN +reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask; +always @(posedge clk) begin + if (reset) begin + dbg_cci_rd_rsp_mask <= 0; + end else begin + if (cci_rd_rsp_fire) begin + if (cci_rd_rsp_ctr == 0) begin + dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag); + end else begin + if (dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] != 0) begin + $display("*** %t: Assert: CCI Rd Rsp: idx=%0d, ctr=%0d, mask=%0h, meta=%0h, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, dbg_cci_rd_rsp_mask, cp2af_sRxPort.c0.hdr.mdata, cp2af_sRxPort.c0.data); + assert(0); + end + dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1; + end + end + end +end +`DEBUG_END + // CCI-P Write Request ////////////////////////////////////////////////////////// -logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next; -logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; +reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; +wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; t_ccip_clAddr cci_wr_req_addr; -logic cci_wr_req_enable, cci_wr_rsp_fire; +reg cci_wr_req_enable; +wire cci_wr_rsp_fire; -always_comb begin +always @(*) begin af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0); af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr; af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode @@ -808,7 +838,7 @@ assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty; // Send write requests to CCI -always_ff @(posedge clk) +always @(posedge clk) begin if (reset) begin cci_wr_req_addr <= 0; @@ -833,7 +863,7 @@ begin cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE - $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); + $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout); `endif end @@ -849,12 +879,12 @@ end // Vortex cache snooping ////////////////////////////////////////////////////// -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next; -logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr; +reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr; +wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next; -logic vx_snp_req_fire, vx_snp_rsp_fire; +wire vx_snp_req_fire, vx_snp_rsp_fire; if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)}; @@ -872,8 +902,7 @@ assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'( assign cmd_clflush_done = (0 == snp_rsp_ctr); -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin vx_snp_req_valid <= 0; vx_snp_req_addr <= 0; @@ -911,7 +940,7 @@ begin vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE - $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next)); + $display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); `endif end @@ -928,7 +957,7 @@ end // CSRs/////////////////////////////////////////////////////////////////////// -logic csr_io_req_sent; +reg csr_io_req_sent; assign vx_csr_io_req_valid = !csr_io_req_sent && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state)); @@ -941,8 +970,7 @@ assign vx_csr_io_rsp_ready = 1; assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid; -always_ff @(posedge clk) -begin +always @(posedge clk) begin if (reset) begin csr_io_req_sent <= 0; cmd_csr_rdata <= 0; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index a7a2e0ef..9c8b19dd 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -6,11 +6,6 @@ /////////////////////////////////////////////////////////////////////////////// -// `define SYNTHESIS 1 -// `define ASIC 1 - -/////////////////////////////////////////////////////////////////////////////// - `define NW_BITS `LOG2UP(`NUM_WARPS) `define NT_BITS `LOG2UP(`NUM_THREADS) diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index f60f1964..05833b9d 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -10,131 +10,24 @@ module VX_gpr_ram ( output wire [`NUM_THREADS-1:0][31:0] rs1_data, output wire [`NUM_THREADS-1:0][31:0] rs2_data ); - `ifndef ASIC - - reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; - reg [`NUM_THREADS-1:0][31:0] q1, q2; - - always @(posedge clk) begin - for (integer i = 0; i < `NUM_THREADS; i++) begin - if (we[i]) begin - mem[waddr][i][0] <= wdata[i][07:00]; - mem[waddr][i][1] <= wdata[i][15:08]; - mem[waddr][i][2] <= wdata[i][23:16]; - mem[waddr][i][3] <= wdata[i][31:24]; - end - end - q1 <= mem[rs1]; - q2 <= mem[rs2]; - end - - assign rs1_data = q1; - assign rs2_data = q2; - - `else - - wire [`NUM_THREADS-1:0][31:0] write_bit_mask; + reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0]; + reg [`NUM_THREADS-1:0][31:0] q1, q2; + + always @(posedge clk) begin for (integer i = 0; i < `NUM_THREADS; i++) begin - assign write_bit_mask[i] = {32{~we[i]}}; - end - - wire cenb = 0; - wire cena_1 = 0; - wire cena_2 = 0; - - wire [`NUM_THREADS-1:0][31:0] tmp_a; - wire [`NUM_THREADS-1:0][31:0] tmp_b; - - `ifndef SYNTHESIS - for (integer i = 0; i < `NUM_THREADS; i++) begin - for (integer j = 0; j < 32; j++) begin - assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j]; - assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j]; + if (we[i]) begin + mem[waddr][i][0] <= wdata[i][07:00]; + mem[waddr][i][1] <= wdata[i][15:08]; + mem[waddr][i][2] <= wdata[i][23:16]; + mem[waddr][i][3] <= wdata[i][31:24]; end end - `else - assign rs1_data = tmp_a; - assign rs2_data = tmp_b; - `endif - for (integer i = 0; i < 'NT; i=i+4) begin - `IGNORE_WARNINGS_BEGIN - rf2_32x128_wm1 first_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(tmp_a[(i+3):(i)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_1), - .AA(rs1[(i+3):(i)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(i+3):(i)]), - .AB(waddr[(i+3):(i)]), - .DB(wdata[(i+3):(i)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); + q1 <= mem[rs1]; + q2 <= mem[rs2]; + end - rf2_`NUM_GPRSx128_wm1 second_ram ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(tmp_b[(i+3):(i)]), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena_2), - .AA(rs2[(i+3):(i)]), - .CLKB(clk), - .CENB(cenb), - .WENB(write_bit_mask[(i+3):(i)]), - .AB(waddr[(i+3):(i)]), - .DB(wdata[(i+3):(i)]), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - `IGNORE_WARNINGS_END - end - - `endif + assign rs1_data = q1; + assign rs2_data = q2; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1b957271..6bb52123 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -20,8 +20,8 @@ module VX_icache_stage #( ); `UNUSED_VAR (reset) - reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; - reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; + `NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0]; + `NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0]; wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; diff --git a/hw/rtl/VX_ipdom_stack.v b/hw/rtl/VX_ipdom_stack.v index e00097ae..4e7d42f9 100644 --- a/hw/rtl/VX_ipdom_stack.v +++ b/hw/rtl/VX_ipdom_stack.v @@ -16,8 +16,8 @@ module VX_ipdom_stack #( ); localparam STACK_SIZE = 2 ** DEPTH; - reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; - reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; + `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; + `NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1]; reg is_part [0:STACK_SIZE-1]; reg [DEPTH-1:0] rd_ptr, wr_ptr; diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index d30120dd..783743ee 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -52,7 +52,7 @@ /////////////////////////////////////////////////////////////////////////////// `define USE_FAST_BRAM (* ramstyle="mlab" *) -`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *) +`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index b5f6350d..d3e31162 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -306,9 +306,9 @@ module VX_bank #( assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped - //decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req - assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : - mrvq_pop_unqual ? mrvq_addr_st0 : + //Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req + assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 : + dfpq_pop_unqual ? dfpq_addr_st0 : reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : 0; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 9f201223..f56d638e 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -56,7 +56,7 @@ module VX_cache_miss_resrv #( output wire miss_resrv_is_snp_st0, output wire miss_resrv_snp_invalidate_st0 ); - reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0]; + wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table; reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MRVQ_SIZE-1:0] valid_table; @@ -72,8 +72,8 @@ module VX_cache_miss_resrv #( assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock - wire enqueue_possible = !miss_resrv_full; - wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; + wire enqueue_possible = !miss_resrv_full; + wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready_push; @@ -86,11 +86,11 @@ module VX_cache_miss_resrv #( assign pending_hazard_st1 = |(valid_address_match); - wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; + wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr; assign miss_resrv_valid_st0 = dequeue_possible; - assign miss_resrv_addr_st0 = addr_table[dequeue_index]; + assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, @@ -98,7 +98,7 @@ module VX_cache_miss_resrv #( miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0, - miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; + miss_resrv_snp_invalidate_st0} = metadata_table; wire mrvq_push = miss_add && enqueue_possible && !is_mrvq; wire mrvq_pop = miss_resrv_pop && dequeue_possible; @@ -125,7 +125,6 @@ module VX_cache_miss_resrv #( valid_table[enqueue_index] <= 1; ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; - metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); end else if (increment_head) begin valid_table[head_ptr] <= 0; @@ -155,6 +154,22 @@ module VX_cache_miss_resrv #( end end + VX_dp_ram #( + .DATAW(`MRVQ_METADATA_WIDTH), + .SIZE(MRVQ_SIZE), + .BYTEENW(1), + .BUFFERED(0), + .RWCHECK(1) + ) metadata_ram ( + .clk(clk), + .waddr(enqueue_index), + .raddr(dequeue_index), + .wren(mrvq_push), + .rden(1'b1), + .din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}), + .dout(metadata_table) + ); + `ifdef DBG_PRINT_CACHE_MSRQ always @(posedge clk) begin if (mrvq_push || mrvq_pop || increment_head || recover_state) begin diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index a1b80838..6d6d8572 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -183,15 +183,15 @@ module VX_tag_data_access #( if (valid_req_st1) begin if ((| use_write_enable)) begin if (writefill_st1) begin - $display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); + $display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); end else begin - $display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); + $display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); end end else if (miss_st1) begin - $display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); + $display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); end else begin - $display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); + $display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); end end end diff --git a/hw/rtl/cache/VX_tag_data_store.v b/hw/rtl/cache/VX_tag_data_store.v index c0594471..d3a022b2 100644 --- a/hw/rtl/cache/VX_tag_data_store.v +++ b/hw/rtl/cache/VX_tag_data_store.v @@ -78,7 +78,7 @@ module VX_tag_data_store #( .SIZE(`BANK_LINE_COUNT), .BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), .BUFFERED(0), - .RWCHECK(0) + .RWCHECK(1) ) dp_ram ( .clk(clk), .waddr(write_addr), diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index b7d70789..01a0a167 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -6,6 +6,7 @@ module VX_dp_ram #( parameter BYTEENW = 1, parameter BUFFERED = 1, parameter RWCHECK = 1, + parameter RWBYPASS = 0, parameter ADDRW = $clog2(SIZE), parameter SIZEW = $clog2(SIZE+1) ) ( @@ -29,19 +30,46 @@ module VX_dp_ram #( if (wren[i]) mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; end - if (rden) - dout_r <= mem[raddr]; end end else begin always @(posedge clk) begin if (wren) mem[waddr] <= din; - if (rden) - dout_r <= mem[raddr]; end - end - + end + + always @(posedge clk) begin + if (rden) + dout_r <= mem[raddr]; + end + + if (RWBYPASS) begin + reg [DATAW-1:0] din_r; + wire writing; + + if (BYTEENW > 1) begin + assign writing = (| wren); + always @(posedge clk) begin + for (integer i = 0; i < BYTEENW; i++) begin + din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8]; + end + end + end else begin + assign writing = wren; + always @(posedge clk) begin + din_r <= din; + end + end + + reg bypass_r; + always @(posedge clk) begin + bypass_r <= writing && (raddr == waddr); + end + + assign dout = bypass_r ? din_r : dout_r; + end else begin assign dout = dout_r; + end end else begin @@ -65,7 +93,7 @@ module VX_dp_ram #( end end - `ifdef SYNTHESIS + if (RWBYPASS) begin reg [DATAW-1:0] din_r; wire writing; @@ -89,13 +117,13 @@ module VX_dp_ram #( end assign dout = bypass_r ? din_r : mem[raddr]; - `else + end else begin assign dout = mem[raddr]; - `endif + end end else begin - reg [DATAW-1:0] mem [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0]; if (BYTEENW > 1) begin always @(posedge clk) begin diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index 68db0d4d..bb5010b7 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -85,7 +85,7 @@ module VX_generic_queue #( .DATAW(DATAW), .SIZE(SIZE), .BUFFERED(0), - .RWCHECK(0) + .RWCHECK(1) ) dp_ram ( .clk(clk), .waddr(wr_ptr_a), diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 9490d6b3..8b089259 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -36,8 +36,9 @@ module VX_scope #( localparam GET_COUNT = 3'd3; localparam GET_OFFSET = 3'd6; - reg [DATAW-1:0] data_store [SIZE-1:0]; - reg [DELTAW-1:0] delta_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0]; + `NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0]; + reg [UPDW-1:0] prev_trigger_id; reg [DELTAW-1:0] delta; reg [BUSW-1:0] bus_out_r; diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 20e7e85b..88ac722c 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -44,7 +44,7 @@ gen-s: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' gen-sd: - verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace $(DBG) + verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG) gen-st: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) @@ -53,7 +53,7 @@ gen-m: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: - verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace $(DBG) + verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG) gen-mt: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) @@ -77,11 +77,12 @@ build-mt: gen-mt (cd obj_dir && make -j -f VVortex.mk) run: run-s + run-s: build-s (cd obj_dir && ./VVortex) run-sd: build-sd - (cd obj_dir && valgrind ./VVortex) + (cd obj_dir && ./VVortex) run-st: build-st (cd obj_dir && ./VVortex) diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 60fde196..2698cc74 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -28,15 +28,11 @@ Simulator::Simulator() { ram_ = nullptr; vortex_ = new VVortex(); - dram_rsp_active_ = false; - snp_req_active_ = false; - csr_req_active_ = false; - #ifdef VCD_OUTPUT Verilated::traceEverOn(true); - trace_ = new VerilatedVcdC(); + trace_ = new VerilatedFstC(); vortex_->trace(trace_, 99); - trace_->open("trace.vcd"); + trace_->open("trace.fst"); #endif // reset the device @@ -66,27 +62,49 @@ void Simulator::reset() { std::cout << timestamp << ": [sim] reset()" << std::endl; #endif - vortex_->reset = 1; - this->step(); - vortex_->reset = 0; - + print_bufs_.clear(); dram_rsp_vec_.clear(); + dram_rsp_active_ = false; + snp_req_active_ = false; + csr_req_active_ = false; + + snp_req_size_ = 0; + pending_snp_reqs_ = 0; + csr_rsp_value_ = nullptr; + + vortex_->dram_rsp_valid = 0; + vortex_->dram_req_ready = 0; + vortex_->io_req_ready = 0; + vortex_->io_rsp_valid = 0; + vortex_->snp_req_valid = 0; + vortex_->snp_rsp_ready = 0; + vortex_->csr_io_req_valid = 0; + vortex_->csr_io_rsp_ready = 0; + + vortex_->reset = 1; + + vortex_->clk = 0; + this->eval(); + vortex_->clk = 1; + this->eval(); + + vortex_->reset = 0; + // Turn on assertion after reset Verilated::assertOn(true); } void Simulator::step() { - vortex_->clk = 0; - this->eval(); - - vortex_->clk = 1; - this->eval(); - this->eval_dram_bus(); this->eval_io_bus(); this->eval_csr_bus(); this->eval_snp_bus(); + + vortex_->clk = 0; + this->eval(); + vortex_->clk = 1; + this->eval(); } void Simulator::eval() { @@ -104,14 +122,13 @@ void Simulator::eval_dram_bus() { } // schedule DRAM responses - int dequeue_index = -1; - for (int i = 0; i < dram_rsp_vec_.size(); i++) { - if (dram_rsp_vec_[i].cycles_left > 0) { - dram_rsp_vec_[i].cycles_left -= 1; + std::list::iterator dram_rsp_it(dram_rsp_vec_.end()); + for (auto it = dram_rsp_vec_.begin(), ie = dram_rsp_vec_.end(); it != ie; ++it) { + if (it->cycles_left > 0) { + it->cycles_left -= 1; } - if ((dequeue_index == -1) - && (dram_rsp_vec_[i].cycles_left == 0)) { - dequeue_index = i; + if ((dram_rsp_it == ie) && (it->cycles_left == 0)) { + dram_rsp_it = it; } } @@ -122,11 +139,11 @@ void Simulator::eval_dram_bus() { dram_rsp_active_ = false; } if (!dram_rsp_active_) { - if (dequeue_index != -1) { + if (dram_rsp_it != dram_rsp_vec_.end()) { vortex_->dram_rsp_valid = 1; - memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE); - vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag; - dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index); + memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE); + vortex_->dram_rsp_tag = dram_rsp_it->tag; + dram_rsp_vec_.erase(dram_rsp_it); dram_rsp_active_ = true; } else { vortex_->dram_rsp_valid = 0; @@ -161,7 +178,7 @@ void Simulator::eval_dram_bus() { dram_req.cycles_left = DRAM_LATENCY; dram_req.tag = vortex_->dram_req_tag; ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data()); - dram_rsp_vec_.push_back(dram_req); + dram_rsp_vec_.emplace_back(dram_req); } } } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index cfea9bec..0dcf8a3b 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -5,13 +5,14 @@ #include "verilated.h" #ifdef VCD_OUTPUT -#include +#include #endif #include #include "ram.h" #include +#include #include #include #include @@ -62,7 +63,7 @@ private: void eval_csr_bus(); void eval_snp_bus(); - std::vector dram_rsp_vec_; + std::list dram_rsp_vec_; bool dram_rsp_active_; bool snp_req_active_; @@ -75,6 +76,6 @@ private: RAM *ram_; VVortex *vortex_; #ifdef VCD_OUTPUT - VerilatedVcdC *trace_; + VerilatedFstC *trace_; #endif }; \ No newline at end of file