vlsim fix, verilator fst trace, use ram optimization
This commit is contained in:
Binary file not shown.
@@ -15,13 +15,13 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
#DEBUG=1
|
||||
#SCOPE=1
|
||||
@@ -58,7 +58,7 @@ VL_FLAGS += verilator.vlt
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
|
||||
@@ -31,9 +31,9 @@ opae_sim::opae_sim() {
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedVcdC();
|
||||
trace_ = new VerilatedFstC();
|
||||
vortex_afu_->trace(trace_, 99);
|
||||
trace_->open("trace.vcd");
|
||||
trace_->open("trace.fst");
|
||||
#endif
|
||||
|
||||
this->reset();
|
||||
@@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
|
||||
*ioaddr = host_buffers_[wsid].ioaddr;
|
||||
}
|
||||
|
||||
// Performs one 64-bit MMIO read against the simulated AFU and stores the
// result in *value.
//   mmio_num : not referenced anywhere in this body.
//   offset   : MMIO offset; the request header address is set to offset / 4
//              (presumably a byte offset converted to a 32-bit word index --
//              TODO confirm against the CCI-P MMIO header definition).
//   value    : output; receives af2cp_sTxPort_c2_data after the step.
// The whole transaction runs while holding mutex_.
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// Drive the MMIO read request onto the c0 Rx port: valid strobe plus header.
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
// Run one step() of the simulated design with the request asserted.
this->step();
|
||||
// Drop the request strobe so it is not still asserted on a later step.
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
// The AFU is expected to flag its MMIO read response after that single step.
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = vortex_afu_->af2cp_sTxPort_c2_data;
|
||||
}
|
||||
|
||||
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
@@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
|
||||
this->step();
|
||||
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid);
|
||||
}
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
|
||||
this->step();
|
||||
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid);
|
||||
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
|
||||
*value = vortex_afu_->af2cp_sTxPort_c2_data;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
}
|
||||
|
||||
void opae_sim::flush() {
|
||||
@@ -117,24 +117,41 @@ void opae_sim::flush() {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void opae_sim::reset() {
|
||||
vortex_afu_->reset = 1;
|
||||
this->step();
|
||||
vortex_afu_->reset = 0;
|
||||
|
||||
host_buffers_.clear();
|
||||
dram_reads_.clear();
|
||||
cci_reads_.clear();
|
||||
cci_writes_.clear();
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
|
||||
vortex_afu_->avs_readdatavalid = 0;
|
||||
vortex_afu_->avs_waitrequest = 0;
|
||||
|
||||
vortex_afu_->reset = 1;
|
||||
|
||||
vortex_afu_->clk = 0;
|
||||
this->eval();
|
||||
vortex_afu_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
vortex_afu_->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
void opae_sim::step() {
|
||||
vortex_afu_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
vortex_afu_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
this->sRxPort_bus();
|
||||
this->sTxPort_bus();
|
||||
this->avs_bus();
|
||||
|
||||
vortex_afu_->clk = 0;
|
||||
this->eval();
|
||||
vortex_afu_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
@@ -149,100 +166,105 @@ void opae_sim::eval() {
|
||||
++timestamp;
|
||||
}
|
||||
|
||||
void opae_sim::sRxPort_bus() {
|
||||
void opae_sim::sRxPort_bus() {
|
||||
// check mmio request
|
||||
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|
||||
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
|
||||
|
||||
// schedule CCI read responses
|
||||
int cci_rd_index = -1;
|
||||
for (int i = 0; i < cci_reads_.size(); i++) {
|
||||
if (cci_reads_[i].cycles_left > 0) {
|
||||
cci_reads_[i].cycles_left -= 1;
|
||||
}
|
||||
if ((cci_rd_index == -1)
|
||||
&& (cci_reads_[i].cycles_left == 0)) {
|
||||
cci_rd_index = i;
|
||||
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
|
||||
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_rd_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// schedule CCI write responses
|
||||
int cci_wr_index = -1;
|
||||
for (int i = 0; i < cci_writes_.size(); i++) {
|
||||
if (cci_writes_[i].cycles_left > 0) {
|
||||
cci_writes_[i].cycles_left -= 1;
|
||||
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
|
||||
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0)
|
||||
it->cycles_left -= 1;
|
||||
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
|
||||
cci_wr_it = it;
|
||||
}
|
||||
if ((cci_wr_index == -1)
|
||||
&& (cci_writes_[i].cycles_left == 0)) {
|
||||
cci_wr_index = i;
|
||||
}
|
||||
}
|
||||
|
||||
// send CCI read response
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (cci_rd_index != -1) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE);
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata;
|
||||
cci_reads_.erase(cci_reads_.begin() + cci_rd_index);
|
||||
}
|
||||
|
||||
// send CCI write response
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
|
||||
if (cci_wr_index != -1) {
|
||||
if (cci_wr_it != cci_writes_.end()) {
|
||||
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
|
||||
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata;
|
||||
cci_writes_.erase(cci_writes_.begin() + cci_wr_index);
|
||||
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
|
||||
cci_writes_.erase(cci_wr_it);
|
||||
}
|
||||
|
||||
// mmio
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
|
||||
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
|
||||
// send CCI read response (ensure mmio disabled)
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
|
||||
if (!mmio_req_enabled
|
||||
&& (cci_rd_it != cci_reads_.end())) {
|
||||
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
|
||||
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
|
||||
printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata);
|
||||
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) {
|
||||
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
|
||||
}
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
cci_reads_.erase(cci_rd_it);
|
||||
}
|
||||
}
|
||||
|
||||
void opae_sim::sTxPort_bus() {
|
||||
// check read queue size
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE);
|
||||
|
||||
// check write queue size
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE);
|
||||
|
||||
// process read requests
|
||||
if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) {
|
||||
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
|
||||
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
|
||||
cci_rd_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
|
||||
cci_reads_.push_back(cci_req);
|
||||
printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
|
||||
fflush(stdout);
|
||||
cci_reads_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// process write requests
|
||||
if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) {
|
||||
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
|
||||
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
|
||||
cci_wr_req_t cci_req;
|
||||
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
|
||||
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
|
||||
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
|
||||
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
|
||||
cci_writes_.push_back(cci_req);
|
||||
cci_writes_.emplace_back(cci_req);
|
||||
}
|
||||
|
||||
// check queues overflow
|
||||
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
|
||||
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
|
||||
}
|
||||
|
||||
void opae_sim::avs_bus() {
|
||||
// schedule DRAM read responses
|
||||
int dram_rd_index = -1;
|
||||
for (int i = 0; i < dram_reads_.size(); i++) {
|
||||
if (dram_reads_[i].cycles_left > 0) {
|
||||
dram_reads_[i].cycles_left -= 1;
|
||||
// Schedule DRAM read responses: age every queued request by one cycle and
// remember the FIRST request whose latency has expired. dram_rd_it stays at
// dram_reads_.end() when no request is ready this cycle.
std::list<dram_rd_req_t>::iterator dram_rd_it(dram_reads_.end());
|
||||
for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0) {
|
||||
it->cycles_left -= 1;
|
||||
}
|
||||
if ((dram_rd_index == -1)
|
||||
&& (dram_reads_[i].cycles_left == 0)) {
|
||||
dram_rd_index = i;
|
||||
// Select only while nothing has been chosen yet. The previous test
// (it != ie) is always true inside this loop, so the last ready request
// won and responses could be returned out of order. The CCI read/write
// schedulers in sRxPort_bus() use this same "== ie" form.
if ((dram_rd_it == ie) && (it->cycles_left == 0)) {
|
||||
dram_rd_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
// send DRAM response
|
||||
vortex_afu_->avs_readdatavalid = 0;
|
||||
if (dram_rd_index != -1) {
|
||||
if (dram_rd_it != dram_reads_.end()) {
|
||||
vortex_afu_->avs_readdatavalid = 1;
|
||||
memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE);
|
||||
dram_reads_.erase(dram_reads_.begin() + dram_rd_index);
|
||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
dram_reads_.erase(dram_rd_it);
|
||||
}
|
||||
|
||||
// handle DRAM stalls
|
||||
@@ -275,7 +297,7 @@ void opae_sim::avs_bus() {
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
|
||||
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_reads_.push_back(dram_req);
|
||||
dram_reads_.emplace_back(dram_req);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#include <verilated_fst_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
#include <ostream>
|
||||
#include <future>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
@@ -41,18 +41,19 @@ private:
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
||||
unsigned tag;
|
||||
uint32_t tag;
|
||||
} dram_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
|
||||
unsigned mdata;
|
||||
uint64_t addr;
|
||||
uint32_t mdata;
|
||||
} cci_rd_req_t;
|
||||
|
||||
typedef struct {
|
||||
int cycles_left;
|
||||
unsigned mdata;
|
||||
uint32_t mdata;
|
||||
} cci_wr_req_t;
|
||||
|
||||
typedef struct {
|
||||
@@ -76,17 +77,17 @@ private:
|
||||
|
||||
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
|
||||
|
||||
std::vector<dram_rd_req_t> dram_reads_;
|
||||
std::list<dram_rd_req_t> dram_reads_;
|
||||
|
||||
std::vector<cci_rd_req_t> cci_reads_;
|
||||
std::list<cci_rd_req_t> cci_reads_;
|
||||
|
||||
std::vector<cci_wr_req_t> cci_writes_;
|
||||
std::list<cci_wr_req_t> cci_writes_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM ram_;
|
||||
Vvortex_afu_shim *vortex_afu_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
VerilatedFstC *trace_;
|
||||
#endif
|
||||
};
|
||||
@@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
//#define HANG_TIMEOUT 60
|
||||
#define HANG_TIMEOUT 60
|
||||
|
||||
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
OPTS ?= -n32
|
||||
OPTS ?= -n64
|
||||
|
||||
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
|
||||
@@ -76,7 +76,7 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt
|
||||
# compress VCD trace
|
||||
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
|
||||
tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd
|
||||
tar -zcvf trace.vcd.tar.gz trace.vcd
|
||||
tar -zcvf trace.fst.tar.gz trace.fst run.log
|
||||
tar -zcvf run.log.tar.gz run.log
|
||||
tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd
|
||||
tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd
|
||||
|
||||
@@ -74,103 +74,103 @@ localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR;
|
||||
localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
|
||||
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
|
||||
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
|
||||
localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW;
|
||||
|
||||
typedef enum logic[3:0] {
|
||||
STATE_IDLE,
|
||||
STATE_READ,
|
||||
STATE_WRITE,
|
||||
STATE_START,
|
||||
STATE_RUN,
|
||||
STATE_CLFLUSH,
|
||||
STATE_CSR_READ,
|
||||
STATE_CSR_WRITE
|
||||
} state_t;
|
||||
|
||||
typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
|
||||
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
|
||||
|
||||
state_t state;
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_READ = 1;
|
||||
localparam STATE_WRITE = 2;
|
||||
localparam STATE_START = 3;
|
||||
localparam STATE_RUN = 4;
|
||||
localparam STATE_CLFLUSH = 5;
|
||||
localparam STATE_CSR_READ = 6;
|
||||
localparam STATE_CSR_WRITE = 7;
|
||||
localparam STATE_MAX_VALUE = 8;
|
||||
localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
|
||||
|
||||
`ifdef SCOPE
|
||||
`SCOPE_DECL_SIGNALS
|
||||
`endif
|
||||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
reg [STATE_WIDTH-1:0] state;
|
||||
|
||||
// Vortex ports ///////////////////////////////////////////////////////////////
|
||||
|
||||
logic vx_dram_req_valid;
|
||||
logic vx_dram_req_rw;
|
||||
logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
|
||||
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
|
||||
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
|
||||
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
|
||||
logic vx_dram_req_ready;
|
||||
wire vx_dram_req_valid;
|
||||
wire vx_dram_req_rw;
|
||||
wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
|
||||
wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
|
||||
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
|
||||
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
|
||||
wire vx_dram_req_ready;
|
||||
|
||||
logic vx_dram_rsp_valid;
|
||||
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
|
||||
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
|
||||
logic vx_dram_rsp_ready;
|
||||
wire vx_dram_rsp_valid;
|
||||
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
|
||||
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
|
||||
wire vx_dram_rsp_ready;
|
||||
|
||||
logic vx_snp_req_valid;
|
||||
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
|
||||
logic vx_snp_req_invalidate = 0;
|
||||
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
|
||||
logic vx_snp_req_ready;
|
||||
reg vx_snp_req_valid;
|
||||
reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
|
||||
wire vx_snp_req_invalidate = 0;
|
||||
reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
|
||||
wire vx_snp_req_ready;
|
||||
|
||||
logic vx_snp_rsp_valid;
|
||||
reg vx_snp_rsp_valid;
|
||||
`DEBUG_BEGIN
|
||||
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
|
||||
reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
|
||||
`DEBUG_END
|
||||
logic vx_snp_rsp_ready;
|
||||
reg vx_snp_rsp_ready;
|
||||
|
||||
logic vx_csr_io_req_valid;
|
||||
logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
|
||||
logic [11:0] vx_csr_io_req_addr;
|
||||
logic vx_csr_io_req_rw;
|
||||
logic [31:0] vx_csr_io_req_data;
|
||||
logic vx_csr_io_req_ready;
|
||||
wire vx_csr_io_req_valid;
|
||||
wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
|
||||
wire [11:0] vx_csr_io_req_addr;
|
||||
wire vx_csr_io_req_rw;
|
||||
wire [31:0] vx_csr_io_req_data;
|
||||
wire vx_csr_io_req_ready;
|
||||
|
||||
logic vx_csr_io_rsp_valid;
|
||||
logic [31:0] vx_csr_io_rsp_data;
|
||||
logic vx_csr_io_rsp_ready;
|
||||
wire vx_csr_io_rsp_valid;
|
||||
wire [31:0] vx_csr_io_rsp_data;
|
||||
wire vx_csr_io_rsp_ready;
|
||||
|
||||
logic vx_reset;
|
||||
logic vx_busy;
|
||||
reg vx_reset;
|
||||
wire vx_busy;
|
||||
|
||||
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||
|
||||
logic avs_rtq_push;
|
||||
logic avs_rtq_pop;
|
||||
wire avs_rtq_push;
|
||||
wire avs_rtq_pop;
|
||||
`DEBUG_BEGIN
|
||||
logic avs_rtq_empty;
|
||||
logic avs_rtq_full;
|
||||
wire avs_rtq_empty;
|
||||
wire avs_rtq_full;
|
||||
`DEBUG_BEGIN
|
||||
|
||||
logic avs_rdq_push;
|
||||
logic avs_rdq_pop;
|
||||
wire avs_rdq_push;
|
||||
wire avs_rdq_pop;
|
||||
t_local_mem_data avs_rdq_dout;
|
||||
logic avs_rdq_empty;
|
||||
wire avs_rdq_empty;
|
||||
`DEBUG_BEGIN
|
||||
logic avs_rdq_full;
|
||||
wire avs_rdq_full;
|
||||
`DEBUG_END
|
||||
|
||||
// CMD variables //////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_clAddr cmd_io_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
|
||||
|
||||
`ifdef SCOPE
|
||||
logic [63:0] cmd_scope_rdata;
|
||||
logic [63:0] cmd_scope_wdata;
|
||||
logic cmd_scope_read;
|
||||
logic cmd_scope_write;
|
||||
wire [63:0] cmd_scope_rdata;
|
||||
wire [63:0] cmd_scope_wdata;
|
||||
wire cmd_scope_read;
|
||||
wire cmd_scope_write;
|
||||
`endif
|
||||
|
||||
logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
|
||||
logic [11:0] cmd_csr_addr;
|
||||
logic [31:0] cmd_csr_rdata;
|
||||
logic [31:0] cmd_csr_wdata;
|
||||
reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
|
||||
reg [11:0] cmd_csr_addr;
|
||||
reg [31:0] cmd_csr_rdata;
|
||||
reg [31:0] cmd_csr_wdata;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -193,6 +193,10 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm
|
||||
`DEBUG_BEGIN
|
||||
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
|
||||
wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid;
|
||||
wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid;
|
||||
wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid;
|
||||
wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull;
|
||||
wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull;
|
||||
wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address;
|
||||
wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length;
|
||||
wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid;
|
||||
@@ -212,8 +216,7 @@ initial begin
|
||||
end
|
||||
`endif
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
`ifndef VERILATOR
|
||||
$asserton; // enable assertions
|
||||
@@ -316,7 +319,7 @@ begin
|
||||
MMIO_STATUS: begin
|
||||
mmio_tx.data <= 64'(state);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
if (state != state_t'(mmio_tx.data)) begin
|
||||
if (state != STATE_WIDTH'(mmio_tx.data)) begin
|
||||
$display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state);
|
||||
end
|
||||
`endif
|
||||
@@ -349,14 +352,13 @@ end
|
||||
|
||||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
logic cmd_read_done;
|
||||
logic cmd_write_done;
|
||||
logic cmd_clflush_done;
|
||||
logic cmd_csr_done;
|
||||
logic cmd_run_done;
|
||||
wire cmd_read_done;
|
||||
wire cmd_write_done;
|
||||
wire cmd_clflush_done;
|
||||
wire cmd_csr_done;
|
||||
wire cmd_run_done;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 0;
|
||||
@@ -479,27 +481,28 @@ end
|
||||
|
||||
// AVS Controller /////////////////////////////////////////////////////////////
|
||||
|
||||
logic vortex_enabled;
|
||||
logic cci_rdq_empty;
|
||||
t_cci_rdq_data cci_rdq_dout;
|
||||
wire vortex_enabled;
|
||||
wire cci_rdq_empty;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
|
||||
|
||||
logic cci_dram_rd_req_fire;
|
||||
logic cci_dram_wr_req_fire;
|
||||
logic vx_dram_rd_req_fire;
|
||||
wire cci_dram_rd_req_fire;
|
||||
wire cci_dram_wr_req_fire;
|
||||
wire vx_dram_rd_req_fire;
|
||||
`DEBUG_BEGIN
|
||||
logic vx_dram_wr_req_fire;
|
||||
wire vx_dram_wr_req_fire;
|
||||
`DEBUG_END
|
||||
logic vx_dram_rd_rsp_fire;
|
||||
wire vx_dram_rd_rsp_fire;
|
||||
|
||||
t_local_mem_byte_mask vx_dram_req_byteen_;
|
||||
logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next;
|
||||
logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
|
||||
reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads;
|
||||
wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next;
|
||||
wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
|
||||
|
||||
logic cci_dram_rd_req_enable, cci_dram_wr_req_enable;
|
||||
logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
|
||||
wire cci_dram_rd_req_enable, cci_dram_wr_req_enable;
|
||||
wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
|
||||
|
||||
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
|
||||
|
||||
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
|
||||
@@ -535,11 +538,10 @@ end else begin
|
||||
assign vx_dram_req_byteen_ = vx_dram_req_byteen;
|
||||
end
|
||||
|
||||
always_comb
|
||||
begin
|
||||
always @(*) begin
|
||||
case (state)
|
||||
CMD_MEM_READ: avs_address = cci_dram_rd_req_addr;
|
||||
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
|
||||
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
|
||||
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
|
||||
endcase
|
||||
|
||||
@@ -550,8 +552,8 @@ begin
|
||||
endcase
|
||||
|
||||
case (state)
|
||||
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
|
||||
default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset;
|
||||
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
|
||||
default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset;
|
||||
endcase
|
||||
end
|
||||
|
||||
@@ -560,8 +562,7 @@ assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
|
||||
|
||||
assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
always @(posedge clk) begin
|
||||
if (reset)
|
||||
begin
|
||||
mem_bank_select <= 0;
|
||||
@@ -594,7 +595,7 @@ begin
|
||||
end
|
||||
|
||||
if (cci_dram_wr_req_fire) begin
|
||||
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
|
||||
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
|
||||
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
|
||||
@@ -654,7 +655,7 @@ VX_generic_queue #(
|
||||
|
||||
// AVS data read response queue ///////////////////////////////////////////////
|
||||
|
||||
logic cci_wr_req_fire;
|
||||
wire cci_wr_req_fire;
|
||||
|
||||
assign avs_rdq_push = avs_readdatavalid;
|
||||
assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire;
|
||||
@@ -676,31 +677,37 @@ VX_generic_queue #(
|
||||
|
||||
// CCI-P Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next;
|
||||
reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
|
||||
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
|
||||
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
|
||||
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
|
||||
t_ccip_clAddr cci_rd_req_addr;
|
||||
t_cci_rdq_tag cci_rd_rsp_ctr;
|
||||
|
||||
logic cci_rd_req_fire, cci_rd_rsp_fire;
|
||||
logic cci_rd_req_enable, cci_rd_req_wait;
|
||||
wire cci_rd_req_fire, cci_rd_rsp_fire;
|
||||
reg cci_rd_req_enable, cci_rd_req_wait;
|
||||
|
||||
logic cci_rdq_push, cci_rdq_pop;
|
||||
t_cci_rdq_data cci_rdq_din;
|
||||
wire cci_rdq_push, cci_rdq_pop;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
|
||||
|
||||
always_comb begin
|
||||
always @(*) begin
|
||||
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
|
||||
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
|
||||
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr));
|
||||
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
|
||||
end
|
||||
|
||||
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull;
|
||||
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
|
||||
|
||||
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
|
||||
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
|
||||
|
||||
assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
|
||||
|
||||
assign cci_rdq_pop = cci_dram_wr_req_fire;
|
||||
assign cci_rdq_push = cci_rd_rsp_fire;
|
||||
assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
|
||||
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag};
|
||||
|
||||
assign cci_pending_reads_next = cci_pending_reads
|
||||
+ $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
|
||||
@@ -709,8 +716,7 @@ assign cci_pending_reads_next = cci_pending_reads
|
||||
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
|
||||
|
||||
// Send read requests to CCI
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
cci_rd_req_addr <= 0;
|
||||
cci_rd_req_ctr <= 0;
|
||||
@@ -738,21 +744,23 @@ begin
|
||||
if (cci_rd_req_fire) begin
|
||||
cci_rd_req_addr <= cci_rd_req_addr + 1;
|
||||
cci_rd_req_ctr <= cci_rd_req_ctr_next;
|
||||
if (t_cci_rdq_tag'(cci_rd_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
|
||||
cci_rd_req_wait <= 1; // end current request batch
|
||||
if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
|
||||
cci_rd_req_wait <= 1; // end current request batch
|
||||
$display("*** %t: CCI Rd Rsp: STOP", $time);
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
|
||||
$display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_rd_rsp_fire) begin
|
||||
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1);
|
||||
if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
|
||||
cci_rd_req_wait <= 0; // restart new request batch
|
||||
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
|
||||
if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
|
||||
cci_rd_req_wait <= 0; // restart new request batch
|
||||
$display("*** %t: CCI Rd Rsp: START", $time);
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr);
|
||||
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr);
|
||||
`endif
|
||||
end
|
||||
|
||||
@@ -763,12 +771,11 @@ begin
|
||||
end
|
||||
|
||||
cci_pending_reads <= cci_pending_reads_next;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)),
|
||||
.DATAW(CCI_RD_RQ_DATAW),
|
||||
.SIZE(CCI_RD_QUEUE_SIZE)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
@@ -782,14 +789,37 @@ VX_generic_queue #(
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
// Debug checker for CCI read responses: records which response tags have been
// seen since the response counter was last zero and flags a tag that arrives
// twice.
`DEBUG_BEGIN
|
||||
// One bit per tag value in a read window; a set bit means that tag was seen.
reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
dbg_cci_rd_rsp_mask <= 0;
|
||||
end else begin
|
||||
if (cci_rd_rsp_fire) begin
|
||||
if (cci_rd_rsp_ctr == 0) begin
|
||||
// Response counter is zero: restart the mask with only this tag set.
dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag);
|
||||
end else begin
|
||||
// This tag's bit is already set: report the duplicate and fail.
if (dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] != 0) begin
|
||||
$display("*** %t: Assert: CCI Rd Rsp: idx=%0d, ctr=%0d, mask=%0h, meta=%0h, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, dbg_cci_rd_rsp_mask, cp2af_sRxPort.c0.hdr.mdata, cp2af_sRxPort.c0.data);
|
||||
assert(0);
|
||||
end
|
||||
// Mark this tag as seen.
dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`DEBUG_END
|
||||
|
||||
// CCI-P Write Request //////////////////////////////////////////////////////////
|
||||
|
||||
logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next;
|
||||
logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
|
||||
reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes;
|
||||
wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
|
||||
t_ccip_clAddr cci_wr_req_addr;
|
||||
logic cci_wr_req_enable, cci_wr_rsp_fire;
|
||||
reg cci_wr_req_enable;
|
||||
wire cci_wr_rsp_fire;
|
||||
|
||||
always_comb begin
|
||||
always @(*) begin
|
||||
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
|
||||
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
|
||||
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
|
||||
@@ -808,7 +838,7 @@ assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
|
||||
assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty;
|
||||
|
||||
// Send write requests to CCI
|
||||
always_ff @(posedge clk)
|
||||
always @(posedge clk)
|
||||
begin
|
||||
if (reset) begin
|
||||
cci_wr_req_addr <= 0;
|
||||
@@ -833,7 +863,7 @@ begin
|
||||
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout);
|
||||
`endif
|
||||
end
|
||||
|
||||
@@ -849,12 +879,12 @@ end
|
||||
|
||||
// Vortex cache snooping //////////////////////////////////////////////////////
|
||||
|
||||
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
|
||||
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
|
||||
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next;
|
||||
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next;
|
||||
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
|
||||
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
|
||||
reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr;
|
||||
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next;
|
||||
|
||||
logic vx_snp_req_fire, vx_snp_rsp_fire;
|
||||
wire vx_snp_req_fire, vx_snp_rsp_fire;
|
||||
|
||||
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
|
||||
assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
|
||||
@@ -872,8 +902,7 @@ assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(
|
||||
|
||||
assign cmd_clflush_done = (0 == snp_rsp_ctr);
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
vx_snp_req_valid <= 0;
|
||||
vx_snp_req_addr <= 0;
|
||||
@@ -911,7 +940,7 @@ begin
|
||||
vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next);
|
||||
snp_req_ctr <= snp_req_ctr_next;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next));
|
||||
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next));
|
||||
`endif
|
||||
end
|
||||
|
||||
@@ -928,7 +957,7 @@ end
|
||||
|
||||
// CSRs///////////////////////////////////////////////////////////////////////
|
||||
|
||||
logic csr_io_req_sent;
|
||||
reg csr_io_req_sent;
|
||||
|
||||
assign vx_csr_io_req_valid = !csr_io_req_sent
|
||||
&& ((STATE_CSR_READ == state || STATE_CSR_WRITE == state));
|
||||
@@ -941,8 +970,7 @@ assign vx_csr_io_rsp_ready = 1;
|
||||
|
||||
assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
csr_io_req_sent <= 0;
|
||||
cmd_csr_rdata <= 0;
|
||||
|
||||
@@ -6,11 +6,6 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// `define SYNTHESIS 1
|
||||
// `define ASIC 1
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
|
||||
@@ -10,131 +10,24 @@ module VX_gpr_ram (
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rs2_data
|
||||
);
|
||||
`ifndef ASIC
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (we[i]) begin
|
||||
mem[waddr][i][0] <= wdata[i][07:00];
|
||||
mem[waddr][i][1] <= wdata[i][15:08];
|
||||
mem[waddr][i][2] <= wdata[i][23:16];
|
||||
mem[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
q1 <= mem[rs1];
|
||||
q2 <= mem[rs2];
|
||||
end
|
||||
|
||||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
|
||||
`else
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign write_bit_mask[i] = {32{~we[i]}};
|
||||
end
|
||||
|
||||
wire cenb = 0;
|
||||
wire cena_1 = 0;
|
||||
wire cena_2 = 0;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_a;
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_b;
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (integer j = 0; j < 32; j++) begin
|
||||
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
||||
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
||||
if (we[i]) begin
|
||||
mem[waddr][i][0] <= wdata[i][07:00];
|
||||
mem[waddr][i][1] <= wdata[i][15:08];
|
||||
mem[waddr][i][2] <= wdata[i][23:16];
|
||||
mem[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
`else
|
||||
assign rs1_data = tmp_a;
|
||||
assign rs2_data = tmp_b;
|
||||
`endif
|
||||
for (integer i = 0; i < 'NT; i=i+4) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(tmp_a[(i+3):(i)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(rs1[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(waddr[(i+3):(i)]),
|
||||
.DB(wdata[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
q1 <= mem[rs1];
|
||||
q2 <= mem[rs2];
|
||||
end
|
||||
|
||||
rf2_`NUM_GPRSx128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(tmp_b[(i+3):(i)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(rs2[(i+3):(i)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(i+3):(i)]),
|
||||
.AB(waddr[(i+3):(i)]),
|
||||
.DB(wdata[(i+3):(i)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
|
||||
`endif
|
||||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
|
||||
endmodule
|
||||
@@ -20,8 +20,8 @@ module VX_icache_stage #(
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
|
||||
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
|
||||
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
|
||||
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
|
||||
@@ -16,8 +16,8 @@ module VX_ipdom_stack #(
|
||||
);
|
||||
localparam STACK_SIZE = 2 ** DEPTH;
|
||||
|
||||
reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
|
||||
reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
|
||||
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
|
||||
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
|
||||
reg is_part [0:STACK_SIZE-1];
|
||||
|
||||
reg [DEPTH-1:0] rd_ptr, wr_ptr;
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define USE_FAST_BRAM (* ramstyle="mlab" *)
|
||||
`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
6
hw/rtl/cache/VX_bank.v
vendored
6
hw/rtl/cache/VX_bank.v
vendored
@@ -306,9 +306,9 @@ module VX_bank #(
|
||||
|
||||
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
|
||||
|
||||
//decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req
|
||||
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
//Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req
|
||||
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
|
||||
snrq_pop_unqual ? snrq_addr_st0 :
|
||||
0;
|
||||
|
||||
29
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
29
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -56,7 +56,7 @@ module VX_cache_miss_resrv #(
|
||||
output wire miss_resrv_is_snp_st0,
|
||||
output wire miss_resrv_snp_invalidate_st0
|
||||
);
|
||||
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
|
||||
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] valid_table;
|
||||
@@ -72,8 +72,8 @@ module VX_cache_miss_resrv #(
|
||||
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
|
||||
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock
|
||||
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] make_ready;
|
||||
reg [MRVQ_SIZE-1:0] make_ready_push;
|
||||
@@ -86,11 +86,11 @@ module VX_cache_miss_resrv #(
|
||||
|
||||
assign pending_hazard_st1 = |(valid_address_match);
|
||||
|
||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
|
||||
|
||||
assign miss_resrv_valid_st0 = dequeue_possible;
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
assign {miss_resrv_data_st0,
|
||||
miss_resrv_tid_st0,
|
||||
miss_resrv_tag_st0,
|
||||
@@ -98,7 +98,7 @@ module VX_cache_miss_resrv #(
|
||||
miss_resrv_byteen_st0,
|
||||
miss_resrv_wsel_st0,
|
||||
miss_resrv_is_snp_st0,
|
||||
miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
|
||||
miss_resrv_snp_invalidate_st0} = metadata_table;
|
||||
|
||||
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
|
||||
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
|
||||
@@ -125,7 +125,6 @@ module VX_cache_miss_resrv #(
|
||||
valid_table[enqueue_index] <= 1;
|
||||
ready_table[enqueue_index] <= mrvq_init_ready_state;
|
||||
addr_table[enqueue_index] <= miss_add_addr;
|
||||
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
|
||||
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
|
||||
end else if (increment_head) begin
|
||||
valid_table[head_ptr] <= 0;
|
||||
@@ -155,6 +154,22 @@ module VX_cache_miss_resrv #(
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(`MRVQ_METADATA_WIDTH),
|
||||
.SIZE(MRVQ_SIZE),
|
||||
.BYTEENW(1),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) metadata_ram (
|
||||
.clk(clk),
|
||||
.waddr(enqueue_index),
|
||||
.raddr(dequeue_index),
|
||||
.wren(mrvq_push),
|
||||
.rden(1'b1),
|
||||
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
|
||||
.dout(metadata_table)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
always @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
|
||||
8
hw/rtl/cache/VX_tag_data_access.v
vendored
8
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -183,15 +183,15 @@ module VX_tag_data_access #(
|
||||
if (valid_req_st1) begin
|
||||
if ((| use_write_enable)) begin
|
||||
if (writefill_st1) begin
|
||||
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
|
||||
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
|
||||
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
|
||||
end
|
||||
end else
|
||||
if (miss_st1) begin
|
||||
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
|
||||
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
|
||||
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
2
hw/rtl/cache/VX_tag_data_store.v
vendored
2
hw/rtl/cache/VX_tag_data_store.v
vendored
@@ -78,7 +78,7 @@ module VX_tag_data_store #(
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
|
||||
@@ -6,6 +6,7 @@ module VX_dp_ram #(
|
||||
parameter BYTEENW = 1,
|
||||
parameter BUFFERED = 1,
|
||||
parameter RWCHECK = 1,
|
||||
parameter RWBYPASS = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1)
|
||||
) (
|
||||
@@ -29,19 +30,46 @@ module VX_dp_ram #(
|
||||
if (wren[i])
|
||||
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
|
||||
end
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
|
||||
if (RWBYPASS) begin
|
||||
reg [DATAW-1:0] din_r;
|
||||
wire writing;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
assign writing = (| wren);
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign writing = wren;
|
||||
always @(posedge clk) begin
|
||||
din_r <= din;
|
||||
end
|
||||
end
|
||||
|
||||
reg bypass_r;
|
||||
always @(posedge clk) begin
|
||||
bypass_r <= writing && (raddr == waddr);
|
||||
end
|
||||
|
||||
assign dout = bypass_r ? din_r : dout_r;
|
||||
end else begin
|
||||
assign dout = dout_r;
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
@@ -65,7 +93,7 @@ module VX_dp_ram #(
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
if (RWBYPASS) begin
|
||||
reg [DATAW-1:0] din_r;
|
||||
wire writing;
|
||||
|
||||
@@ -89,13 +117,13 @@ module VX_dp_ram #(
|
||||
end
|
||||
|
||||
assign dout = bypass_r ? din_r : mem[raddr];
|
||||
`else
|
||||
end else begin
|
||||
assign dout = mem[raddr];
|
||||
`endif
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -85,7 +85,7 @@ module VX_generic_queue #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_a),
|
||||
|
||||
@@ -36,8 +36,9 @@ module VX_scope #(
|
||||
localparam GET_COUNT = 3'd3;
|
||||
localparam GET_OFFSET = 3'd6;
|
||||
|
||||
reg [DATAW-1:0] data_store [SIZE-1:0];
|
||||
reg [DELTAW-1:0] delta_store [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
|
||||
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
|
||||
|
||||
reg [UPDW-1:0] prev_trigger_id;
|
||||
reg [DELTAW-1:0] delta;
|
||||
reg [BUSW-1:0] bus_out_r;
|
||||
|
||||
@@ -44,7 +44,7 @@ gen-s:
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
|
||||
gen-sd:
|
||||
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace $(DBG)
|
||||
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG)
|
||||
|
||||
gen-st:
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
@@ -53,7 +53,7 @@ gen-m:
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-md:
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace $(DBG)
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG)
|
||||
|
||||
gen-mt:
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
@@ -77,11 +77,12 @@ build-mt: gen-mt
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
run: run-s
|
||||
|
||||
run-s: build-s
|
||||
(cd obj_dir && ./VVortex)
|
||||
|
||||
run-sd: build-sd
|
||||
(cd obj_dir && valgrind ./VVortex)
|
||||
(cd obj_dir && ./VVortex)
|
||||
|
||||
run-st: build-st
|
||||
(cd obj_dir && ./VVortex)
|
||||
|
||||
@@ -28,15 +28,11 @@ Simulator::Simulator() {
|
||||
ram_ = nullptr;
|
||||
vortex_ = new VVortex();
|
||||
|
||||
dram_rsp_active_ = false;
|
||||
snp_req_active_ = false;
|
||||
csr_req_active_ = false;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedVcdC();
|
||||
trace_ = new VerilatedFstC();
|
||||
vortex_->trace(trace_, 99);
|
||||
trace_->open("trace.vcd");
|
||||
trace_->open("trace.fst");
|
||||
#endif
|
||||
|
||||
// reset the device
|
||||
@@ -66,27 +62,49 @@ void Simulator::reset() {
|
||||
std::cout << timestamp << ": [sim] reset()" << std::endl;
|
||||
#endif
|
||||
|
||||
vortex_->reset = 1;
|
||||
this->step();
|
||||
vortex_->reset = 0;
|
||||
|
||||
print_bufs_.clear();
|
||||
dram_rsp_vec_.clear();
|
||||
|
||||
dram_rsp_active_ = false;
|
||||
snp_req_active_ = false;
|
||||
csr_req_active_ = false;
|
||||
|
||||
snp_req_size_ = 0;
|
||||
pending_snp_reqs_ = 0;
|
||||
csr_rsp_value_ = nullptr;
|
||||
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_req_ready = 0;
|
||||
vortex_->io_req_ready = 0;
|
||||
vortex_->io_rsp_valid = 0;
|
||||
vortex_->snp_req_valid = 0;
|
||||
vortex_->snp_rsp_ready = 0;
|
||||
vortex_->csr_io_req_valid = 0;
|
||||
vortex_->csr_io_rsp_ready = 0;
|
||||
|
||||
vortex_->reset = 1;
|
||||
|
||||
vortex_->clk = 0;
|
||||
this->eval();
|
||||
vortex_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
vortex_->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
void Simulator::step() {
|
||||
vortex_->clk = 0;
|
||||
this->eval();
|
||||
|
||||
vortex_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
this->eval_dram_bus();
|
||||
this->eval_io_bus();
|
||||
this->eval_csr_bus();
|
||||
this->eval_snp_bus();
|
||||
|
||||
vortex_->clk = 0;
|
||||
this->eval();
|
||||
vortex_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
void Simulator::eval() {
|
||||
@@ -104,14 +122,13 @@ void Simulator::eval_dram_bus() {
|
||||
}
|
||||
|
||||
// schedule DRAM responses
|
||||
int dequeue_index = -1;
|
||||
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
|
||||
if (dram_rsp_vec_[i].cycles_left > 0) {
|
||||
dram_rsp_vec_[i].cycles_left -= 1;
|
||||
std::list<dram_req_t>::iterator dram_rsp_it(dram_rsp_vec_.end());
|
||||
for (auto it = dram_rsp_vec_.begin(), ie = dram_rsp_vec_.end(); it != ie; ++it) {
|
||||
if (it->cycles_left > 0) {
|
||||
it->cycles_left -= 1;
|
||||
}
|
||||
if ((dequeue_index == -1)
|
||||
&& (dram_rsp_vec_[i].cycles_left == 0)) {
|
||||
dequeue_index = i;
|
||||
if ((dram_rsp_it == ie) && (it->cycles_left == 0)) {
|
||||
dram_rsp_it = it;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,11 +139,11 @@ void Simulator::eval_dram_bus() {
|
||||
dram_rsp_active_ = false;
|
||||
}
|
||||
if (!dram_rsp_active_) {
|
||||
if (dequeue_index != -1) {
|
||||
if (dram_rsp_it != dram_rsp_vec_.end()) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE);
|
||||
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
|
||||
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
|
||||
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE);
|
||||
vortex_->dram_rsp_tag = dram_rsp_it->tag;
|
||||
dram_rsp_vec_.erase(dram_rsp_it);
|
||||
dram_rsp_active_ = true;
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
@@ -161,7 +178,7 @@ void Simulator::eval_dram_bus() {
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.tag = vortex_->dram_req_tag;
|
||||
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_rsp_vec_.push_back(dram_req);
|
||||
dram_rsp_vec_.emplace_back(dram_req);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,13 +5,14 @@
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_vcd_c.h>
|
||||
#include <verilated_fst_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
#include "ram.h"
|
||||
|
||||
#include <ostream>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
@@ -62,7 +63,7 @@ private:
|
||||
void eval_csr_bus();
|
||||
void eval_snp_bus();
|
||||
|
||||
std::vector<dram_req_t> dram_rsp_vec_;
|
||||
std::list<dram_req_t> dram_rsp_vec_;
|
||||
bool dram_rsp_active_;
|
||||
|
||||
bool snp_req_active_;
|
||||
@@ -75,6 +76,6 @@ private:
|
||||
RAM *ram_;
|
||||
VVortex *vortex_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *trace_;
|
||||
VerilatedFstC *trace_;
|
||||
#endif
|
||||
};
|
||||
Reference in New Issue
Block a user