vlsim fix, verilator fst trace, use ram optimization

This commit is contained in:
Blaise Tine
2020-10-25 16:40:50 -07:00
parent 81dc8c7279
commit 43ae82e788
23 changed files with 424 additions and 422 deletions

Binary file not shown.

View File

@@ -15,13 +15,13 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#DEBUG=1
#SCOPE=1
@@ -58,7 +58,7 @@ VL_FLAGS += verilator.vlt
# Debugigng
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG

View File

@@ -31,9 +31,9 @@ opae_sim::opae_sim() {
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
trace_ = new VerilatedFstC();
vortex_afu_->trace(trace_, 99);
trace_->open("trace.vcd");
trace_->open("trace.fst");
#endif
this->reset();
@@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
*ioaddr = host_buffers_[wsid].ioaddr;
}
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->step();
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
}
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
std::lock_guard<std::mutex> guard(mutex_);
@@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
this->step();
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid);
}
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->step();
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid);
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
}
void opae_sim::flush() {
@@ -117,8 +117,25 @@ void opae_sim::flush() {
///////////////////////////////////////////////////////////////////////////////
void opae_sim::reset() {
host_buffers_.clear();
dram_reads_.clear();
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
vortex_afu_->avs_readdatavalid = 0;
vortex_afu_->avs_waitrequest = 0;
vortex_afu_->reset = 1;
this->step();
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
vortex_afu_->reset = 0;
// Turn on assertion after reset
@@ -126,16 +143,16 @@ void opae_sim::reset() {
}
void opae_sim::step() {
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
this->sRxPort_bus();
this->sTxPort_bus();
this->avs_bus();
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
#endif
@@ -150,99 +167,104 @@ void opae_sim::eval() {
}
void opae_sim::sRxPort_bus() {
// check mmio request
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
// schedule CCI read responses
int cci_rd_index = -1;
for (int i = 0; i < cci_reads_.size(); i++) {
if (cci_reads_[i].cycles_left > 0) {
cci_reads_[i].cycles_left -= 1;
}
if ((cci_rd_index == -1)
&& (cci_reads_[i].cycles_left == 0)) {
cci_rd_index = i;
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
cci_rd_it = it;
}
}
// schedule CCI write responses
int cci_wr_index = -1;
for (int i = 0; i < cci_writes_.size(); i++) {
if (cci_writes_[i].cycles_left > 0) {
cci_writes_[i].cycles_left -= 1;
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
cci_wr_it = it;
}
if ((cci_wr_index == -1)
&& (cci_writes_[i].cycles_left == 0)) {
cci_wr_index = i;
}
}
// send CCI read response
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (cci_rd_index != -1) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata;
cci_reads_.erase(cci_reads_.begin() + cci_rd_index);
}
// send CCI write response
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
if (cci_wr_index != -1) {
if (cci_wr_it != cci_writes_.end()) {
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata;
cci_writes_.erase(cci_writes_.begin() + cci_wr_index);
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
cci_writes_.erase(cci_wr_it);
}
// mmio
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
// send CCI read response (ensure mmio disabled)
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
&& (cci_rd_it != cci_reads_.end())) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) {
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
}
printf("\n");
fflush(stdout);
cci_reads_.erase(cci_rd_it);
}
}
void opae_sim::sTxPort_bus() {
// check read queue size
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE);
// check write queue size
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE);
// process read requests
if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) {
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
cci_rd_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
cci_reads_.push_back(cci_req);
printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
fflush(stdout);
cci_reads_.emplace_back(cci_req);
}
// process write requests
if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) {
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
cci_wr_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
cci_writes_.push_back(cci_req);
cci_writes_.emplace_back(cci_req);
}
// check queues overflow
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
}
void opae_sim::avs_bus() {
// schedule DRAM read responses
int dram_rd_index = -1;
for (int i = 0; i < dram_reads_.size(); i++) {
if (dram_reads_[i].cycles_left > 0) {
dram_reads_[i].cycles_left -= 1;
std::list<dram_rd_req_t>::iterator dram_rd_it(dram_reads_.end());
for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0) {
it->cycles_left -= 1;
}
if ((dram_rd_index == -1)
&& (dram_reads_[i].cycles_left == 0)) {
dram_rd_index = i;
if ((it != ie) && (it->cycles_left == 0)) {
dram_rd_it = it;
}
}
// send DRAM response
vortex_afu_->avs_readdatavalid = 0;
if (dram_rd_index != -1) {
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE);
dram_reads_.erase(dram_reads_.begin() + dram_rd_index);
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
dram_reads_.erase(dram_rd_it);
}
// handle DRAM stalls
@@ -275,7 +297,7 @@ void opae_sim::avs_bus() {
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_reads_.push_back(dram_req);
dram_reads_.emplace_back(dram_req);
}
}

View File

@@ -5,7 +5,7 @@
#include "verilated.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#include <verilated_fst_c.h>
#endif
#include <VX_config.h>
@@ -13,7 +13,7 @@
#include <ostream>
#include <future>
#include <vector>
#include <list>
#include <unordered_map>
#define CACHE_BLOCK_SIZE 64
@@ -41,18 +41,19 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
unsigned tag;
uint32_t tag;
} dram_rd_req_t;
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
unsigned mdata;
uint64_t addr;
uint32_t mdata;
} cci_rd_req_t;
typedef struct {
int cycles_left;
unsigned mdata;
uint32_t mdata;
} cci_wr_req_t;
typedef struct {
@@ -76,17 +77,17 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
std::vector<dram_rd_req_t> dram_reads_;
std::list<dram_rd_req_t> dram_reads_;
std::vector<cci_rd_req_t> cci_reads_;
std::list<cci_rd_req_t> cci_reads_;
std::vector<cci_wr_req_t> cci_writes_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
RAM ram_;
Vvortex_afu_shim *vortex_afu_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
VerilatedFstC *trace_;
#endif
};

View File

@@ -1,6 +1,6 @@
#pragma once
//#define HANG_TIMEOUT 60
#define HANG_TIMEOUT 60
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);

View File

@@ -1,7 +1,7 @@
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
OPTS ?= -n32
OPTS ?= -n64
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++

View File

@@ -76,7 +76,7 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd
tar -zcvf trace.vcd.tar.gz trace.vcd
tar -zcvf trace.fst.tar.gz trace.fst run.log
tar -zcvf run.log.tar.gz run.log
tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd
tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd

View File

@@ -74,103 +74,103 @@ localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR;
localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
logic [127:0] afu_id = `AFU_ACCEL_UUID;
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW;
typedef enum logic[3:0] {
STATE_IDLE,
STATE_READ,
STATE_WRITE,
STATE_START,
STATE_RUN,
STATE_CLFLUSH,
STATE_CSR_READ,
STATE_CSR_WRITE
} state_t;
typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
state_t state;
localparam STATE_IDLE = 0;
localparam STATE_READ = 1;
localparam STATE_WRITE = 2;
localparam STATE_START = 3;
localparam STATE_RUN = 4;
localparam STATE_CLFLUSH = 5;
localparam STATE_CSR_READ = 6;
localparam STATE_CSR_WRITE = 7;
localparam STATE_MAX_VALUE = 8;
localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
`ifdef SCOPE
`SCOPE_DECL_SIGNALS
`endif
wire [127:0] afu_id = `AFU_ACCEL_UUID;
reg [STATE_WIDTH-1:0] state;
// Vortex ports ///////////////////////////////////////////////////////////////
logic vx_dram_req_valid;
logic vx_dram_req_rw;
logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_ready;
wire vx_dram_req_valid;
wire vx_dram_req_rw;
wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
wire vx_dram_req_ready;
logic vx_dram_rsp_valid;
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic vx_dram_rsp_ready;
wire vx_dram_rsp_valid;
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
wire vx_dram_rsp_ready;
logic vx_snp_req_valid;
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
logic vx_snp_req_invalidate = 0;
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
logic vx_snp_req_ready;
reg vx_snp_req_valid;
reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
wire vx_snp_req_invalidate = 0;
reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
wire vx_snp_req_ready;
logic vx_snp_rsp_valid;
reg vx_snp_rsp_valid;
`DEBUG_BEGIN
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
`DEBUG_END
logic vx_snp_rsp_ready;
reg vx_snp_rsp_ready;
logic vx_csr_io_req_valid;
logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
logic [11:0] vx_csr_io_req_addr;
logic vx_csr_io_req_rw;
logic [31:0] vx_csr_io_req_data;
logic vx_csr_io_req_ready;
wire vx_csr_io_req_valid;
wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
wire [11:0] vx_csr_io_req_addr;
wire vx_csr_io_req_rw;
wire [31:0] vx_csr_io_req_data;
wire vx_csr_io_req_ready;
logic vx_csr_io_rsp_valid;
logic [31:0] vx_csr_io_rsp_data;
logic vx_csr_io_rsp_ready;
wire vx_csr_io_rsp_valid;
wire [31:0] vx_csr_io_rsp_data;
wire vx_csr_io_rsp_ready;
logic vx_reset;
logic vx_busy;
reg vx_reset;
wire vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
logic avs_rtq_push;
logic avs_rtq_pop;
wire avs_rtq_push;
wire avs_rtq_pop;
`DEBUG_BEGIN
logic avs_rtq_empty;
logic avs_rtq_full;
wire avs_rtq_empty;
wire avs_rtq_full;
`DEBUG_BEGIN
logic avs_rdq_push;
logic avs_rdq_pop;
wire avs_rdq_push;
wire avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
logic avs_rdq_empty;
wire avs_rdq_empty;
`DEBUG_BEGIN
logic avs_rdq_full;
wire avs_rdq_full;
`DEBUG_END
// CMD variables //////////////////////////////////////////////////////////////
t_ccip_clAddr cmd_io_addr;
logic[DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
logic[DRAM_ADDR_WIDTH-1:0] cmd_data_size;
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
`ifdef SCOPE
logic [63:0] cmd_scope_rdata;
logic [63:0] cmd_scope_wdata;
logic cmd_scope_read;
logic cmd_scope_write;
wire [63:0] cmd_scope_rdata;
wire [63:0] cmd_scope_wdata;
wire cmd_scope_read;
wire cmd_scope_write;
`endif
logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
logic [11:0] cmd_csr_addr;
logic [31:0] cmd_csr_rdata;
logic [31:0] cmd_csr_wdata;
reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
reg [11:0] cmd_csr_addr;
reg [31:0] cmd_csr_rdata;
reg [31:0] cmd_csr_wdata;
// MMIO controller ////////////////////////////////////////////////////////////
@@ -193,6 +193,10 @@ assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mm
`DEBUG_BEGIN
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid;
wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid;
wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid;
wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull;
wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull;
wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address;
wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length;
wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid;
@@ -212,8 +216,7 @@ initial begin
end
`endif
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
`ifndef VERILATOR
$asserton; // enable assertions
@@ -316,7 +319,7 @@ begin
MMIO_STATUS: begin
mmio_tx.data <= 64'(state);
`ifdef DBG_PRINT_OPAE
if (state != state_t'(mmio_tx.data)) begin
if (state != STATE_WIDTH'(mmio_tx.data)) begin
$display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state);
end
`endif
@@ -349,14 +352,13 @@ end
// COMMAND FSM ////////////////////////////////////////////////////////////////
logic cmd_read_done;
logic cmd_write_done;
logic cmd_clflush_done;
logic cmd_csr_done;
logic cmd_run_done;
wire cmd_read_done;
wire cmd_write_done;
wire cmd_clflush_done;
wire cmd_csr_done;
wire cmd_run_done;
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
state <= STATE_IDLE;
vx_reset <= 0;
@@ -479,27 +481,28 @@ end
// AVS Controller /////////////////////////////////////////////////////////////
logic vortex_enabled;
logic cci_rdq_empty;
t_cci_rdq_data cci_rdq_dout;
wire vortex_enabled;
wire cci_rdq_empty;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
logic cci_dram_rd_req_fire;
logic cci_dram_wr_req_fire;
logic vx_dram_rd_req_fire;
wire cci_dram_rd_req_fire;
wire cci_dram_wr_req_fire;
wire vx_dram_rd_req_fire;
`DEBUG_BEGIN
logic vx_dram_wr_req_fire;
wire vx_dram_wr_req_fire;
`DEBUG_END
logic vx_dram_rd_rsp_fire;
wire vx_dram_rd_rsp_fire;
t_local_mem_byte_mask vx_dram_req_byteen_;
logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next;
logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads;
wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next;
wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
logic cci_dram_rd_req_enable, cci_dram_wr_req_enable;
logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
wire cci_dram_rd_req_enable, cci_dram_wr_req_enable;
wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
@@ -535,11 +538,10 @@ end else begin
assign vx_dram_req_byteen_ = vx_dram_req_byteen;
end
always_comb
begin
always @(*) begin
case (state)
CMD_MEM_READ: avs_address = cci_dram_rd_req_addr;
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
endcase
@@ -550,8 +552,8 @@ begin
endcase
case (state)
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset;
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset;
endcase
end
@@ -560,8 +562,7 @@ assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size);
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset)
begin
mem_bank_select <= 0;
@@ -594,7 +595,7 @@ begin
end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
@@ -654,7 +655,7 @@ VX_generic_queue #(
// AVS data read response queue ///////////////////////////////////////////////
logic cci_wr_req_fire;
wire cci_wr_req_fire;
assign avs_rdq_push = avs_readdatavalid;
assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire;
@@ -676,31 +677,37 @@ VX_generic_queue #(
// CCI-P Read Request ///////////////////////////////////////////////////////////
logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next;
logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next;
reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
t_ccip_clAddr cci_rd_req_addr;
t_cci_rdq_tag cci_rd_rsp_ctr;
logic cci_rd_req_fire, cci_rd_rsp_fire;
logic cci_rd_req_enable, cci_rd_req_wait;
wire cci_rd_req_fire, cci_rd_rsp_fire;
reg cci_rd_req_enable, cci_rd_req_wait;
logic cci_rdq_push, cci_rdq_pop;
t_cci_rdq_data cci_rdq_din;
wire cci_rdq_push, cci_rdq_pop;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
always_comb begin
always @(*) begin
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr));
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
end
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull;
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_rdq_pop = cci_dram_wr_req_fire;
assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag};
assign cci_pending_reads_next = cci_pending_reads
+ $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
@@ -709,8 +716,7 @@ assign cci_pending_reads_next = cci_pending_reads
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
// Send read requests to CCI
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
cci_rd_req_addr <= 0;
cci_rd_req_ctr <= 0;
@@ -738,21 +744,23 @@ begin
if (cci_rd_req_fire) begin
cci_rd_req_addr <= cci_rd_req_addr + 1;
cci_rd_req_ctr <= cci_rd_req_ctr_next;
if (t_cci_rdq_tag'(cci_rd_req_ctr) == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
$display("*** %t: CCI Rd Rsp: STOP", $time);
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
$display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
`endif
end
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + t_cci_rdq_tag'(1);
if (cci_rd_rsp_ctr == $bits(t_cci_rdq_tag)'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
$display("*** %t: CCI Rd Rsp: START", $time);
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr);
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr);
`endif
end
@@ -763,12 +771,11 @@ begin
end
cci_pending_reads <= cci_pending_reads_next;
end
end
VX_generic_queue #(
.DATAW($bits(t_ccip_clData) + $bits(t_cci_rdq_tag)),
.DATAW(CCI_RD_RQ_DATAW),
.SIZE(CCI_RD_QUEUE_SIZE)
) cci_rd_req_queue (
.clk (clk),
@@ -782,14 +789,37 @@ VX_generic_queue #(
`UNUSED_PIN (size)
);
`DEBUG_BEGIN
reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask;
always @(posedge clk) begin
if (reset) begin
dbg_cci_rd_rsp_mask <= 0;
end else begin
if (cci_rd_rsp_fire) begin
if (cci_rd_rsp_ctr == 0) begin
dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag);
end else begin
if (dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] != 0) begin
$display("*** %t: Assert: CCI Rd Rsp: idx=%0d, ctr=%0d, mask=%0h, meta=%0h, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, dbg_cci_rd_rsp_mask, cp2af_sRxPort.c0.hdr.mdata, cp2af_sRxPort.c0.data);
assert(0);
end
dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1;
end
end
end
end
`DEBUG_END
// CCI-P Write Request //////////////////////////////////////////////////////////
logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next;
logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes;
wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
t_ccip_clAddr cci_wr_req_addr;
logic cci_wr_req_enable, cci_wr_rsp_fire;
reg cci_wr_req_enable;
wire cci_wr_rsp_fire;
always_comb begin
always @(*) begin
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
@@ -808,7 +838,7 @@ assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty;
// Send write requests to CCI
always_ff @(posedge clk)
always @(posedge clk)
begin
if (reset) begin
cci_wr_req_addr <= 0;
@@ -833,7 +863,7 @@ begin
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout);
`endif
end
@@ -849,12 +879,12 @@ end
// Vortex cache snooping //////////////////////////////////////////////////////
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next;
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr;
wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next;
logic vx_snp_req_fire, vx_snp_rsp_fire;
wire vx_snp_req_fire, vx_snp_rsp_fire;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
@@ -872,8 +902,7 @@ assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(
assign cmd_clflush_done = (0 == snp_rsp_ctr);
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
vx_snp_req_valid <= 0;
vx_snp_req_addr <= 0;
@@ -911,7 +940,7 @@ begin
vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next);
snp_req_ctr <= snp_req_ctr_next;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next));
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next));
`endif
end
@@ -928,7 +957,7 @@ end
// CSRs///////////////////////////////////////////////////////////////////////
logic csr_io_req_sent;
reg csr_io_req_sent;
assign vx_csr_io_req_valid = !csr_io_req_sent
&& ((STATE_CSR_READ == state || STATE_CSR_WRITE == state));
@@ -941,8 +970,7 @@ assign vx_csr_io_rsp_ready = 1;
assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid;
always_ff @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
csr_io_req_sent <= 0;
cmd_csr_rdata <= 0;

View File

@@ -6,11 +6,6 @@
///////////////////////////////////////////////////////////////////////////////
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)

View File

@@ -10,131 +10,24 @@ module VX_gpr_ram (
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data
);
`ifndef ASIC
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
end
end
q1 <= mem[rs1];
q2 <= mem[rs2];
end
assign rs1_data = q1;
assign rs2_data = q2;
`else
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
assign write_bit_mask[i] = {32{~we[i]}};
end
wire cenb = 0;
wire cena_1 = 0;
wire cena_2 = 0;
wire [`NUM_THREADS-1:0][31:0] tmp_a;
wire [`NUM_THREADS-1:0][31:0] tmp_b;
`ifndef SYNTHESIS
for (integer i = 0; i < `NUM_THREADS; i++) begin
for (integer j = 0; j < 32; j++) begin
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
end
end
`else
assign rs1_data = tmp_a;
assign rs2_data = tmp_b;
`endif
for (integer i = 0; i < 'NT; i=i+4) begin
`IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_a[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(rs1[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
q1 <= mem[rs1];
q2 <= mem[rs2];
end
rf2_`NUM_GPRSx128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_b[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(rs2[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
`IGNORE_WARNINGS_END
end
`endif
assign rs1_data = q1;
assign rs2_data = q2;
endmodule

View File

@@ -20,8 +20,8 @@ module VX_icache_stage #(
);
`UNUSED_VAR (reset)
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;

View File

@@ -16,8 +16,8 @@ module VX_ipdom_stack #(
);
localparam STACK_SIZE = 2 ** DEPTH;
reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
reg is_part [0:STACK_SIZE-1];
reg [DEPTH-1:0] rd_ptr, wr_ptr;

View File

@@ -52,7 +52,7 @@
///////////////////////////////////////////////////////////////////////////////
`define USE_FAST_BRAM (* ramstyle="mlab" *)
`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *)
`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *)
///////////////////////////////////////////////////////////////////////////////

View File

@@ -306,9 +306,9 @@ module VX_bank #(
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
//decides which request to deal with. Priority: 1) DRAM fill, 2) Miss reserve 3) Core req 4) Snp req
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
//Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
dfpq_pop_unqual ? dfpq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 :
0;

View File

@@ -56,7 +56,7 @@ module VX_cache_miss_resrv #(
output wire miss_resrv_is_snp_st0,
output wire miss_resrv_snp_invalidate_st0
);
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
reg [MRVQ_SIZE-1:0] valid_table;
@@ -72,8 +72,8 @@ module VX_cache_miss_resrv #(
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
reg [MRVQ_SIZE-1:0] make_ready;
reg [MRVQ_SIZE-1:0] make_ready_push;
@@ -86,11 +86,11 @@ module VX_cache_miss_resrv #(
assign pending_hazard_st1 = |(valid_address_match);
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
assign miss_resrv_valid_st0 = dequeue_possible;
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0,
miss_resrv_tid_st0,
miss_resrv_tag_st0,
@@ -98,7 +98,7 @@ module VX_cache_miss_resrv #(
miss_resrv_byteen_st0,
miss_resrv_wsel_st0,
miss_resrv_is_snp_st0,
miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
miss_resrv_snp_invalidate_st0} = metadata_table;
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
@@ -125,7 +125,6 @@ module VX_cache_miss_resrv #(
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
end else if (increment_head) begin
valid_table[head_ptr] <= 0;
@@ -155,6 +154,22 @@ module VX_cache_miss_resrv #(
end
end
VX_dp_ram #(
.DATAW(`MRVQ_METADATA_WIDTH),
.SIZE(MRVQ_SIZE),
.BYTEENW(1),
.BUFFERED(0),
.RWCHECK(1)
) metadata_ram (
.clk(clk),
.waddr(enqueue_index),
.raddr(dequeue_index),
.wren(mrvq_push),
.rden(1'b1),
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
.dout(metadata_table)
);
`ifdef DBG_PRINT_CACHE_MSRQ
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin

View File

@@ -183,15 +183,15 @@ module VX_tag_data_access #(
if (valid_req_st1) begin
if ((| use_write_enable)) begin
if (writefill_st1) begin
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
end else begin
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
end
end else
if (miss_st1) begin
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
end else begin
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
end
end
end

View File

@@ -78,7 +78,7 @@ module VX_tag_data_store #(
.SIZE(`BANK_LINE_COUNT),
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
.BUFFERED(0),
.RWCHECK(0)
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(write_addr),

View File

@@ -6,6 +6,7 @@ module VX_dp_ram #(
parameter BYTEENW = 1,
parameter BUFFERED = 1,
parameter RWCHECK = 1,
parameter RWBYPASS = 0,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1)
) (
@@ -29,19 +30,46 @@ module VX_dp_ram #(
if (wren[i])
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
end
if (rden)
dout_r <= mem[raddr];
end
end else begin
always @(posedge clk) begin
if (wren)
mem[waddr] <= din;
if (rden)
dout_r <= mem[raddr];
end
end
always @(posedge clk) begin
if (rden)
dout_r <= mem[raddr];
end
if (RWBYPASS) begin
reg [DATAW-1:0] din_r;
wire writing;
if (BYTEENW > 1) begin
assign writing = (| wren);
always @(posedge clk) begin
for (integer i = 0; i < BYTEENW; i++) begin
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
end
end
end else begin
assign writing = wren;
always @(posedge clk) begin
din_r <= din;
end
end
reg bypass_r;
always @(posedge clk) begin
bypass_r <= writing && (raddr == waddr);
end
assign dout = bypass_r ? din_r : dout_r;
end else begin
assign dout = dout_r;
end
end else begin
@@ -65,7 +93,7 @@ module VX_dp_ram #(
end
end
`ifdef SYNTHESIS
if (RWBYPASS) begin
reg [DATAW-1:0] din_r;
wire writing;
@@ -89,13 +117,13 @@ module VX_dp_ram #(
end
assign dout = bypass_r ? din_r : mem[raddr];
`else
end else begin
assign dout = mem[raddr];
`endif
end
end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin
always @(posedge clk) begin

View File

@@ -85,7 +85,7 @@ module VX_generic_queue #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(0),
.RWCHECK(0)
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_a),

View File

@@ -36,8 +36,9 @@ module VX_scope #(
localparam GET_COUNT = 3'd3;
localparam GET_OFFSET = 3'd6;
reg [DATAW-1:0] data_store [SIZE-1:0];
reg [DELTAW-1:0] delta_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_trigger_id;
reg [DELTAW-1:0] delta;
reg [BUSW-1:0] bus_out_r;

View File

@@ -44,7 +44,7 @@ gen-s:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
gen-sd:
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace $(DBG)
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG)
gen-st:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
@@ -53,7 +53,7 @@ gen-m:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md:
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace $(DBG)
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG)
gen-mt:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
@@ -77,11 +77,12 @@ build-mt: gen-mt
(cd obj_dir && make -j -f VVortex.mk)
run: run-s
run-s: build-s
(cd obj_dir && ./VVortex)
run-sd: build-sd
(cd obj_dir && valgrind ./VVortex)
(cd obj_dir && ./VVortex)
run-st: build-st
(cd obj_dir && ./VVortex)

View File

@@ -28,15 +28,11 @@ Simulator::Simulator() {
ram_ = nullptr;
vortex_ = new VVortex();
dram_rsp_active_ = false;
snp_req_active_ = false;
csr_req_active_ = false;
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
trace_ = new VerilatedFstC();
vortex_->trace(trace_, 99);
trace_->open("trace.vcd");
trace_->open("trace.fst");
#endif
// reset the device
@@ -66,27 +62,49 @@ void Simulator::reset() {
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
vortex_->reset = 1;
this->step();
vortex_->reset = 0;
print_bufs_.clear();
dram_rsp_vec_.clear();
dram_rsp_active_ = false;
snp_req_active_ = false;
csr_req_active_ = false;
snp_req_size_ = 0;
pending_snp_reqs_ = 0;
csr_rsp_value_ = nullptr;
vortex_->dram_rsp_valid = 0;
vortex_->dram_req_ready = 0;
vortex_->io_req_ready = 0;
vortex_->io_rsp_valid = 0;
vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0;
vortex_->csr_io_req_valid = 0;
vortex_->csr_io_rsp_ready = 0;
vortex_->reset = 1;
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
vortex_->reset = 0;
// Turn on assertion after reset
Verilated::assertOn(true);
}
void Simulator::step() {
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
this->eval_dram_bus();
this->eval_io_bus();
this->eval_csr_bus();
this->eval_snp_bus();
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
}
void Simulator::eval() {
@@ -104,14 +122,13 @@ void Simulator::eval_dram_bus() {
}
// schedule DRAM responses
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) {
dram_rsp_vec_[i].cycles_left -= 1;
std::list<dram_req_t>::iterator dram_rsp_it(dram_rsp_vec_.end());
for (auto it = dram_rsp_vec_.begin(), ie = dram_rsp_vec_.end(); it != ie; ++it) {
if (it->cycles_left > 0) {
it->cycles_left -= 1;
}
if ((dequeue_index == -1)
&& (dram_rsp_vec_[i].cycles_left == 0)) {
dequeue_index = i;
if ((dram_rsp_it == ie) && (it->cycles_left == 0)) {
dram_rsp_it = it;
}
}
@@ -122,11 +139,11 @@ void Simulator::eval_dram_bus() {
dram_rsp_active_ = false;
}
if (!dram_rsp_active_) {
if (dequeue_index != -1) {
if (dram_rsp_it != dram_rsp_vec_.end()) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_it->tag;
dram_rsp_vec_.erase(dram_rsp_it);
dram_rsp_active_ = true;
} else {
vortex_->dram_rsp_valid = 0;
@@ -161,7 +178,7 @@ void Simulator::eval_dram_bus() {
dram_req.cycles_left = DRAM_LATENCY;
dram_req.tag = vortex_->dram_req_tag;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_rsp_vec_.push_back(dram_req);
dram_rsp_vec_.emplace_back(dram_req);
}
}
}

View File

@@ -5,13 +5,14 @@
#include "verilated.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#include <verilated_fst_c.h>
#endif
#include <VX_config.h>
#include "ram.h"
#include <ostream>
#include <list>
#include <vector>
#include <sstream>
#include <unordered_map>
@@ -62,7 +63,7 @@ private:
void eval_csr_bus();
void eval_snp_bus();
std::vector<dram_req_t> dram_rsp_vec_;
std::list<dram_req_t> dram_rsp_vec_;
bool dram_rsp_active_;
bool snp_req_active_;
@@ -75,6 +76,6 @@ private:
RAM *ram_;
VVortex *vortex_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
VerilatedFstC *trace_;
#endif
};