vlsim fix, verilator fst trace, use ram optimization

This commit is contained in:
Blaise Tine
2020-10-25 16:40:50 -07:00
parent 81dc8c7279
commit 43ae82e788
23 changed files with 424 additions and 422 deletions

View File

@@ -15,13 +15,13 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#DEBUG=1
#SCOPE=1
@@ -58,7 +58,7 @@ VL_FLAGS += verilator.vlt
# Debugging
ifdef DEBUG
VL_FLAGS += -DVCD_OUTPUT --assert --trace $(DBG_FLAGS)
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG

View File

@@ -31,9 +31,9 @@ opae_sim::opae_sim() {
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC();
trace_ = new VerilatedFstC();
vortex_afu_->trace(trace_, 99);
trace_->open("trace.vcd");
trace_->open("trace.fst");
#endif
this->reset();
@@ -85,6 +85,19 @@ void opae_sim::get_io_address(uint64_t wsid, uint64_t *ioaddr) {
*ioaddr = host_buffers_[wsid].ioaddr;
}
// Issues one MMIO read request to the simulated AFU and returns the
// 64-bit value it responds with.
//
//   mmio_num - not used by this implementation.
//   offset   - byte offset of the register; the request header address is
//              offset / 4 (presumably a 32-bit word address - TODO confirm).
//   value    - output; receives the response data after one step().
//
// The whole transaction runs under mutex_. The AFU is expected to raise
// its MMIO read-valid response after a single step(); this is enforced
// with an assert.
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
  std::lock_guard<std::mutex> lock(mutex_);

  auto &afu = *vortex_afu_;

  // Drive the read request and its header fields.
  afu.vcp2af_sRxPort_c0_mmioRdValid = 1;
  afu.vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
  afu.vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
  afu.vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;

  // Advance the simulation so the request is sampled.
  this->step();

  // Drop the request before inspecting the response.
  afu.vcp2af_sRxPort_c0_mmioRdValid = 0;

  // The response must be valid at this point.
  assert(afu.af2cp_sTxPort_c2_mmioRdValid);
  *value = afu.af2cp_sTxPort_c2_data;
}
void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
std::lock_guard<std::mutex> guard(mutex_);
@@ -94,20 +107,7 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, &value, 8);
this->step();
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid);
}
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
vortex_afu_->vcp2af_sRxPort_c0_ReqMmioHdr_tid = 0;
this->step();
assert(!vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid);
assert(vortex_afu_->af2cp_sTxPort_c2_mmioRdValid);
*value = vortex_afu_->af2cp_sTxPort_c2_data;
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
}
void opae_sim::flush() {
@@ -117,24 +117,41 @@ void opae_sim::flush() {
///////////////////////////////////////////////////////////////////////////////
void opae_sim::reset() {
vortex_afu_->reset = 1;
this->step();
vortex_afu_->reset = 0;
host_buffers_.clear();
dram_reads_.clear();
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
vortex_afu_->avs_readdatavalid = 0;
vortex_afu_->avs_waitrequest = 0;
vortex_afu_->reset = 1;
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
vortex_afu_->reset = 0;
// Turn on assertion after reset
Verilated::assertOn(true);
}
void opae_sim::step() {
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
this->sRxPort_bus();
this->sTxPort_bus();
this->avs_bus();
vortex_afu_->clk = 0;
this->eval();
vortex_afu_->clk = 1;
this->eval();
#ifndef NDEBUG
fflush(stdout);
@@ -149,100 +166,105 @@ void opae_sim::eval() {
++timestamp;
}
void opae_sim::sRxPort_bus() {
void opae_sim::sRxPort_bus() {
// check mmio request
bool mmio_req_enabled = vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid
|| vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid;
// schedule CCI read responses
int cci_rd_index = -1;
for (int i = 0; i < cci_reads_.size(); i++) {
if (cci_reads_[i].cycles_left > 0) {
cci_reads_[i].cycles_left -= 1;
}
if ((cci_rd_index == -1)
&& (cci_reads_[i].cycles_left == 0)) {
cci_rd_index = i;
std::list<cci_rd_req_t>::iterator cci_rd_it(cci_reads_.end());
for (auto it = cci_reads_.begin(), ie = cci_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_rd_it == ie) && (it->cycles_left == 0)) {
cci_rd_it = it;
}
}
// schedule CCI write responses
int cci_wr_index = -1;
for (int i = 0; i < cci_writes_.size(); i++) {
if (cci_writes_[i].cycles_left > 0) {
cci_writes_[i].cycles_left -= 1;
std::list<cci_wr_req_t>::iterator cci_wr_it(cci_writes_.end());
for (auto it = cci_writes_.begin(), ie = cci_writes_.end(); it != ie; ++it) {
if (it->cycles_left > 0)
it->cycles_left -= 1;
if ((cci_wr_it == ie) && (it->cycles_left == 0)) {
cci_wr_it = it;
}
if ((cci_wr_index == -1)
&& (cci_writes_[i].cycles_left == 0)) {
cci_wr_index = i;
}
}
// send CCI read response
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (cci_rd_index != -1) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_reads_[cci_rd_index].block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_reads_[cci_rd_index].mdata;
cci_reads_.erase(cci_reads_.begin() + cci_rd_index);
}
// send CCI write response
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
if (cci_wr_index != -1) {
if (cci_wr_it != cci_writes_.end()) {
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 1;
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_writes_[cci_wr_index].mdata;
cci_writes_.erase(cci_writes_.begin() + cci_wr_index);
vortex_afu_->vcp2af_sRxPort_c1_hdr_mdata = cci_wr_it->mdata;
cci_writes_.erase(cci_wr_it);
}
// mmio
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
// send CCI read response (ensure mmio disabled)
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
if (!mmio_req_enabled
&& (cci_rd_it != cci_reads_.end())) {
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 1;
memcpy(vortex_afu_->vcp2af_sRxPort_c0_data, cci_rd_it->block.data(), CACHE_BLOCK_SIZE);
vortex_afu_->vcp2af_sRxPort_c0_hdr_mdata = cci_rd_it->mdata;
printf("*** [vlsim] read-rsp: addr=%ld, mdata=%d, data=", cci_rd_it->addr, cci_rd_it->mdata);
for (int i = 0; i < CACHE_BLOCK_SIZE; ++i) {
printf("%02x", cci_rd_it->block[CACHE_BLOCK_SIZE-1-i]);
}
printf("\n");
fflush(stdout);
cci_reads_.erase(cci_rd_it);
}
}
void opae_sim::sTxPort_bus() {
// check read queue size
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= CCI_RQ_SIZE);
// check write queue size
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= CCI_WQ_SIZE);
// process read requests
if (vortex_afu_->af2cp_sTxPort_c0_valid && !vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull) {
if (vortex_afu_->af2cp_sTxPort_c0_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull);
cci_rd_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.addr = vortex_afu_->af2cp_sTxPort_c0_hdr_address;
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c0_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c0_hdr_address * CACHE_BLOCK_SIZE);
memcpy(cci_req.block.data(), host_ptr, CACHE_BLOCK_SIZE);
cci_reads_.push_back(cci_req);
printf("*** [vlsim] read-req: addr=%ld, mdata=%d\n", vortex_afu_->af2cp_sTxPort_c0_hdr_address, cci_req.mdata);
fflush(stdout);
cci_reads_.emplace_back(cci_req);
}
// process write requests
if (vortex_afu_->af2cp_sTxPort_c1_valid && !vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull) {
if (vortex_afu_->af2cp_sTxPort_c1_valid) {
assert(!vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull);
cci_wr_req_t cci_req;
cci_req.cycles_left = CCI_LATENCY + (timestamp % CCI_RAND_MOD);
cci_req.mdata = vortex_afu_->af2cp_sTxPort_c1_hdr_mdata;
auto host_ptr = (uint64_t*)(vortex_afu_->af2cp_sTxPort_c1_hdr_address * CACHE_BLOCK_SIZE);
memcpy(host_ptr, vortex_afu_->af2cp_sTxPort_c1_data, CACHE_BLOCK_SIZE);
cci_writes_.push_back(cci_req);
cci_writes_.emplace_back(cci_req);
}
// check queues overflow
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = (cci_reads_.size() >= (CCI_RQ_SIZE-1));
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = (cci_writes_.size() >= (CCI_WQ_SIZE-1));
}
void opae_sim::avs_bus() {
// schedule DRAM read responses
int dram_rd_index = -1;
for (int i = 0; i < dram_reads_.size(); i++) {
if (dram_reads_[i].cycles_left > 0) {
dram_reads_[i].cycles_left -= 1;
std::list<dram_rd_req_t>::iterator dram_rd_it(dram_reads_.end());
for (auto it = dram_reads_.begin(), ie = dram_reads_.end(); it != ie; ++it) {
if (it->cycles_left > 0) {
it->cycles_left -= 1;
}
if ((dram_rd_index == -1)
&& (dram_reads_[i].cycles_left == 0)) {
dram_rd_index = i;
if ((it != ie) && (it->cycles_left == 0)) {
dram_rd_it = it;
}
}
// send DRAM response
vortex_afu_->avs_readdatavalid = 0;
if (dram_rd_index != -1) {
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_reads_[dram_rd_index].block.data(), CACHE_BLOCK_SIZE);
dram_reads_.erase(dram_reads_.begin() + dram_rd_index);
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
dram_reads_.erase(dram_rd_it);
}
// handle DRAM stalls
@@ -275,7 +297,7 @@ void opae_sim::avs_bus() {
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_reads_.push_back(dram_req);
dram_reads_.emplace_back(dram_req);
}
}

View File

@@ -5,7 +5,7 @@
#include "verilated.h"
#ifdef VCD_OUTPUT
#include <verilated_vcd_c.h>
#include <verilated_fst_c.h>
#endif
#include <VX_config.h>
@@ -13,7 +13,7 @@
#include <ostream>
#include <future>
#include <vector>
#include <list>
#include <unordered_map>
#define CACHE_BLOCK_SIZE 64
@@ -41,18 +41,19 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
unsigned tag;
uint32_t tag;
} dram_rd_req_t;
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
unsigned mdata;
uint64_t addr;
uint32_t mdata;
} cci_rd_req_t;
typedef struct {
int cycles_left;
unsigned mdata;
uint32_t mdata;
} cci_wr_req_t;
typedef struct {
@@ -76,17 +77,17 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
std::vector<dram_rd_req_t> dram_reads_;
std::list<dram_rd_req_t> dram_reads_;
std::vector<cci_rd_req_t> cci_reads_;
std::list<cci_rd_req_t> cci_reads_;
std::vector<cci_wr_req_t> cci_writes_;
std::list<cci_wr_req_t> cci_writes_;
std::mutex mutex_;
RAM ram_;
Vvortex_afu_shim *vortex_afu_;
#ifdef VCD_OUTPUT
VerilatedVcdC *trace_;
VerilatedFstC *trace_;
#endif
};

View File

@@ -1,6 +1,6 @@
#pragma once
//#define HANG_TIMEOUT 60
#define HANG_TIMEOUT 60
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);

View File

@@ -1,7 +1,7 @@
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
OPTS ?= -n32
OPTS ?= -n64
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++