fix l2 cache issues

This commit is contained in:
Blaise Tine
2020-06-04 18:34:14 -04:00
parent ea890b457d
commit 171d46b501
11 changed files with 300 additions and 260 deletions

View File

@@ -11,11 +11,13 @@ double sc_time_stamp() {
Simulator::Simulator() {
// force random values for unitialized signals
Verilated::randReset(2);
Verilated::randReset(1);
ram_ = nullptr;
vortex_ = new VVortex_Socket();
snp_req_active_ = false;
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC;
@@ -36,6 +38,214 @@ void Simulator::attach_ram(RAM* ram) {
dram_rsp_vec_.clear();
}
void Simulator::reset() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
vortex_->reset = 1;
this->step();
vortex_->reset = 0;
dram_rsp_vec_.clear();
}
void Simulator::step() {
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
this->eval_dram_bus();
this->eval_io_bus();
this->eval_snp_bus();
}
void Simulator::eval() {
vortex_->eval();
#ifdef VCD_OUTPUT
trace_->dump(timestamp);
#endif
++timestamp;
}
void Simulator::eval_dram_bus() {
if (ram_ == nullptr) {
vortex_->dram_req_ready = 0;
return;
}
// handle DRAM response cycle
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) {
dram_rsp_vec_[i].cycles_left -= 1;
}
if ((dequeue_index == -1)
&& (dram_rsp_vec_[i].cycles_left == 0)) {
dequeue_index = i;
}
}
// handle DRAM response message
if ((dequeue_index != -1)
&& vortex_->dram_rsp_ready) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
} else {
vortex_->dram_rsp_valid = 0;
}
// handle DRAM stalls
bool dram_stalled = false;
#ifdef ENABLE_DRAM_STALLS
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
dram_stalled = true;
} else
if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) {
dram_stalled = true;
}
#endif
// handle DRAM requests
if (!dram_stalled) {
if (vortex_->dram_req_valid) {
if (vortex_->dram_req_rw) {
uint64_t byteen = vortex_->dram_req_byteen;
unsigned base_addr = (vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(vortex_->dram_req_data);
for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
} else {
dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE);
dram_req.tag = vortex_->dram_req_tag;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data);
dram_rsp_vec_.push_back(dram_req);
}
}
}
vortex_->dram_req_ready = ~dram_stalled;
}
void Simulator::eval_io_bus() {
if (vortex_->io_req_valid
&& vortex_->io_req_rw
&& ((vortex_->io_req_addr << 2) == IO_BUS_ADDR_COUT)) {
uint32_t data_write = (uint32_t)vortex_->io_req_data;
char c = (char)data_write;
std::cout << c;
}
vortex_->io_req_ready = 1;
vortex_->io_rsp_valid = 0;
}
void Simulator::eval_snp_bus() {
if (snp_req_active_) {
if (vortex_->snp_rsp_valid) {
assert(pending_snp_reqs_ > 0);
--pending_snp_reqs_;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs_ << std::endl;
#endif
}
if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
if (snp_req_size_) {
vortex_->snp_req_addr += 1;
vortex_->snp_req_tag += 1;
--snp_req_size_;
++pending_snp_reqs_;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
} else {
vortex_->snp_req_valid = 0;
}
}
if (!vortex_->snp_req_valid
&& 0 == pending_snp_reqs_) {
snp_req_active_ = false;
}
} else {
vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0;
}
}
void Simulator::wait(uint32_t cycles) {
for (int i = 0; i < cycles; ++i) {
this->step();
}
}
bool Simulator::is_busy() {
return vortex_->busy || snp_req_active_;
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] flush_caches()" << std::endl;
#endif
if (0 == size)
return;
snp_req_active_ = true;
snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE;
vortex_->snp_req_tag = 0;
vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = 1;
--snp_req_size_;
pending_snp_reqs_ = 1;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
}
bool Simulator::run() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] run()" << std::endl;
#endif
// reset the device
this->reset();
// execute program
while (vortex_->busy
&& !vortex_->ebreak) {
this->step();
}
// wait 5 cycles to flush the pipeline
this->wait(5);
// check riscv-tests PASSED/FAILED status
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
#if (NUM_CLUSTERS == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#endif
#endif
return (status == 1);
}
void Simulator::load_bin(const char* program_file) {
if (ram_ == nullptr)
return;
@@ -123,202 +333,4 @@ void Simulator::load_ihex(const char* program_file) {
void Simulator::print_stats(std::ostream& out) {
out << std::left;
out << std::setw(24) << "# of total cycles:" << std::dec << timestamp/2 << std::endl;
}
void Simulator::dbus_driver() {
if (ram_ == nullptr) {
vortex_->dram_req_ready = 0;
return;
}
// handle DRAM response cycle
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) {
dram_rsp_vec_[i].cycles_left -= 1;
}
if ((dequeue_index == -1)
&& (dram_rsp_vec_[i].cycles_left == 0)) {
dequeue_index = i;
}
}
// handle DRAM response message
if ((dequeue_index != -1)
&& vortex_->dram_rsp_ready) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
} else {
vortex_->dram_rsp_valid = 0;
}
// handle DRAM stalls
bool dram_stalled = false;
#ifdef ENABLE_DRAM_STALLS
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
dram_stalled = true;
} else
if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) {
dram_stalled = true;
}
#endif
// handle DRAM requests
if (!dram_stalled) {
if (vortex_->dram_req_valid) {
if (vortex_->dram_req_rw) {
uint64_t byteen = vortex_->dram_req_byteen;
unsigned base_addr = (vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(vortex_->dram_req_data);
for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
} else {
dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE);
dram_req.tag = vortex_->dram_req_tag;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data);
dram_rsp_vec_.push_back(dram_req);
}
}
}
vortex_->dram_req_ready = ~dram_stalled;
}
void Simulator::io_driver() {
if (vortex_->io_req_valid
&& vortex_->io_req_rw
&& ((vortex_->io_req_addr << 2) == IO_BUS_ADDR_COUT)) {
uint32_t data_write = (uint32_t)vortex_->io_req_data;
char c = (char)data_write;
std::cout << c;
}
vortex_->io_req_ready = 1;
vortex_->io_rsp_valid = 0;
}
void Simulator::reset() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] reset()" << std::endl;
#endif
vortex_->reset = 1;
this->step();
vortex_->reset = 0;
dram_rsp_vec_.clear();
}
void Simulator::step() {
vortex_->clk = 0;
this->eval();
vortex_->clk = 1;
this->eval();
dbus_driver();
io_driver();
}
void Simulator::eval() {
vortex_->eval();
#ifdef VCD_OUTPUT
trace_->dump(timestamp);
#endif
++timestamp;
}
void Simulator::wait(uint32_t cycles) {
for (int i = 0; i < cycles; ++i) {
this->step();
}
}
bool Simulator::is_busy() {
return vortex_->busy;
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] flush_caches()" << std::endl;
#endif
// align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
// submit snoop requests for the needed blocks
vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_tag = 0;
vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = 1;
int pending_snp_reqs = 1;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << (aligned_addr_end - vortex_->snp_req_addr - 1) << std::endl;
#endif
for (;;) {
this->step();
if (vortex_->snp_rsp_valid) {
assert(pending_snp_reqs > 0);
--pending_snp_reqs;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp rsp: tag=" << vortex_->snp_rsp_tag << " pending=" << pending_snp_reqs << std::endl;
#endif
}
if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
if (vortex_->snp_req_addr + 1 < aligned_addr_end) {
vortex_->snp_req_addr += 1;
vortex_->snp_req_tag += 1;
++pending_snp_reqs;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << (aligned_addr_end - vortex_->snp_req_addr - 1) << std::endl;
#endif
} else {
vortex_->snp_req_valid = 0;
}
}
if (!vortex_->snp_req_valid
&& 0 == pending_snp_reqs) {
break;
}
}
}
bool Simulator::run() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] run()" << std::endl;
#endif
// reset the device
this->reset();
// execute program
while (vortex_->busy
&& !vortex_->ebreak) {
this->step();
}
// wait 5 cycles to flush the pipeline
this->wait(5);
// check riscv-tests PASSED/FAILED status
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
#if (NUM_CLUSTERS == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#endif
#endif
return (status == 1);
}