From 18172fa611796847bedf5ddc3a5e142f4ff17d09 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 10 Sep 2021 01:36:01 -0700 Subject: [PATCH] AXI memory bus support --- ci/regression.sh | 3 + driver/rtlsim/Makefile | 7 +- hw/rtl/Vortex_axi.v | 124 +++++++++++++++++++++++ hw/rtl/libs/VX_axi_adapter.v | 88 ++++++++++++++++ hw/simulate/simulator.cpp | 191 +++++++++++++++++++++++++++++++++-- hw/simulate/simulator.h | 22 +++- 6 files changed, 425 insertions(+), 10 deletions(-) create mode 100644 hw/rtl/Vortex_axi.v create mode 100644 hw/rtl/libs/VX_axi_adapter.v diff --git a/ci/regression.sh b/ci/regression.sh index ccf0eab3..6f3afa74 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -72,6 +72,9 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood # using FPNEW FPU core FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood +# using AXI bus +AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo + # adjust l1 block size to match l2 CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1" diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 65eb1ac0..75b77884 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -28,7 +28,12 @@ CFLAGS += -DDUMP_PERF_STATS LDFLAGS += -shared -pthread #LDFLAGS += -dynamiclib -pthread -TOP = Vortex +ifdef AXI_BUS + TOP = Vortex_axi + CFLAGS += -DAXI_BUS +else + TOP = Vortex +endif RTL_DIR = ../../hw/rtl DPI_DIR = ../../hw/dpi diff --git a/hw/rtl/Vortex_axi.v b/hw/rtl/Vortex_axi.v new file mode 100644 index 00000000..48432203 --- /dev/null +++ b/hw/rtl/Vortex_axi.v @@ -0,0 +1,124 @@ +`include "VX_define.vh" + +module Vortex_axi #( + parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, + localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) +)( + // Clock + input wire clk, + input wire reset, + + // AXI write 
request + output wire m_axi_wvalid, + output wire m_axi_awvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_awid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [7:0] m_axi_awlen, + output wire [2:0] m_axi_awsize, + output wire [1:0] m_axi_awburst, + output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata, + output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb, + input wire m_axi_wready, + input wire m_axi_awready, + + // AXI read request + output wire m_axi_arvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_arid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr, + output wire [7:0] m_axi_arlen, + output wire [2:0] m_axi_arsize, + output wire [1:0] m_axi_arburst, + input wire m_axi_arready, + + // AXI read response + input wire m_axi_rvalid, + input wire [AXI_TID_WIDTH-1:0] m_axi_rid, + input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata, + output wire m_axi_rready, + + // Status + output wire busy +); + wire mem_req_valid; + wire mem_req_rw; + wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen; + wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr; + wire [`VX_MEM_DATA_WIDTH-1:0] mem_req_data; + wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag; + wire mem_req_ready; + + wire mem_rsp_valid; + wire [`VX_MEM_DATA_WIDTH-1:0] mem_rsp_data; + wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag; + wire mem_rsp_ready; + + VX_axi_adapter #( + .VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH), + .VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), + .VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_TID_WIDTH (AXI_TID_WIDTH) + ) axi_adapter ( + .mem_req_valid (mem_req_valid), + .mem_req_rw (mem_req_rw), + .mem_req_byteen (mem_req_byteen), + .mem_req_addr (mem_req_addr), + .mem_req_data (mem_req_data), + .mem_req_tag (mem_req_tag), + .mem_req_ready (mem_req_ready), + + .mem_rsp_valid (mem_rsp_valid), + .mem_rsp_data (mem_rsp_data), + .mem_rsp_tag (mem_rsp_tag), + .mem_rsp_ready (mem_rsp_ready), + + .m_axi_wvalid (m_axi_wvalid), + .m_axi_awvalid (m_axi_awvalid), + .m_axi_awid (m_axi_awid), 
+ .m_axi_awaddr (m_axi_awaddr), + .m_axi_awlen (m_axi_awlen), + .m_axi_awsize (m_axi_awsize), + .m_axi_awburst (m_axi_awburst), + .m_axi_wdata (m_axi_wdata), + .m_axi_wstrb (m_axi_wstrb), + .m_axi_wready (m_axi_wready), + .m_axi_awready (m_axi_awready), + + .m_axi_arvalid (m_axi_arvalid), + .m_axi_arid (m_axi_arid), + .m_axi_araddr (m_axi_araddr), + .m_axi_arlen (m_axi_arlen), + .m_axi_arsize (m_axi_arsize), + .m_axi_arburst (m_axi_arburst), + .m_axi_arready (m_axi_arready), + + .m_axi_rvalid (m_axi_rvalid), + .m_axi_rid (m_axi_rid), + .m_axi_rdata (m_axi_rdata), + .m_axi_rready (m_axi_rready) + ); + + Vortex vortex ( + .clk (clk), + .reset (reset), + + .mem_req_valid (mem_req_valid), + .mem_req_rw (mem_req_rw), + .mem_req_byteen (mem_req_byteen), + .mem_req_addr (mem_req_addr), + .mem_req_data (mem_req_data), + .mem_req_tag (mem_req_tag), + .mem_req_ready (mem_req_ready), + + .mem_rsp_valid (mem_rsp_valid), + .mem_rsp_data (mem_rsp_data), + .mem_rsp_tag (mem_rsp_tag), + .mem_rsp_ready (mem_rsp_ready), + + .busy (busy) + ); + +endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_axi_adapter.v b/hw/rtl/libs/VX_axi_adapter.v new file mode 100644 index 00000000..6652401d --- /dev/null +++ b/hw/rtl/libs/VX_axi_adapter.v @@ -0,0 +1,88 @@ +`include "VX_define.vh" + +module VX_axi_adapter #( + parameter VX_DATA_WIDTH = 512, + parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)), + parameter VX_TAG_WIDTH = 8, + parameter AXI_DATA_WIDTH = VX_DATA_WIDTH, + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_TID_WIDTH = VX_TAG_WIDTH, + + localparam VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8), + localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) +) ( + // Vortex request + input wire mem_req_valid, + input wire mem_req_rw, + input wire [VX_BYTEEN_WIDTH-1:0] mem_req_byteen, + input wire [VX_ADDR_WIDTH-1:0] mem_req_addr, + input wire [VX_DATA_WIDTH-1:0] mem_req_data, + input wire [VX_TAG_WIDTH-1:0] mem_req_tag, + + // Vortex response + input wire mem_rsp_ready, + output 
wire mem_rsp_valid, + output wire [VX_DATA_WIDTH-1:0] mem_rsp_data, + output wire [VX_TAG_WIDTH-1:0] mem_rsp_tag, + output wire mem_req_ready, + + // AXI write request + output wire m_axi_wvalid, + output wire m_axi_awvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_awid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [7:0] m_axi_awlen, + output wire [2:0] m_axi_awsize, + output wire [1:0] m_axi_awburst, + output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata, + output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb, + input wire m_axi_wready, + input wire m_axi_awready, + + // AXI read request + output wire m_axi_arvalid, + output wire [AXI_TID_WIDTH-1:0] m_axi_arid, + output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr, + output wire [7:0] m_axi_arlen, + output wire [2:0] m_axi_arsize, + output wire [1:0] m_axi_arburst, + input wire m_axi_arready, + + // AXI read response + input wire m_axi_rvalid, + input wire [AXI_TID_WIDTH-1:0] m_axi_rid, + input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata, + output wire m_axi_rready +); + localparam AXSIZE = $clog2(VX_DATA_WIDTH/8); + + `STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter")) + `STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter")) + + // AXI write channel + assign m_axi_wvalid = mem_req_valid & mem_req_rw; + assign m_axi_awvalid = mem_req_valid & mem_req_rw; + assign m_axi_awid = mem_req_tag; + assign m_axi_awaddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE; + assign m_axi_awlen = 8'b00000000; + assign m_axi_awsize = 3'(AXSIZE); + assign m_axi_awburst = 2'b00; + assign m_axi_wdata = mem_req_data; + assign m_axi_wstrb = mem_req_byteen; + + // AXI read channel + assign m_axi_arvalid = mem_req_valid & ~mem_req_rw; + assign m_axi_arid = mem_req_tag; + assign m_axi_araddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE; + assign m_axi_arlen = 8'b00000000; + assign m_axi_arsize = 3'(AXSIZE); + assign m_axi_arburst = 2'b00; + assign m_axi_rready = mem_rsp_ready; + + // Vortex inputs + assign mem_rsp_valid 
= m_axi_rvalid; + assign mem_rsp_tag = m_axi_rid; + assign mem_rsp_data = m_axi_rdata; + assign mem_req_ready = mem_req_rw ? (m_axi_awready && m_axi_wready) : m_axi_arready; + +endmodule \ No newline at end of file diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index ccb37bf2..6adf457c 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -66,7 +66,12 @@ Simulator::Simulator() { Verilated::assertOn(false); ram_ = nullptr; + +#ifdef AXI_BUS + vortex_ = new VVortex_axi(); +#else vortex_ = new VVortex(); +#endif #ifdef VCD_OUTPUT Verilated::traceEverOn(true); @@ -103,15 +108,18 @@ void Simulator::attach_ram(RAM* ram) { void Simulator::reset() { print_bufs_.clear(); + for (int b = 0; b < MEMORY_BANKS; ++b) { mem_rsp_vec_[b].clear(); } last_mem_rsp_bank_ = 0; - mem_rsp_active_ = false; - vortex_->mem_rsp_valid = 0; - vortex_->mem_req_ready = 0; +#ifdef AXI_BUS + this->reset_axi_bus(); +#else + this->reset_mem_bus(); +#endif vortex_->reset = 1; @@ -133,12 +141,20 @@ void Simulator::step() { vortex_->clk = 0; this->eval(); - mem_rsp_ready_ = vortex_->mem_rsp_ready; - +#ifdef AXI_BUS + this->eval_axi_bus(0); +#else + this->eval_mem_bus(0); +#endif + vortex_->clk = 1; this->eval(); - this->eval_mem_bus(); +#ifdef AXI_BUS + this->eval_axi_bus(1); +#else + this->eval_mem_bus(1); +#endif #ifndef NDEBUG fflush(stdout); @@ -155,7 +171,158 @@ void Simulator::eval() { ++timestamp; } -void Simulator::eval_mem_bus() { +#ifdef AXI_BUS + +void Simulator::reset_axi_bus() { + vortex_->m_axi_wready = 0; + vortex_->m_axi_awready = 0; + vortex_->m_axi_arready = 0; + vortex_->m_axi_rvalid = 0; +} + +void Simulator::eval_axi_bus(bool clk) { + if (!clk) { + mem_rsp_ready_ = vortex_->m_axi_rready; + return; + } + if (ram_ == nullptr) { + vortex_->m_axi_wready = 0; + vortex_->m_axi_awready = 0; + vortex_->m_axi_arready = 0; + return; + } + + // update memory responses schedule + for (int b = 0; b < MEMORY_BANKS; ++b) { + for (auto& rsp : mem_rsp_vec_[b]) 
{ + if (rsp.cycles_left > 0) + rsp.cycles_left -= 1; + } + } + + bool has_response = false; + + // schedule memory responses that are ready + for (int i = 0; i < MEMORY_BANKS; ++i) { + uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS; + if (!mem_rsp_vec_[b].empty() + && (mem_rsp_vec_[b].begin()->cycles_left) <= 0) { + has_response = true; + last_mem_rsp_bank_ = b; + break; + } + } + + // send memory response + if (mem_rsp_active_ + && vortex_->m_axi_rvalid && mem_rsp_ready_) { + mem_rsp_active_ = false; + } + if (!mem_rsp_active_) { + if (has_response) { + vortex_->m_axi_rvalid = 1; + std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); + /* + printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); + vortex_->m_axi_rid = mem_rsp_it->tag; + mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); + mem_rsp_active_ = true; + } else { + vortex_->m_axi_rvalid = 0; + } + } + + // select the memory bank + uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr; + uint32_t req_bank = (MEMORY_BANKS >= 2) ? 
((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0; + + // handle memory stalls + bool mem_stalled = false; +#ifdef ENABLE_MEM_STALLS + if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) { + mem_stalled = true; + } else + if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) { + mem_stalled = true; + } +#endif + + // process memory requests + if (!mem_stalled) { + if (vortex_->m_axi_wvalid || vortex_->m_axi_arvalid) { + if (vortex_->m_axi_wvalid) { + uint64_t byteen = vortex_->m_axi_wstrb; + unsigned base_addr = vortex_->m_axi_awaddr; + uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata); + if (base_addr >= IO_COUT_ADDR + && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + auto& ss_buf = print_bufs_[i]; + char c = data[i]; + ss_buf << c; + if (c == '\n') { + std::cout << std::dec << "#" << i << ": " << ss_buf.str() << std::flush; + ss_buf.str(""); + } + } + } + } else { + /* + printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, base_addr, byteen); + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]); + } + printf("\n"); + */ + for (int i = 0; i < MEM_BLOCK_SIZE; i++) { + if ((byteen >> i) & 0x1) { + (*ram_)[base_addr + i] = data[i]; + } + } + } + } else { + mem_req_t mem_req; + mem_req.tag = vortex_->m_axi_arid; + mem_req.addr = vortex_->m_axi_araddr; + ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data()); + mem_req.cycles_left = MEM_LATENCY; + for (auto& rsp : mem_rsp_vec_[req_bank]) { + if (mem_req.addr == rsp.addr) { + // duplicate requests receive the same cycle delay + mem_req.cycles_left = rsp.cycles_left; + break; + } + } + mem_rsp_vec_[req_bank].emplace_back(mem_req); + } + } + } + + vortex_->m_axi_wready = !mem_stalled; + vortex_->m_axi_awready = !mem_stalled; + vortex_->m_axi_arready = !mem_stalled; +} + +#else + +void Simulator::reset_mem_bus() { + vortex_->mem_req_ready = 0; + vortex_->mem_rsp_valid = 0; +} + +void 
Simulator::eval_mem_bus(bool clk) { + if (!clk) { + mem_rsp_ready_ = vortex_->mem_rsp_ready; + return; + } + if (ram_ == nullptr) { vortex_->mem_req_ready = 0; return; @@ -276,6 +443,8 @@ void Simulator::eval_mem_bus() { vortex_->mem_req_ready = !mem_stalled; } +#endif + void Simulator::wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->step(); @@ -309,11 +478,19 @@ int Simulator::run() { } bool Simulator::get_ebreak() const { +#ifdef AXI_BUS + return (int)vortex_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; +#else return (int)vortex_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak; +#endif } int Simulator::get_last_wb_value(int reg) const { +#ifdef AXI_BUS + return (int)vortex_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; +#else return (int)vortex_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg]; +#endif } void Simulator::load_bin(const char* program_file) { diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index fe64babe..d867ea83 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -1,8 +1,14 @@ #pragma once #include <verilated.h> + +#ifdef AXI_BUS +#include "VVortex_axi.h" +#include "VVortex_axi__Syms.h" +#else #include "VVortex.h" #include "VVortex__Syms.h" +#endif #ifdef VCD_OUTPUT #include <verilated_vcd_c.h> @@ -58,8 +64,14 @@ private: std::unordered_map<int, std::stringstream> print_bufs_; void eval(); - - void eval_mem_bus(); + +#ifdef AXI_BUS + void reset_axi_bus(); + void eval_axi_bus(bool clk); +#else + void reset_mem_bus(); + void eval_mem_bus(bool clk); +#endif int get_last_wb_value(int reg) const; @@ -73,7 +85,13 @@ private: bool mem_rsp_ready_; RAM *ram_; + +#ifdef AXI_BUS + VVortex_axi *vortex_; +#else VVortex *vortex_; +#endif + #ifdef VCD_OUTPUT 
VerilatedVcdC *trace_; #endif