From 9f34b2944cb07f254cf92b59e0873b7decc419a9 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 27 Sep 2021 08:55:10 -0400 Subject: [PATCH] code refactoring for Vivado, sv2v, and yosys compatibility --- ci/regression.sh | 4 +- driver/opae/vlsim/Makefile | 4 +- driver/rtlsim/Makefile | 4 +- hw/rtl/VX_alu_unit.v | 10 +- hw/rtl/VX_cluster.v | 50 ++++---- hw/rtl/VX_commit.v | 25 ++-- hw/rtl/VX_config.vh | 124 ++++++++++---------- hw/rtl/VX_core.v | 34 +++--- hw/rtl/VX_csr_data.v | 19 ++-- hw/rtl/VX_csr_unit.v | 22 ++-- hw/rtl/VX_decode.v | 8 +- hw/rtl/VX_define.vh | 117 +++++++++---------- hw/rtl/VX_execute.v | 40 +++---- hw/rtl/VX_fetch.v | 18 +-- hw/rtl/VX_fpu_unit.v | 6 +- hw/rtl/VX_gpr_stage.v | 10 +- hw/rtl/VX_gpu_unit.v | 10 +- hw/rtl/VX_ibuffer.v | 4 +- hw/rtl/VX_icache_stage.v | 12 +- hw/rtl/VX_instr_demux.v | 18 +-- hw/rtl/VX_issue.v | 16 +-- hw/rtl/VX_lsu_unit.v | 24 ++-- hw/rtl/VX_mem_arb.v | 6 +- hw/rtl/VX_mem_unit.v | 144 ++++++++++++------------ hw/rtl/VX_pipeline.v | 18 +-- hw/rtl/VX_platform.vh | 29 ++++- hw/rtl/VX_scoreboard.v | 21 ++-- hw/rtl/VX_smem_arb.v | 8 +- hw/rtl/VX_warp_sched.v | 27 ++--- hw/rtl/VX_writeback.v | 14 +-- hw/rtl/Vortex.v | 46 ++++---- hw/rtl/Vortex_axi.v | 65 +++++++---- hw/rtl/afu/VX_avs_wrapper.v | 4 +- hw/rtl/afu/vortex_afu.sv | 2 +- hw/rtl/cache/VX_bank.v | 17 +-- hw/rtl/cache/VX_cache.v | 58 +++++----- hw/rtl/cache/VX_cache_define.vh | 4 +- hw/rtl/cache/VX_core_req_bank_sel.v | 6 +- hw/rtl/cache/VX_data_access.v | 2 +- hw/rtl/cache/VX_miss_resrv.v | 13 ++- hw/rtl/cache/VX_nc_bypass.v | 8 +- hw/rtl/cache/VX_shared_mem.v | 20 ++-- hw/rtl/interfaces/VX_alu_req_if.v | 38 +++++++ hw/rtl/interfaces/VX_branch_ctl_if.v | 14 +++ hw/rtl/interfaces/VX_cmt_to_csr_if.v | 10 ++ hw/rtl/interfaces/VX_commit_if.v | 26 ++++- hw/rtl/interfaces/VX_csr_req_if.v | 30 +++++ hw/rtl/interfaces/VX_dcache_req_if.v | 20 ++++ hw/rtl/interfaces/VX_dcache_rsp_if.v | 16 +++ hw/rtl/interfaces/VX_decode_if.v | 38 +++++++ hw/rtl/interfaces/VX_fetch_to_csr_if.v | 8 ++ hw/rtl/interfaces/VX_fpu_req_if.v | 30 +++++ hw/rtl/interfaces/VX_fpu_to_csr_if.v | 16 +++ hw/rtl/interfaces/VX_gpr_req_if.v | 16 ++- hw/rtl/interfaces/VX_gpr_rsp_if.v | 12 ++ hw/rtl/interfaces/VX_gpu_req_if.v | 30 +++++ hw/rtl/interfaces/VX_ibuffer_if.v | 54 ++++++++- hw/rtl/interfaces/VX_icache_req_if.v | 14 +++ hw/rtl/interfaces/VX_icache_rsp_if.v | 16 ++- hw/rtl/interfaces/VX_ifetch_req_if.v | 16 +++ hw/rtl/interfaces/VX_ifetch_rsp_if.v | 18 +++ hw/rtl/interfaces/VX_join_if.v | 10 ++ hw/rtl/interfaces/VX_lsu_req_if.v | 30 +++++ hw/rtl/interfaces/VX_mem_req_if.v | 20 ++++ hw/rtl/interfaces/VX_mem_rsp_if.v | 16 ++- hw/rtl/interfaces/VX_perf_cache_if.v | 22 ++++ hw/rtl/interfaces/VX_perf_memsys_if.v | 44 ++++++++ hw/rtl/interfaces/VX_perf_pipeline_if.v | 28 ++++- hw/rtl/interfaces/VX_warp_ctl_if.v | 18 +++ hw/rtl/interfaces/VX_writeback_if.v | 26 ++++- hw/rtl/interfaces/VX_wstall_if.v | 12 ++ hw/rtl/libs/VX_axi_adapter.v | 59 ++++++---- hw/rtl/libs/VX_bypass_buffer.v | 2 +- hw/rtl/libs/VX_dp_ram.v | 4 +- hw/rtl/libs/VX_fifo_queue.v | 8 +- hw/rtl/libs/VX_find_first.v | 2 +- hw/rtl/libs/VX_index_buffer.v | 4 +- hw/rtl/libs/VX_index_queue.v | 6 +- hw/rtl/libs/VX_lzc.v | 6 +- hw/rtl/libs/VX_pending_size.v | 2 +- hw/rtl/libs/VX_skid_buffer.v | 4 +- hw/rtl/libs/VX_sp_ram.v | 4 +- hw/rtl/libs/VX_stream_demux.v | 2 +- hw/simulate/Makefile | 4 +- hw/simulate/simulator.cpp | 95 +++++++++++----- hw/simulate/simulator.h | 9 +- hw/syn/opae/Makefile | 10 +- hw/syn/quartus/top16/Makefile | 2 +- hw/syn/quartus/top32/Makefile | 2 +- hw/syn/quartus/top4/Makefile | 2 +- hw/syn/quartus/top64/Makefile | 2 +- hw/syn/quartus/top8/Makefile | 2 +- hw/syn/yosys/Makefile | 11 +- hw/syn/yosys/diagram.ys | 5 - hw/syn/yosys/sv2v.sh | 57 ++++++++++ hw/syn/yosys/synth.sh | 56 ++++----- hw/unit_tests/cache/Makefile | 2 +- 97 files changed, 1435 insertions(+), 666 deletions(-) delete mode 100644 hw/syn/yosys/diagram.ys create mode 100755 hw/syn/yosys/sv2v.sh diff --git a/ci/regression.sh b/ci/regression.sh index a8196516..4b825960 100755 --- a/ci/regression.sh +++ b/ci/regression.sh @@ -85,8 +85,8 @@ CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_add # test cache multi-porting CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1" -CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr -CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr +CONFIGS="-DL2_NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr +CONFIGS="-DL2_NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr # test 128-bit MEM block CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index fe3ca8bb..52189e9f 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -36,8 +36,8 @@ DPI_DIR=../../../hw/dpi SRCS = fpga.cpp opae_sim.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) +FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 75b77884..c5972a48 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -41,8 +41,8 @@ DPI_DIR = ../../hw/dpi SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp -FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src -RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) +FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index f9812992..129b1202 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -3,15 +3,15 @@ module VX_alu_unit #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Inputs - VX_alu_req_if alu_req_if, + VX_alu_req_if.slave alu_req_if, // Outputs - VX_branch_ctl_if branch_ctl_if, - VX_commit_if alu_commit_if + VX_branch_ctl_if.master branch_ctl_if, + VX_commit_if.master alu_commit_if ); `UNUSED_PARAM (CORE_ID) diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 35b933bb..030b7e65 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -12,16 +12,16 @@ module VX_cluster #( // Memory request output wire mem_req_valid, output wire mem_req_rw, - output wire [`L2MEM_BYTEEN_WIDTH-1:0] mem_req_byteen, - output wire [`L2MEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [`L2MEM_DATA_WIDTH-1:0] mem_req_data, - output wire [`L2MEM_TAG_WIDTH-1:0] mem_req_tag, + output wire [`L2_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen, + output wire [`L2_MEM_ADDR_WIDTH-1:0] mem_req_addr, + output wire [`L2_MEM_DATA_WIDTH-1:0] mem_req_data, + output wire [`L2_MEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, // Memory response input wire mem_rsp_valid, - input wire [`L2MEM_DATA_WIDTH-1:0] mem_rsp_data, - input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag, + input wire [`L2_MEM_DATA_WIDTH-1:0] mem_rsp_data, + input wire [`L2_MEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready, // Status @@ -31,14 +31,14 @@ module VX_cluster #( wire [`NUM_CORES-1:0] per_core_mem_req_valid; wire [`NUM_CORES-1:0] per_core_mem_req_rw; - wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen; - wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0] per_core_mem_req_addr; - wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_req_data; + wire [`NUM_CORES-1:0][`DCACHE_MEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen; + wire [`NUM_CORES-1:0][`DCACHE_MEM_ADDR_WIDTH-1:0] per_core_mem_req_addr; + wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_req_data; wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag; wire [`NUM_CORES-1:0] per_core_mem_req_ready; wire [`NUM_CORES-1:0] per_core_mem_rsp_valid; - wire [`NUM_CORES-1:0][`DMEM_DATA_WIDTH-1:0] per_core_mem_rsp_data; + wire [`NUM_CORES-1:0][`DCACHE_MEM_DATA_WIDTH-1:0] per_core_mem_rsp_data; wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag; wire [`NUM_CORES-1:0] per_core_mem_rsp_ready; @@ -83,22 +83,22 @@ module VX_cluster #( `RESET_RELAY (l2_reset); VX_cache #( - .CACHE_ID (`L2CACHE_ID), - .CACHE_SIZE (`L2CACHE_SIZE), - .CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE), - .NUM_BANKS (`L2NUM_BANKS), - .NUM_PORTS (`L2NUM_PORTS), - .WORD_SIZE (`L2WORD_SIZE), - .NUM_REQS (`L2NUM_REQS), - .CREQ_SIZE (`L2CREQ_SIZE), - .CRSQ_SIZE (`L2CRSQ_SIZE), - .MSHR_SIZE (`L2MSHR_SIZE), - .MRSQ_SIZE (`L2MRSQ_SIZE), - .MREQ_SIZE (`L2MREQ_SIZE), + .CACHE_ID (`L2_CACHE_ID), + .CACHE_SIZE (`L2_CACHE_SIZE), + .CACHE_LINE_SIZE (`L2_CACHE_LINE_SIZE), + .NUM_BANKS (`L2_NUM_BANKS), + .NUM_PORTS (`L2_NUM_PORTS), + .WORD_SIZE (`L2_WORD_SIZE), + .NUM_REQS (`L2_NUM_REQS), + .CREQ_SIZE (`L2_CREQ_SIZE), + .CRSQ_SIZE (`L2_CRSQ_SIZE), + .MSHR_SIZE (`L2_MSHR_SIZE), + .MRSQ_SIZE (`L2_MRSQ_SIZE), + .MREQ_SIZE (`L2_MREQ_SIZE), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`XMEM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), - .MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH), + .MEM_TAG_WIDTH (`L2_MEM_TAG_WIDTH), .NC_ENABLE (1) ) l2cache ( `SCOPE_BIND_VX_cluster_l2cache @@ -148,8 +148,8 @@ module VX_cluster #( VX_mem_arb #( .NUM_REQS (`NUM_CORES), - .DATA_WIDTH (`DMEM_DATA_WIDTH), - .ADDR_WIDTH (`DMEM_ADDR_WIDTH), + .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH), + .ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH), .TAG_IN_WIDTH (`XMEM_TAG_WIDTH), .TYPE ("R"), .TAG_SEL_IDX (1), // Skip 0 for NC flag diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index 93c02eff..a8e1764b 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -3,22 +3,22 @@ module VX_commit #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_commit_if alu_commit_if, - VX_commit_if ld_commit_if, - VX_commit_if st_commit_if, - VX_commit_if csr_commit_if, + VX_commit_if.slave alu_commit_if, + VX_commit_if.slave ld_commit_if, + VX_commit_if.slave st_commit_if, + VX_commit_if.slave csr_commit_if, `ifdef EXT_F_ENABLE - VX_commit_if fpu_commit_if, + VX_commit_if.slave fpu_commit_if, `endif - VX_commit_if gpu_commit_if, + VX_commit_if.slave gpu_commit_if, // outputs - VX_writeback_if writeback_if, - VX_cmt_to_csr_if cmt_to_csr_if + VX_writeback_if.master writeback_if, + VX_cmt_to_csr_if.master cmt_to_csr_if ); // CSRs update @@ -50,6 +50,9 @@ module VX_commit #( `endif /*gpu_commit_fire ?*/ gpu_commit_if.tmask; + wire [$clog2(`NUM_THREADS+1)-1:0] commit_cnt; + `POP_COUNT(commit_cnt, commit_tmask); + VX_pipe_register #( .DATAW (1 + $clog2(`NUM_THREADS+1)), .RESETW (1) @@ -57,7 +60,7 @@ module VX_commit #( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({commit_fire, $countones(commit_tmask)}), + .data_in ({commit_fire, commit_cnt}), .data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size}) ); diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 74322329..0b9dbf42 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -255,28 +255,28 @@ `endif // Core Request Queue Size -`ifndef ICREQ_SIZE -`define ICREQ_SIZE 0 +`ifndef ICACHE_CREQ_SIZE +`define ICACHE_CREQ_SIZE 0 `endif // Core Response Queue Size -`ifndef ICRSQ_SIZE -`define ICRSQ_SIZE 2 +`ifndef ICACHE_CRSQ_SIZE +`define ICACHE_CRSQ_SIZE 2 `endif // Miss Handling Register Size -`ifndef IMSHR_SIZE -`define IMSHR_SIZE `NUM_WARPS +`ifndef ICACHE_MSHR_SIZE +`define ICACHE_MSHR_SIZE `NUM_WARPS `endif // Memory Request Queue Size -`ifndef IMREQ_SIZE -`define IMREQ_SIZE 4 +`ifndef ICACHE_MREQ_SIZE +`define ICACHE_MREQ_SIZE 4 `endif // Memory Response Queue Size -`ifndef IMRSQ_SIZE -`define IMRSQ_SIZE 0 +`ifndef ICACHE_MRSQ_SIZE +`define ICACHE_MRSQ_SIZE 0 `endif // Dcache Configurable Knobs ////////////////////////////////////////////////// @@ -287,38 +287,38 @@ `endif // Number of banks -`ifndef DNUM_BANKS -`define DNUM_BANKS `NUM_THREADS +`ifndef DCACHE_NUM_BANKS +`define DCACHE_NUM_BANKS `NUM_THREADS `endif // Number of ports per bank -`ifndef DNUM_PORTS -`define DNUM_PORTS 1 +`ifndef DCACHE_NUM_PORTS +`define DCACHE_NUM_PORTS 1 `endif // Core Request Queue Size -`ifndef DCREQ_SIZE -`define DCREQ_SIZE 0 +`ifndef DCACHE_CREQ_SIZE +`define DCACHE_CREQ_SIZE 0 `endif // Core Response Queue Size -`ifndef DCRSQ_SIZE -`define DCRSQ_SIZE 2 +`ifndef DCACHE_CRSQ_SIZE +`define DCACHE_CRSQ_SIZE 2 `endif // Miss Handling Register Size -`ifndef DMSHR_SIZE -`define DMSHR_SIZE `LSUQ_SIZE +`ifndef DCACHE_MSHR_SIZE +`define DCACHE_MSHR_SIZE `LSUQ_SIZE `endif // Memory Request Queue Size -`ifndef DMREQ_SIZE -`define DMREQ_SIZE 4 +`ifndef DCACHE_MREQ_SIZE +`define DCACHE_MREQ_SIZE 4 `endif // Memory Response Queue Size -`ifndef DMRSQ_SIZE -`define DMRSQ_SIZE 0 +`ifndef DCACHE_MRSQ_SIZE +`define DCACHE_MRSQ_SIZE 0 `endif // SM Configurable Knobs ////////////////////////////////////////////////////// @@ -335,102 +335,102 @@ `endif // Number of banks -`ifndef SNUM_BANKS -`define SNUM_BANKS `NUM_THREADS +`ifndef SMEM_NUM_BANKS +`define SMEM_NUM_BANKS `NUM_THREADS `endif // Core Request Queue Size -`ifndef SCREQ_SIZE -`define SCREQ_SIZE 2 +`ifndef SMEM_CREQ_SIZE +`define SMEM_CREQ_SIZE 2 `endif // Core Response Queue Size -`ifndef SCRSQ_SIZE -`define SCRSQ_SIZE 2 +`ifndef SMEM_CRSQ_SIZE +`define SMEM_CRSQ_SIZE 2 `endif // L2cache Configurable Knobs ///////////////////////////////////////////////// // Size of cache in bytes -`ifndef L2CACHE_SIZE -`define L2CACHE_SIZE 131072 +`ifndef L2_CACHE_SIZE +`define L2_CACHE_SIZE 131072 `endif // Number of banks -`ifndef L2NUM_BANKS -`define L2NUM_BANKS `MIN(`NUM_CORES, 4) +`ifndef L2_NUM_BANKS +`define L2_NUM_BANKS `MIN(`NUM_CORES, 4) `endif // Number of ports per bank -`ifndef L2NUM_PORTS -`define L2NUM_PORTS 1 +`ifndef L2_NUM_PORTS +`define L2_NUM_PORTS 1 `endif // Core Request Queue Size -`ifndef L2CREQ_SIZE -`define L2CREQ_SIZE 0 +`ifndef L2_CREQ_SIZE +`define L2_CREQ_SIZE 0 `endif // Core Response Queue Size -`ifndef L2CRSQ_SIZE -`define L2CRSQ_SIZE 2 +`ifndef L2_CRSQ_SIZE +`define L2_CRSQ_SIZE 2 `endif // Miss Handling Register Size -`ifndef L2MSHR_SIZE -`define L2MSHR_SIZE 16 +`ifndef L2_MSHR_SIZE +`define L2_MSHR_SIZE 16 `endif // Memory Request Queue Size -`ifndef L2MREQ_SIZE -`define L2MREQ_SIZE 4 +`ifndef L2_MREQ_SIZE +`define L2_MREQ_SIZE 4 `endif // Memory Response Queue Size -`ifndef L2MRSQ_SIZE -`define L2MRSQ_SIZE 0 +`ifndef L2_MRSQ_SIZE +`define L2_MRSQ_SIZE 0 `endif // L3cache Configurable Knobs ///////////////////////////////////////////////// // Size of cache in bytes -`ifndef L3CACHE_SIZE -`define L3CACHE_SIZE 1048576 +`ifndef L3_CACHE_SIZE +`define L3_CACHE_SIZE 1048576 `endif // Number of banks -`ifndef L3NUM_BANKS -`define L3NUM_BANKS `MIN(`NUM_CLUSTERS, 4) +`ifndef L3_NUM_BANKS +`define L3_NUM_BANKS `MIN(`NUM_CLUSTERS, 4) `endif // Number of ports per bank -`ifndef L3NUM_PORTS -`define L3NUM_PORTS 1 +`ifndef L3_NUM_PORTS +`define L3_NUM_PORTS 1 `endif // Core Request Queue Size -`ifndef L3CREQ_SIZE -`define L3CREQ_SIZE 0 +`ifndef L3_CREQ_SIZE +`define L3_CREQ_SIZE 0 `endif // Core Response Queue Size -`ifndef L3CRSQ_SIZE -`define L3CRSQ_SIZE 2 +`ifndef L3_CRSQ_SIZE +`define L3_CRSQ_SIZE 2 `endif // Miss Handling Register Size -`ifndef L3MSHR_SIZE -`define L3MSHR_SIZE 16 +`ifndef L3_MSHR_SIZE +`define L3_MSHR_SIZE 16 `endif // Memory Request Queue Size -`ifndef L3MREQ_SIZE -`define L3MREQ_SIZE 4 +`ifndef L3_MREQ_SIZE +`define L3_MREQ_SIZE 4 `endif // Memory Response Queue Size -`ifndef L3MRSQ_SIZE -`define L3MRSQ_SIZE 0 +`ifndef L3_MRSQ_SIZE +`define L3_MRSQ_SIZE 0 `endif `endif diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index fcfdd524..a4a27eb0 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -12,15 +12,15 @@ module VX_core #( // Memory request output wire mem_req_valid, output wire mem_req_rw, - output wire [`DMEM_BYTEEN_WIDTH-1:0] mem_req_byteen, - output wire [`DMEM_ADDR_WIDTH-1:0] mem_req_addr, - output wire [`DMEM_DATA_WIDTH-1:0] mem_req_data, + output wire [`DCACHE_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen, + output wire [`DCACHE_MEM_ADDR_WIDTH-1:0] mem_req_addr, + output wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_req_data, output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag, input wire mem_req_ready, // Memory reponse input wire mem_rsp_valid, - input wire [`DMEM_DATA_WIDTH-1:0] mem_rsp_data, + input wire [`DCACHE_MEM_DATA_WIDTH-1:0] mem_rsp_data, input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_rsp_ready, @@ -32,13 +32,13 @@ module VX_core #( `endif VX_mem_req_if #( - .DATA_WIDTH (`DMEM_DATA_WIDTH), - .ADDR_WIDTH (`DMEM_ADDR_WIDTH), + .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH), + .ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH), .TAG_WIDTH (`XMEM_TAG_WIDTH) ) mem_req_if(); VX_mem_rsp_if #( - .DATA_WIDTH (`DMEM_DATA_WIDTH), + .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH), .TAG_WIDTH (`XMEM_TAG_WIDTH) ) mem_rsp_if(); @@ -58,25 +58,25 @@ module VX_core #( //-- VX_dcache_req_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .TAG_WIDTH (`DCORE_TAG_WIDTH) + .NUM_REQS (`DCACHE_NUM_REQS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH) ) dcache_req_if(); VX_dcache_rsp_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .TAG_WIDTH (`DCORE_TAG_WIDTH) + .NUM_REQS (`DCACHE_NUM_REQS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH) ) dcache_rsp_if(); VX_icache_req_if #( - .WORD_SIZE (`IWORD_SIZE), - .TAG_WIDTH (`ICORE_TAG_WIDTH) + .WORD_SIZE (`ICACHE_WORD_SIZE), + .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH) ) icache_req_if(); VX_icache_rsp_if #( - .WORD_SIZE (`IWORD_SIZE), - .TAG_WIDTH (`ICORE_TAG_WIDTH) + .WORD_SIZE (`ICACHE_WORD_SIZE), + .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH) ) icache_rsp_if(); VX_pipeline #( diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index e77f28b0..733de498 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -7,15 +7,15 @@ module VX_csr_data #( input wire reset, `ifdef PERF_ENABLE - VX_perf_memsys_if perf_memsys_if, - VX_perf_pipeline_if perf_pipeline_if, + VX_perf_memsys_if.slave perf_memsys_if, + VX_perf_pipeline_if.slave perf_pipeline_if, `endif - VX_cmt_to_csr_if cmt_to_csr_if, - VX_fetch_to_csr_if fetch_to_csr_if, + VX_cmt_to_csr_if.slave cmt_to_csr_if, + VX_fetch_to_csr_if.slave fetch_to_csr_if, `ifdef EXT_F_ENABLE - VX_fpu_to_csr_if fpu_to_csr_if, + VX_fpu_to_csr_if.slave fpu_to_csr_if, `endif input wire read_enable, @@ -44,19 +44,16 @@ module VX_csr_data #( reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr; - always @(posedge clk) begin - + always @(posedge clk) begin `ifdef EXT_F_ENABLE if (reset) begin fcsr <= '0; - end - + end if (fpu_to_csr_if.write_enable) begin fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] | fpu_to_csr_if.write_fflags; end `endif - if (write_enable) begin case (write_addr) `CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0]; @@ -77,7 +74,7 @@ module VX_csr_data #( `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data; default: begin - assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr); + `ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr)); end endcase end diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index aa07188a..1628253c 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -3,26 +3,26 @@ module VX_csr_unit #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, `ifdef PERF_ENABLE - VX_perf_memsys_if perf_memsys_if, - VX_perf_pipeline_if perf_pipeline_if, + VX_perf_memsys_if.slave perf_memsys_if, + VX_perf_pipeline_if.slave perf_pipeline_if, `endif - VX_cmt_to_csr_if cmt_to_csr_if, - VX_fetch_to_csr_if fetch_to_csr_if, - VX_csr_req_if csr_req_if, - VX_commit_if csr_commit_if, + VX_cmt_to_csr_if.slave cmt_to_csr_if, + VX_fetch_to_csr_if.slave fetch_to_csr_if, + VX_csr_req_if.slave csr_req_if, + VX_commit_if.master csr_commit_if, `ifdef EXT_F_ENABLE - VX_fpu_to_csr_if fpu_to_csr_if, - input wire[`NUM_WARPS-1:0] fpu_pending, + VX_fpu_to_csr_if.slave fpu_to_csr_if, + input wire[`NUM_WARPS-1:0] fpu_pending, `endif output wire[`NUM_WARPS-1:0] pending, - input wire busy + input wire busy ); wire csr_we_s1; wire [`CSR_ADDR_BITS-1:0] csr_addr_s1; diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index d4253208..fbcbba11 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -19,12 +19,12 @@ module VX_decode #( input wire reset, // inputs - VX_ifetch_rsp_if ifetch_rsp_if, + VX_ifetch_rsp_if.slave ifetch_rsp_if, // outputs - VX_decode_if decode_if, - VX_wstall_if wstall_if, - VX_join_if join_if + VX_decode_if.master decode_if, + VX_wstall_if.master wstall_if, + VX_join_if.master join_if ); `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (clk) diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 46a6a406..d557373c 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -238,45 +238,33 @@ `endif // non-cacheable address bit -`define NC_ADDR_BITS 1 +`define NC_FLAG_BITS 1 ////////////////////////// Icache Configurable Knobs ////////////////////////// // Cache ID `define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0) -// Number of banks -`define INUM_BANKS 1 - // Word size in bytes -`define IWORD_SIZE 4 +`define ICACHE_WORD_SIZE 4 // Block size in bytes `define ICACHE_LINE_SIZE `L1_BLOCK_SIZE -// Core request address bits -`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE)) - -// Core request byte enable bits -`define ICORE_BYTEEN_WIDTH `DWORD_SIZE - // TAG sharing enable -`define ICORE_TAG_ID_BITS `NW_BITS +`define ICACHE_CORE_TAG_ID_BITS `NW_BITS // Core request tag bits -`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS) +`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICACHE_CORE_TAG_ID_BITS) // Memory request data bits -`define IMEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8) +`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8) // Memory request address bits -`define IMEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE)) - -// Memory byte enable bits -`define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE +`define ICACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`ICACHE_LINE_SIZE)) // Memory request tag bits -`define IMEM_TAG_WIDTH (`IMEM_ADDR_WIDTH + `CLOG2(`IMSHR_SIZE)) +`define ICACHE_MEM_TAG_WIDTH `CLOG2(`ICACHE_MSHR_SIZE) ////////////////////////// Dcache Configurable Knobs ////////////////////////// @@ -284,129 +272,126 @@ `define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1) // Word size in bytes -`define DWORD_SIZE 4 +`define DCACHE_WORD_SIZE 4 // Block size in bytes `define DCACHE_LINE_SIZE `L1_BLOCK_SIZE -// Core request address bits -`define DCORE_ADDR_WIDTH (32-`CLOG2(`DWORD_SIZE)) - // TAG sharing enable `define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE) -`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE) +`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_FLAG_BITS + `SM_ENABLE) // Input request tag bits -`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) +`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS) // Memory request data bits -`define DMEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8) +`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8) // Memory request address bits -`define DMEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE)) +`define DCACHE_MEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE)) // Memory byte enable bits -`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE +`define DCACHE_MEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE // Input request size -`define DNUM_REQS `NUM_THREADS +`define DCACHE_NUM_REQS `NUM_THREADS // Memory request tag bits -`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE) -`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH) -`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `CLOG2(`DMSHR_SIZE) + `NC_ADDR_BITS), `_DNC_MEM_TAG_WIDTH) +`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DCACHE_WORD_SIZE) +`define _DNC_MEM_TAG_WIDTH ($clog2(`DCACHE_NUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCACHE_CORE_TAG_WIDTH) +`define DCACHE_MEM_TAG_WIDTH `MAX((`CLOG2(`DCACHE_NUM_BANKS) + `CLOG2(`DCACHE_MSHR_SIZE) + `NC_FLAG_BITS), `_DNC_MEM_TAG_WIDTH) ////////////////////////// SM Configurable Knobs ////////////////////////////// // Cache ID -`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2) +`define SMEM_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2) // Word size in bytes -`define SWORD_SIZE 4 +`define SMEM_WORD_SIZE 4 // bank address offset -`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE) +`define SMEM_BANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SMEM_WORD_SIZE) // Input request size -`define SNUM_REQS `NUM_THREADS +`define SMEM_NUM_REQS `NUM_THREADS ////////////////////////// L2cache Configurable Knobs ///////////////////////// // Cache ID -`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID) +`define L2_CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID) // Word size in bytes -`define L2WORD_SIZE `DCACHE_LINE_SIZE +`define L2_WORD_SIZE `DCACHE_LINE_SIZE // Block size in bytes -`define L2CACHE_LINE_SIZE (`L2_ENABLE ? `MEM_BLOCK_SIZE : `L2WORD_SIZE) +`define L2_CACHE_LINE_SIZE ((`L2_ENABLE) ? `MEM_BLOCK_SIZE : `L2_WORD_SIZE) // Input request tag bits -`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES)) +`define L2_CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH + `CLOG2(`NUM_CORES)) // Memory request data bits -`define L2MEM_DATA_WIDTH (`L2CACHE_LINE_SIZE * 8) +`define L2_MEM_DATA_WIDTH (`L2_CACHE_LINE_SIZE * 8) // Memory request address bits -`define L2MEM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE)) +`define L2_MEM_ADDR_WIDTH (32 - `CLOG2(`L2_CACHE_LINE_SIZE)) // Memory byte enable bits -`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE +`define L2_MEM_BYTEEN_WIDTH `L2_CACHE_LINE_SIZE // Input request size -`define L2NUM_REQS `NUM_CORES +`define L2_NUM_REQS `NUM_CORES // Memory request tag bits -`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE) -`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH) -`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `CLOG2(`L2MSHR_SIZE) + `NC_ADDR_BITS), `_L2NC_MEM_TAG_WIDTH) -`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS))) +`define _L2_MEM_ADDR_RATIO_W $clog2(`L2_CACHE_LINE_SIZE / `L2_WORD_SIZE) +`define _L2_NC_MEM_TAG_WIDTH ($clog2(`L2_NUM_REQS) + `_L2_MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH) +`define _L2_MEM_TAG_WIDTH `MAX((`CLOG2(`L2_NUM_BANKS) + `CLOG2(`L2_MSHR_SIZE) + `NC_FLAG_BITS), `_L2_NC_MEM_TAG_WIDTH) +`define L2_MEM_TAG_WIDTH ((`L2_ENABLE) ? `_L2_MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2_NUM_REQS))) ////////////////////////// L3cache Configurable Knobs ///////////////////////// // Cache ID -`define L3CACHE_ID 0 +`define L3_CACHE_ID 0 // Word size in bytes -`define L3WORD_SIZE `L2CACHE_LINE_SIZE +`define L3_WORD_SIZE `L2_CACHE_LINE_SIZE // Block size in bytes -`define L3CACHE_LINE_SIZE (`L3_ENABLE ? `MEM_BLOCK_SIZE : `L3WORD_SIZE) +`define L3_CACHE_LINE_SIZE ((`L3_ENABLE) ? `MEM_BLOCK_SIZE : `L3_WORD_SIZE) // Input request tag bits -`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS)) +`define L3_CORE_TAG_WIDTH (`L2_CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS)) // Memory request data bits -`define L3MEM_DATA_WIDTH (`L3CACHE_LINE_SIZE * 8) +`define L3_MEM_DATA_WIDTH (`L3_CACHE_LINE_SIZE * 8) // Memory request address bits -`define L3MEM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE)) +`define L3_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_CACHE_LINE_SIZE)) // Memory byte enable bits -`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE +`define L3_MEM_BYTEEN_WIDTH `L3_CACHE_LINE_SIZE // Input request size -`define L3NUM_REQS `NUM_CLUSTERS +`define L3_NUM_REQS `NUM_CLUSTERS // Memory request tag bits -`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE) -`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH) -`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `CLOG2(`L3MSHR_SIZE) + `NC_ADDR_BITS), `_L3NC_MEM_TAG_WIDTH) -`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS))) +`define _L3_MEM_ADDR_RATIO_W $clog2(`L3_CACHE_LINE_SIZE / `L3_WORD_SIZE) +`define _L3_NC_MEM_TAG_WIDTH ($clog2(`L3_NUM_REQS) + `_L3_MEM_ADDR_RATIO_W + `L2_MEM_TAG_WIDTH) +`define _L3_MEM_TAG_WIDTH `MAX((`CLOG2(`L3_NUM_BANKS) + `CLOG2(`L3_MSHR_SIZE) + `NC_FLAG_BITS), `_L3_NC_MEM_TAG_WIDTH) +`define L3_MEM_TAG_WIDTH ((`L3_ENABLE) ? `_L3_MEM_TAG_WIDTH : (`L2_MEM_TAG_WIDTH + `CLOG2(`L3_NUM_REQS))) /////////////////////////////////////////////////////////////////////////////// -`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH -`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH -`define VX_MEM_DATA_WIDTH `L3MEM_DATA_WIDTH -`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH -`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH +`define VX_MEM_BYTEEN_WIDTH `L3_MEM_BYTEEN_WIDTH +`define VX_MEM_ADDR_WIDTH `L3_MEM_ADDR_WIDTH +`define VX_MEM_DATA_WIDTH `L3_MEM_DATA_WIDTH +`define VX_MEM_TAG_WIDTH `L3_MEM_TAG_WIDTH +`define VX_CORE_TAG_WIDTH `L3_CORE_TAG_WIDTH `define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES) `define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)} // Merged D-cache/I-cache memory tag -`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH + `CLOG2(2)) +`define XMEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH + `CLOG2(2)) `include "VX_types.vh" diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 98eced32..9c09d826 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -9,42 +9,42 @@ module VX_execute #( input wire reset, // Dcache interface - VX_dcache_req_if dcache_req_if, - VX_dcache_rsp_if dcache_rsp_if, + VX_dcache_req_if.master dcache_req_if, + VX_dcache_rsp_if.slave dcache_rsp_if, // commit interface - VX_cmt_to_csr_if cmt_to_csr_if, + VX_cmt_to_csr_if.slave cmt_to_csr_if, // fetch interface - VX_fetch_to_csr_if fetch_to_csr_if, + VX_fetch_to_csr_if.slave fetch_to_csr_if, `ifdef PERF_ENABLE - VX_perf_memsys_if perf_memsys_if, - VX_perf_pipeline_if perf_pipeline_if, + VX_perf_memsys_if.slave perf_memsys_if, + VX_perf_pipeline_if.slave perf_pipeline_if, `endif // inputs - VX_alu_req_if alu_req_if, - VX_lsu_req_if lsu_req_if, - VX_csr_req_if csr_req_if, + VX_alu_req_if.slave alu_req_if, + VX_lsu_req_if.slave lsu_req_if, + VX_csr_req_if.slave csr_req_if, `ifdef EXT_F_ENABLE - VX_fpu_req_if fpu_req_if, + VX_fpu_req_if.slave fpu_req_if, `endif - VX_gpu_req_if gpu_req_if, + VX_gpu_req_if.slave gpu_req_if, // outputs - VX_branch_ctl_if branch_ctl_if, - VX_warp_ctl_if warp_ctl_if, - VX_commit_if alu_commit_if, - VX_commit_if ld_commit_if, - VX_commit_if st_commit_if, - VX_commit_if csr_commit_if, + VX_branch_ctl_if.master branch_ctl_if, + VX_warp_ctl_if.master warp_ctl_if, + VX_commit_if.master alu_commit_if, + VX_commit_if.master ld_commit_if, + VX_commit_if.master st_commit_if, + VX_commit_if.master csr_commit_if, `ifdef EXT_F_ENABLE - VX_commit_if fpu_commit_if, + VX_commit_if.master fpu_commit_if, `endif - VX_commit_if gpu_commit_if, + VX_commit_if.master gpu_commit_if, - input wire busy + input wire busy ); `ifdef EXT_F_ENABLE VX_fpu_to_csr_if fpu_to_csr_if(); diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index 765d9b72..7db7faab 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -9,23 +9,23 @@ module VX_fetch #( input wire reset, // Icache interface - VX_icache_req_if icache_req_if, - VX_icache_rsp_if icache_rsp_if, + VX_icache_req_if.master icache_req_if, + VX_icache_rsp_if.slave icache_rsp_if, // inputs - VX_wstall_if wstall_if, - VX_join_if join_if, - VX_branch_ctl_if branch_ctl_if, - VX_warp_ctl_if warp_ctl_if, + VX_wstall_if.slave wstall_if, + VX_join_if.slave join_if, + VX_branch_ctl_if.slave branch_ctl_if, + VX_warp_ctl_if.slave warp_ctl_if, // outputs - VX_ifetch_rsp_if ifetch_rsp_if, + VX_ifetch_rsp_if.master ifetch_rsp_if, // csr interface - VX_fetch_to_csr_if fetch_to_csr_if, + VX_fetch_to_csr_if.master fetch_to_csr_if, // busy status - output wire busy + output wire busy ); VX_ifetch_req_if ifetch_req_if(); diff --git a/hw/rtl/VX_fpu_unit.v b/hw/rtl/VX_fpu_unit.v index cba4d399..d2b6f118 100644 --- a/hw/rtl/VX_fpu_unit.v +++ b/hw/rtl/VX_fpu_unit.v @@ -6,9 +6,9 @@ module VX_fpu_unit #( input wire clk, input wire reset, - VX_fpu_req_if fpu_req_if, - VX_fpu_to_csr_if fpu_to_csr_if, - VX_commit_if fpu_commit_if, + VX_fpu_req_if.slave fpu_req_if, + VX_fpu_to_csr_if.master fpu_to_csr_if, + VX_commit_if.master fpu_commit_if, input wire[`NUM_WARPS-1:0] csr_pending, output wire[`NUM_WARPS-1:0] pending diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index cf601b9a..05fc6248 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -3,15 +3,15 @@ module VX_gpr_stage #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_writeback_if writeback_if, - VX_gpr_req_if gpr_req_if, + VX_writeback_if.slave writeback_if, + VX_gpr_req_if.slave gpr_req_if, // outputs - VX_gpr_rsp_if gpr_rsp_if + VX_gpr_rsp_if.master gpr_rsp_if ); `UNUSED_PARAM (CORE_ID) diff --git a/hw/rtl/VX_gpu_unit.v b/hw/rtl/VX_gpu_unit.v index 4d8b9168..18e9f573 100644 --- a/hw/rtl/VX_gpu_unit.v +++ b/hw/rtl/VX_gpu_unit.v @@ -5,15 +5,15 @@ module VX_gpu_unit #( ) ( `SCOPE_IO_VX_gpu_unit - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Inputs - VX_gpu_req_if gpu_req_if, + VX_gpu_req_if.slave gpu_req_if, // Outputs - VX_warp_ctl_if warp_ctl_if, - VX_commit_if gpu_commit_if + VX_warp_ctl_if.master warp_ctl_if, + VX_commit_if.master gpu_commit_if ); `UNUSED_PARAM (CORE_ID) diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 4bc65591..9b9fd397 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -7,10 +7,10 @@ module VX_ibuffer #( input wire reset, // inputs - VX_decode_if decode_if, + VX_decode_if.slave decode_if, // outputs - VX_ibuffer_if ibuffer_if + VX_ibuffer_if.master ibuffer_if ); `UNUSED_PARAM (CORE_ID) diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index c17553a8..96ab2531 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -5,18 +5,18 @@ module VX_icache_stage #( ) ( `SCOPE_IO_VX_icache_stage - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Icache interface - VX_icache_req_if icache_req_if, - VX_icache_rsp_if icache_rsp_if, + VX_icache_req_if.master icache_req_if, + VX_icache_rsp_if.slave icache_rsp_if, // request - VX_ifetch_req_if ifetch_req_if, + VX_ifetch_req_if.slave ifetch_req_if, // reponse - VX_ifetch_rsp_if ifetch_rsp_if + VX_ifetch_rsp_if.master ifetch_rsp_if ); `UNUSED_PARAM (CORE_ID) diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index 3e59f28d..b761e9d9 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -1,21 +1,21 @@ `include "VX_define.vh" module VX_instr_demux ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_ibuffer_if ibuffer_if, - VX_gpr_rsp_if gpr_rsp_if, + VX_ibuffer_if.slave ibuffer_if, + VX_gpr_rsp_if.slave gpr_rsp_if, // outputs - VX_alu_req_if alu_req_if, - VX_lsu_req_if lsu_req_if, - VX_csr_req_if csr_req_if, + VX_alu_req_if.master alu_req_if, + VX_lsu_req_if.master lsu_req_if, + VX_csr_req_if.master csr_req_if, `ifdef EXT_F_ENABLE - VX_fpu_req_if fpu_req_if, + VX_fpu_req_if.master fpu_req_if, `endif - VX_gpu_req_if gpu_req_if + VX_gpu_req_if.master gpu_req_if ); wire [`NT_BITS-1:0] tid; wire alu_req_ready; diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 8b00ddd9..31483b9a 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -9,19 +9,19 @@ module VX_issue #( input wire reset, `ifdef PERF_ENABLE - VX_perf_pipeline_if perf_pipeline_if, + VX_perf_pipeline_if.master perf_pipeline_if, `endif - VX_decode_if decode_if, - VX_writeback_if writeback_if, + VX_decode_if.slave decode_if, + VX_writeback_if.slave writeback_if, - VX_alu_req_if alu_req_if, - VX_lsu_req_if lsu_req_if, - VX_csr_req_if csr_req_if, + VX_alu_req_if.master alu_req_if, + VX_lsu_req_if.master lsu_req_if, + VX_csr_req_if.master csr_req_if, `ifdef EXT_F_ENABLE - VX_fpu_req_if fpu_req_if, + VX_fpu_req_if.master fpu_req_if, `endif - VX_gpu_req_if gpu_req_if + VX_gpu_req_if.master gpu_req_if ); VX_ibuffer_if ibuffer_if(); VX_ibuffer_if execute_if(); diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 3afa5a28..de05a60c 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -5,26 +5,26 @@ module VX_lsu_unit #( ) ( `SCOPE_IO_VX_lsu_unit - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Dcache interface - VX_dcache_req_if dcache_req_if, - VX_dcache_rsp_if dcache_rsp_if, + VX_dcache_req_if.master dcache_req_if, + VX_dcache_rsp_if.slave dcache_rsp_if, // inputs - VX_lsu_req_if lsu_req_if, + VX_lsu_req_if.slave lsu_req_if, // outputs - VX_commit_if ld_commit_if, - VX_commit_if st_commit_if + VX_commit_if.master ld_commit_if, + VX_commit_if.master st_commit_if ); localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE); localparam MEM_ADDRW = 32 - MEM_ASHIFT; - localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE); + localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE); - localparam ADDR_TYPEW = `NC_ADDR_BITS + `SM_ENABLE; + localparam ADDR_TYPEW = `NC_FLAG_BITS + `SM_ENABLE; `STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter")) `STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter")) @@ -321,9 +321,9 @@ module VX_lsu_unit #( for (integer i = 0; i < `LSUQ_SIZE; ++i) begin if (pending_reqs[i][0]) begin - assert(($time - pending_reqs[i][1 +: 64]) < delay_timeout) else - $error("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d", - $time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]); + `ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout, + ("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d", + $time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS])); end end end diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index a1f27f81..5a410476 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -10,9 +10,9 @@ module VX_mem_arb #( parameter BUFFERED_RSP = 0, parameter TYPE = "P", - localparam DATA_SIZE = (DATA_WIDTH / 8), - localparam LOG_NUM_REQS = `CLOG2(NUM_REQS), - localparam TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS + parameter DATA_SIZE = (DATA_WIDTH / 8), + parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), + parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS ) ( input wire clk, input wire reset, diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index a889216e..7c53ccc8 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -5,24 +5,24 @@ module VX_mem_unit # ( ) ( `SCOPE_IO_VX_mem_unit - input wire clk, - input wire reset, + input wire clk, + input wire reset, `ifdef PERF_ENABLE - VX_perf_memsys_if perf_memsys_if, + VX_perf_memsys_if.master perf_memsys_if, `endif // Core <-> Dcache - VX_dcache_req_if dcache_req_if, - VX_dcache_rsp_if dcache_rsp_if, + VX_dcache_req_if.slave dcache_req_if, + VX_dcache_rsp_if.master dcache_rsp_if, // Core <-> Icache - VX_icache_req_if icache_req_if, - VX_icache_rsp_if icache_rsp_if, + VX_icache_req_if.slave icache_req_if, + VX_icache_rsp_if.master icache_rsp_if, // Memory - VX_mem_req_if mem_req_if, - VX_mem_rsp_if mem_rsp_if + VX_mem_req_if.master mem_req_if, + VX_mem_rsp_if.slave mem_rsp_if ); `ifdef PERF_ENABLE @@ -30,37 +30,37 @@ module VX_mem_unit # ( `endif VX_mem_req_if #( - .DATA_WIDTH (`IMEM_DATA_WIDTH), - .ADDR_WIDTH (`IMEM_ADDR_WIDTH), - .TAG_WIDTH (`IMEM_TAG_WIDTH) + .DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH), + .ADDR_WIDTH (`ICACHE_MEM_ADDR_WIDTH), + .TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH) ) icache_mem_req_if(); VX_mem_rsp_if #( - .DATA_WIDTH (`IMEM_DATA_WIDTH), - .TAG_WIDTH (`IMEM_TAG_WIDTH) + .DATA_WIDTH (`ICACHE_MEM_DATA_WIDTH), + .TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH) ) icache_mem_rsp_if(); VX_mem_req_if #( - .DATA_WIDTH (`DMEM_DATA_WIDTH), - .ADDR_WIDTH (`DMEM_ADDR_WIDTH), - .TAG_WIDTH (`DMEM_TAG_WIDTH) + .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH), + .ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH), + .TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH) ) dcache_mem_req_if(); VX_mem_rsp_if #( - .DATA_WIDTH (`DMEM_DATA_WIDTH), - .TAG_WIDTH (`DMEM_TAG_WIDTH) + .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH), + .TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH) ) dcache_mem_rsp_if(); VX_dcache_req_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + .NUM_REQS (`DCACHE_NUM_REQS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE) ) dcache_req_tmp_if(); VX_dcache_rsp_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + .NUM_REQS (`DCACHE_NUM_REQS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE) ) dcache_rsp_tmp_if(); `RESET_RELAY (icache_reset); @@ -71,18 +71,18 @@ module VX_mem_unit # ( .CACHE_ID (`ICACHE_ID), .CACHE_SIZE (`ICACHE_SIZE), .CACHE_LINE_SIZE (`ICACHE_LINE_SIZE), - .NUM_BANKS (`INUM_BANKS), - .WORD_SIZE (`IWORD_SIZE), + .NUM_BANKS (1), + .WORD_SIZE (`ICACHE_WORD_SIZE), .NUM_REQS (1), - .CREQ_SIZE (`ICREQ_SIZE), - .CRSQ_SIZE (`ICRSQ_SIZE), - .MSHR_SIZE (`IMSHR_SIZE), - .MRSQ_SIZE (`IMRSQ_SIZE), - .MREQ_SIZE (`IMREQ_SIZE), + .CREQ_SIZE (`ICACHE_CREQ_SIZE), + .CRSQ_SIZE (`ICACHE_CRSQ_SIZE), + .MSHR_SIZE (`ICACHE_MSHR_SIZE), + .MRSQ_SIZE (`ICACHE_MRSQ_SIZE), + .MREQ_SIZE (`ICACHE_MREQ_SIZE), .WRITE_ENABLE (0), - .CORE_TAG_WIDTH (`ICORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS), - .MEM_TAG_WIDTH (`IMEM_TAG_WIDTH) + .CORE_TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH), + .CORE_TAG_ID_BITS (`ICACHE_CORE_TAG_ID_BITS), + .MEM_TAG_WIDTH (`ICACHE_MEM_TAG_WIDTH) ) icache ( `SCOPE_BIND_VX_mem_unit_icache @@ -92,7 +92,7 @@ module VX_mem_unit # ( // Core request .core_req_valid (icache_req_if.valid), .core_req_rw (1'b0), - .core_req_byteen ({`IWORD_SIZE{1'b1}}), + .core_req_byteen ('b0), .core_req_addr (icache_req_if.addr), .core_req_data ('x), .core_req_tag (icache_req_if.tag), @@ -129,19 +129,19 @@ module VX_mem_unit # ( .CACHE_ID (`DCACHE_ID), .CACHE_SIZE (`DCACHE_SIZE), .CACHE_LINE_SIZE (`DCACHE_LINE_SIZE), - .NUM_BANKS (`DNUM_BANKS), - .NUM_PORTS (`DNUM_PORTS), - .WORD_SIZE (`DWORD_SIZE), - .NUM_REQS (`DNUM_REQS), - .CREQ_SIZE (`DCREQ_SIZE), - .CRSQ_SIZE (`DCRSQ_SIZE), - .MSHR_SIZE (`DMSHR_SIZE), - .MRSQ_SIZE (`DMRSQ_SIZE), - .MREQ_SIZE (`DMREQ_SIZE), + .NUM_BANKS (`DCACHE_NUM_BANKS), + .NUM_PORTS (`DCACHE_NUM_PORTS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .NUM_REQS (`DCACHE_NUM_REQS), + .CREQ_SIZE (`DCACHE_CREQ_SIZE), + .CRSQ_SIZE (`DCACHE_CRSQ_SIZE), + .MSHR_SIZE (`DCACHE_MSHR_SIZE), + .MRSQ_SIZE (`DCACHE_MRSQ_SIZE), + .MREQ_SIZE (`DCACHE_MREQ_SIZE), .WRITE_ENABLE (1), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE), - .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE), - .MEM_TAG_WIDTH (`DMEM_TAG_WIDTH), + .CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE), + .CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE), + .MEM_TAG_WIDTH (`DCACHE_MEM_TAG_WIDTH), .NC_ENABLE (1) ) dcache ( `SCOPE_BIND_VX_mem_unit_dcache @@ -187,15 +187,15 @@ module VX_mem_unit # ( if (`SM_ENABLE) begin VX_dcache_req_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + .NUM_REQS (`DCACHE_NUM_REQS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE) ) smem_req_if(); VX_dcache_rsp_if #( - .NUM_REQS (`DNUM_REQS), - .WORD_SIZE (`DWORD_SIZE), - .TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE) + .NUM_REQS (`DCACHE_NUM_REQS), + .WORD_SIZE (`DCACHE_WORD_SIZE), + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE) ) smem_rsp_if(); `RESET_RELAY (smem_arb_reset); @@ -205,7 +205,7 @@ module VX_mem_unit # ( .NUM_REQS (2), .LANES (`NUM_THREADS), .DATA_SIZE (4), - .TAG_IN_WIDTH (`DCORE_TAG_WIDTH), + .TAG_IN_WIDTH (`DCACHE_CORE_TAG_WIDTH), .TYPE ("P"), .BUFFERED_REQ (2), .BUFFERED_RSP (1) @@ -247,16 +247,16 @@ module VX_mem_unit # ( ); VX_shared_mem #( - .CACHE_ID (`SCACHE_ID), + .CACHE_ID (`SMEM_ID), .CACHE_SIZE (`SMEM_SIZE), - .NUM_BANKS (`SNUM_BANKS), - .WORD_SIZE (`SWORD_SIZE), - .NUM_REQS (`SNUM_REQS), - .CREQ_SIZE (`SCREQ_SIZE), - .CRSQ_SIZE (`SCRSQ_SIZE), - .CORE_TAG_WIDTH (`DCORE_TAG_WIDTH-`SM_ENABLE), - .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS-`SM_ENABLE), - .BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET) + .NUM_BANKS (`SMEM_NUM_BANKS), + .WORD_SIZE (`SMEM_WORD_SIZE), + .NUM_REQS (`SMEM_NUM_REQS), + .CREQ_SIZE (`SMEM_CREQ_SIZE), + .CRSQ_SIZE (`SMEM_CRSQ_SIZE), + .CORE_TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH-`SM_ENABLE), + .CORE_TAG_ID_BITS (`DCACHE_CORE_TAG_ID_BITS-`SM_ENABLE), + .BANK_ADDR_OFFSET (`SMEM_BANK_ADDR_OFFSET) ) smem ( .clk (clk), .reset (smem_reset), @@ -283,9 +283,9 @@ module VX_mem_unit # ( ); end else begin // core to D-cache request - for (genvar i = 0; i < `DNUM_REQS; ++i) begin + for (genvar i = 0; i < `DCACHE_NUM_REQS; ++i) begin VX_skid_buffer #( - .DATAW ((32-`CLOG2(`DWORD_SIZE)) + 1 + `DWORD_SIZE + (8*`DWORD_SIZE) + `DCORE_TAG_WIDTH) + .DATAW ((32-`CLOG2(`DCACHE_WORD_SIZE)) + 1 + `DCACHE_WORD_SIZE + (8*`DCACHE_WORD_SIZE) + `DCACHE_CORE_TAG_WIDTH) ) req_buf ( .clk (clk), .reset (reset), @@ -306,16 +306,16 @@ module VX_mem_unit # ( assign dcache_rsp_tmp_if.ready = dcache_rsp_if.ready; end - wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag); - wire [`DMEM_TAG_WIDTH-1:0] icache_mem_rsp_tag; - assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`IMEM_TAG_WIDTH-1:0]; + wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DCACHE_MEM_TAG_WIDTH'(icache_mem_req_if.tag); + wire [`DCACHE_MEM_TAG_WIDTH-1:0] icache_mem_rsp_tag; + assign icache_mem_rsp_if.tag = icache_mem_rsp_tag[`ICACHE_MEM_TAG_WIDTH-1:0]; `UNUSED_VAR (icache_mem_rsp_tag) VX_mem_arb #( .NUM_REQS (2), - .DATA_WIDTH (`DMEM_DATA_WIDTH), - .ADDR_WIDTH (`DMEM_ADDR_WIDTH), - .TAG_IN_WIDTH (`DMEM_TAG_WIDTH), + .DATA_WIDTH (`DCACHE_MEM_DATA_WIDTH), + .ADDR_WIDTH (`DCACHE_MEM_ADDR_WIDTH), + .TAG_IN_WIDTH (`DCACHE_MEM_TAG_WIDTH), .TYPE ("R"), .TAG_SEL_IDX (1), // Skip 0 for NC flag .BUFFERED_REQ (1), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 53a88e41..ea763315 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -15,30 +15,30 @@ module VX_pipeline #( output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen, output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr, output wire [`NUM_THREADS-1:0][31:0] dcache_req_data, - output wire [`NUM_THREADS-1:0][`DCORE_TAG_WIDTH-1:0] dcache_req_tag, + output wire [`NUM_THREADS-1:0][`DCACHE_CORE_TAG_WIDTH-1:0] dcache_req_tag, input wire [`NUM_THREADS-1:0] dcache_req_ready, // Dcache core reponse input wire dcache_rsp_valid, input wire [`NUM_THREADS-1:0] dcache_rsp_tmask, input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data, - input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag, + input wire [`DCACHE_CORE_TAG_WIDTH-1:0] dcache_rsp_tag, output wire dcache_rsp_ready, // Icache core request output wire icache_req_valid, output wire [29:0] icache_req_addr, - output wire [`ICORE_TAG_WIDTH-1:0] icache_req_tag, + output wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_req_tag, input wire icache_req_ready, // Icache core response input wire icache_rsp_valid, input wire [31:0] icache_rsp_data, - input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag, + input wire [`ICACHE_CORE_TAG_WIDTH-1:0] icache_rsp_tag, output wire icache_rsp_ready, `ifdef PERF_ENABLE - VX_perf_memsys_if perf_memsys_if, + VX_perf_memsys_if.slave perf_memsys_if, `endif // Status @@ -51,7 +51,7 @@ module VX_pipeline #( VX_dcache_req_if #( .NUM_REQS (`NUM_THREADS), .WORD_SIZE (4), - .TAG_WIDTH (`DCORE_TAG_WIDTH) + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH) ) dcache_req_if(); assign dcache_req_valid = dcache_req_if.valid; @@ -69,7 +69,7 @@ module VX_pipeline #( VX_dcache_rsp_if #( .NUM_REQS (`NUM_THREADS), .WORD_SIZE (4), - .TAG_WIDTH (`DCORE_TAG_WIDTH) + .TAG_WIDTH (`DCACHE_CORE_TAG_WIDTH) ) dcache_rsp_if(); assign dcache_rsp_if.valid = dcache_rsp_valid; @@ -84,7 +84,7 @@ module VX_pipeline #( VX_icache_req_if #( .WORD_SIZE (4), - .TAG_WIDTH (`ICORE_TAG_WIDTH) + .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH) ) icache_req_if(); assign icache_req_valid = icache_req_if.valid; @@ -98,7 +98,7 @@ module VX_pipeline #( VX_icache_rsp_if #( .WORD_SIZE (4), - .TAG_WIDTH (`ICORE_TAG_WIDTH) + .TAG_WIDTH (`ICACHE_CORE_TAG_WIDTH) ) icache_rsp_if(); assign icache_rsp_if.valid = icache_rsp_valid; diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index da14bb2f..e9ec6deb 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// `ifndef SYNTHESIS + `ifndef NDEBUG `define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \ x \ @@ -48,18 +49,24 @@ . x () \ /* verilator lint_on PINCONNECTEMPTY */ +`define ASSERT(cond, msg) \ + assert(cond) else $error msg + `define STATIC_ASSERT(cond, msg) \ generate \ if (!(cond)) $error msg; \ endgenerate `define RUNTIME_ASSERT(cond, msg) \ - always @(posedge clk) \ + always @(posedge clk) begin \ assert(cond) else $error msg; \ + end `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ + `else // SYNTHESIS + `define DEBUG_BLOCK(x) `define IGNORE_UNUSED_BEGIN `define IGNORE_UNUSED_END @@ -68,10 +75,12 @@ `define UNUSED_PARAM(x) `define UNUSED_VAR(x) `define UNUSED_PIN(x) . x () +`define ASSERT(cond, msg) if (cond); `define STATIC_ASSERT(cond, msg) `define RUNTIME_ASSERT(cond, msg) `define TRACING_ON `define TRACING_OFF + `endif // SYNTHESIS /////////////////////////////////////////////////////////////////////////////// @@ -131,12 +140,20 @@ end \ dpi_trace("}") -`define RESET_RELAY(signal) \ - wire signal; \ +`define RESET_RELAY(signal) \ + wire signal; \ VX_reset_relay __``signal ( \ - .clk (clk), \ - .reset (reset), \ - .reset_o (signal) \ + .clk (clk), \ + .reset (reset), \ + .reset_o (signal) \ + ) + +`define POP_COUNT(out, in) \ + VX_popcount #( \ + .N ($bits(in)) \ + ) __``out ( \ + .in_i (in), \ + .cnt_o (out) \ ) `endif \ No newline at end of file diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index d523a2b2..f6592c4f 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -3,12 +3,12 @@ module VX_scoreboard #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, - VX_ibuffer_if ibuffer_if, - VX_writeback_if writeback_if, - output wire delay + VX_ibuffer_if.slave ibuffer_if, + VX_writeback_if.slave writeback_if, + output wire delay ); reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n; @@ -61,15 +61,16 @@ module VX_scoreboard #( end `endif if (release_reg) begin - assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0) - else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d", - $time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd); + `ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0, + ("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d", + $time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd)); end if (ibuffer_if.valid && ~ibuffer_if.ready) begin deadlock_ctr <= deadlock_ctr + 1; - assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b", + `ASSERT(deadlock_ctr < deadlock_timeout, + ("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b", $time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb, - deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3); + deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3)); end else if (ibuffer_if.valid && ibuffer_if.ready) begin deadlock_ctr <= 0; end diff --git a/hw/rtl/VX_smem_arb.v b/hw/rtl/VX_smem_arb.v index 07b13730..24d64ff1 100644 --- a/hw/rtl/VX_smem_arb.v +++ b/hw/rtl/VX_smem_arb.v @@ -10,10 +10,10 @@ module VX_smem_arb #( parameter BUFFERED_RSP = 0, parameter TYPE = "P", - localparam ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)), - localparam DATA_WIDTH = (8 * DATA_SIZE), - localparam LOG_NUM_REQS = `CLOG2(NUM_REQS), - localparam TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS + parameter ADDR_WIDTH = (32-`CLOG2(DATA_SIZE)), + parameter DATA_WIDTH = (8 * DATA_SIZE), + parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), + parameter TAG_OUT_WIDTH = TAG_IN_WIDTH - LOG_NUM_REQS ) ( input wire clk, input wire reset, diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 79eb629a..e206c09e 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -5,19 +5,19 @@ module VX_warp_sched #( ) ( `SCOPE_IO_VX_warp_sched - input wire clk, - input wire reset, + input wire clk, + input wire reset, - VX_warp_ctl_if warp_ctl_if, - VX_wstall_if wstall_if, - VX_join_if join_if, - VX_branch_ctl_if branch_ctl_if, + VX_warp_ctl_if.slave warp_ctl_if, + VX_wstall_if.slave wstall_if, + VX_join_if.slave join_if, + VX_branch_ctl_if.slave branch_ctl_if, - VX_ifetch_req_if ifetch_req_if, + VX_ifetch_req_if.master ifetch_req_if, - VX_fetch_to_csr_if fetch_to_csr_if, + VX_fetch_to_csr_if.master fetch_to_csr_if, - output wire busy + output wire busy ); `UNUSED_PARAM (CORE_ID) @@ -147,7 +147,8 @@ module VX_warp_sched #( `IGNORE_UNUSED_BEGIN wire [`NW_BITS:0] active_barrier_count; `IGNORE_UNUSED_END - assign active_barrier_count = $countones(barrier_masks[warp_ctl_if.barrier.id]); + wire [`NUM_WARPS-1:0] barrier_mask = barrier_masks[warp_ctl_if.barrier.id]; + `POP_COUNT(active_barrier_count, barrier_mask); assign reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1); @@ -161,7 +162,7 @@ module VX_warp_sched #( // split/join stack management - wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0]; + wire [(1+32+`NUM_THREADS)-1:0] ipdom [`NUM_WARPS-1:0]; wire [`NUM_THREADS-1:0] curr_tmask = thread_masks[warp_ctl_if.wid]; @@ -173,8 +174,8 @@ module VX_warp_sched #( wire pop = join_if.valid && (i == join_if.wid); wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_tmask : curr_tmask; - wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, curr_tmask}; - wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask}; + wire [(1+32+`NUM_THREADS)-1:0] q_end = {1'b0, 32'b0, curr_tmask}; + wire [(1+32+`NUM_THREADS)-1:0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask}; VX_ipdom_stack #( .WIDTH (1+32+`NUM_THREADS), diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index a9e6040e..fb2bfcb7 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -3,19 +3,19 @@ module VX_writeback #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs - VX_commit_if alu_commit_if, - VX_commit_if ld_commit_if, - VX_commit_if csr_commit_if, + VX_commit_if.slave alu_commit_if, + VX_commit_if.slave ld_commit_if, + VX_commit_if.slave csr_commit_if, `ifdef EXT_F_ENABLE - VX_commit_if fpu_commit_if, + VX_commit_if.slave fpu_commit_if, `endif // outputs - VX_writeback_if writeback_if + VX_writeback_if.master writeback_if ); `UNUSED_PARAM (CORE_ID) diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index ab55c968..a2ea0a68 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -29,15 +29,15 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid; wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw; - wire [`NUM_CLUSTERS-1:0][`L2MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen; - wire [`NUM_CLUSTERS-1:0][`L2MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr; - wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data; - wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag; + wire [`NUM_CLUSTERS-1:0][`L2_MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen; + wire [`NUM_CLUSTERS-1:0][`L2_MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr; + wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_req_data; + wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag; wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid; - wire [`NUM_CLUSTERS-1:0][`L2MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data; - wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag; + wire [`NUM_CLUSTERS-1:0][`L2_MEM_DATA_WIDTH-1:0] per_cluster_mem_rsp_data; + wire [`NUM_CLUSTERS-1:0][`L2_MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag; wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_busy; @@ -81,22 +81,22 @@ module Vortex ( `RESET_RELAY (l3_reset); VX_cache #( - .CACHE_ID (`L3CACHE_ID), - .CACHE_SIZE (`L3CACHE_SIZE), - .CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE), - .NUM_BANKS (`L3NUM_BANKS), - .NUM_PORTS (`L3NUM_PORTS), - .WORD_SIZE (`L3WORD_SIZE), - .NUM_REQS (`L3NUM_REQS), - .CREQ_SIZE (`L3CREQ_SIZE), - .CRSQ_SIZE (`L3CRSQ_SIZE), - .MSHR_SIZE (`L3MSHR_SIZE), - .MRSQ_SIZE (`L3MRSQ_SIZE), - .MREQ_SIZE (`L3MREQ_SIZE), + .CACHE_ID (`L3_CACHE_ID), + .CACHE_SIZE (`L3_CACHE_SIZE), + .CACHE_LINE_SIZE (`L3_CACHE_LINE_SIZE), + .NUM_BANKS (`L3_NUM_BANKS), + .NUM_PORTS (`L3_NUM_PORTS), + .WORD_SIZE (`L3_WORD_SIZE), + .NUM_REQS (`L3_NUM_REQS), + .CREQ_SIZE (`L3_CREQ_SIZE), + .CRSQ_SIZE (`L3_CRSQ_SIZE), + .MSHR_SIZE (`L3_MSHR_SIZE), + .MRSQ_SIZE (`L3_MRSQ_SIZE), + .MREQ_SIZE (`L3_MREQ_SIZE), .WRITE_ENABLE (1), - .CORE_TAG_WIDTH (`L2MEM_TAG_WIDTH), + .CORE_TAG_WIDTH (`L2_MEM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), - .MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH), + .MEM_TAG_WIDTH (`L3_MEM_TAG_WIDTH), .NC_ENABLE (1) ) l3cache ( `SCOPE_BIND_Vortex_l3cache @@ -146,9 +146,9 @@ module Vortex ( VX_mem_arb #( .NUM_REQS (`NUM_CLUSTERS), - .DATA_WIDTH (`L3MEM_DATA_WIDTH), - .ADDR_WIDTH (`L3MEM_ADDR_WIDTH), - .TAG_IN_WIDTH (`L2MEM_TAG_WIDTH), + .DATA_WIDTH (`L3_MEM_DATA_WIDTH), + .ADDR_WIDTH (`L3_MEM_ADDR_WIDTH), + .TAG_IN_WIDTH (`L2_MEM_TAG_WIDTH), .TYPE ("R"), .BUFFERED_REQ (1), .BUFFERED_RSP (1) diff --git a/hw/rtl/Vortex_axi.v b/hw/rtl/Vortex_axi.v index 06aa48b8..000e0bcb 100644 --- a/hw/rtl/Vortex_axi.v +++ b/hw/rtl/Vortex_axi.v @@ -1,17 +1,16 @@ `include "VX_define.vh" module Vortex_axi #( - parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, - parameter AXI_ADDR_WIDTH = 32, - parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, - localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) + parameter AXI_DATA_WIDTH = `VX_MEM_DATA_WIDTH, + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_TID_WIDTH = `VX_MEM_TAG_WIDTH, + parameter AXI_STROBE_WIDTH = (`VX_MEM_DATA_WIDTH / 8) )( // Clock input wire clk, input wire reset, - // AXI write address channel - output wire m_axi_awvalid, + // AXI write request address channel output wire [AXI_TID_WIDTH-1:0] m_axi_awid, output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, output wire [7:0] m_axi_awlen, @@ -20,18 +19,24 @@ module Vortex_axi #( output wire m_axi_awlock, output wire [3:0] m_axi_awcache, output wire [2:0] m_axi_awprot, - output wire [3:0] m_axi_awqos, + output wire [3:0] m_axi_awqos, + output wire m_axi_awvalid, input wire m_axi_awready, - // AXI write data channel - output wire m_axi_wvalid, + // AXI write request data channel output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata, output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb, - output wire m_axi_wlast, + output wire m_axi_wlast, + output wire m_axi_wvalid, input wire m_axi_wready, + + // AXI write response channel + input wire [AXI_TID_WIDTH-1:0] m_axi_bid, + input wire [1:0] m_axi_bresp, + input wire m_axi_bvalid, + output wire m_axi_bready, - // AXI read address channel - output wire m_axi_arvalid, + // AXI read request channel output wire [AXI_TID_WIDTH-1:0] m_axi_arid, output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr, output wire [7:0] m_axi_arlen, @@ -41,12 +46,15 @@ module Vortex_axi #( output wire [3:0] m_axi_arcache, output wire [2:0] m_axi_arprot, output wire [3:0] m_axi_arqos, + output wire m_axi_arvalid, input wire m_axi_arready, - // AXI read data channel - input wire m_axi_rvalid, + // AXI read response channel input wire [AXI_TID_WIDTH-1:0] m_axi_rid, input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata, + input wire [1:0] m_axi_rresp, + input wire m_axi_rlast, + input wire m_axi_rvalid, output wire m_axi_rready, // Status @@ -66,12 +74,14 @@ module Vortex_axi #( wire mem_rsp_ready; VX_axi_adapter #( - .VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH), - .VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), - .VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH), - .AXI_DATA_WIDTH (AXI_DATA_WIDTH), - .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), - .AXI_TID_WIDTH (AXI_TID_WIDTH) + .VX_DATA_WIDTH (`VX_MEM_DATA_WIDTH), + .VX_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH), + .VX_TAG_WIDTH (`VX_MEM_TAG_WIDTH), + .VX_BYTEEN_WIDTH (AXI_STROBE_WIDTH), + .AXI_DATA_WIDTH (AXI_DATA_WIDTH), + .AXI_ADDR_WIDTH (AXI_ADDR_WIDTH), + .AXI_TID_WIDTH (AXI_TID_WIDTH), + .AXI_STROBE_WIDTH (AXI_STROBE_WIDTH) ) axi_adapter ( .clk (clk), .reset (reset), @@ -89,7 +99,6 @@ module Vortex_axi #( .mem_rsp_tag (mem_rsp_tag), .mem_rsp_ready (mem_rsp_ready), - .m_axi_awvalid (m_axi_awvalid), .m_axi_awid (m_axi_awid), .m_axi_awaddr (m_axi_awaddr), .m_axi_awlen (m_axi_awlen), @@ -99,15 +108,20 @@ module Vortex_axi #( .m_axi_awcache (m_axi_awcache), .m_axi_awprot (m_axi_awprot), .m_axi_awqos (m_axi_awqos), + .m_axi_awvalid (m_axi_awvalid), .m_axi_awready (m_axi_awready), - .m_axi_wvalid (m_axi_wvalid), .m_axi_wdata (m_axi_wdata), .m_axi_wstrb (m_axi_wstrb), .m_axi_wlast (m_axi_wlast), + .m_axi_wvalid (m_axi_wvalid), .m_axi_wready (m_axi_wready), + + .m_axi_bid (m_axi_bid), + .m_axi_bresp (m_axi_bresp), + .m_axi_bvalid (m_axi_bvalid), + .m_axi_bready (m_axi_bready), - .m_axi_arvalid (m_axi_arvalid), .m_axi_arid (m_axi_arid), .m_axi_araddr (m_axi_araddr), .m_axi_arlen (m_axi_arlen), @@ -117,11 +131,14 @@ module Vortex_axi #( .m_axi_arcache (m_axi_arcache), .m_axi_arprot (m_axi_arprot), .m_axi_arqos (m_axi_arqos), + .m_axi_arvalid (m_axi_arvalid), .m_axi_arready (m_axi_arready), - .m_axi_rvalid (m_axi_rvalid), .m_axi_rid (m_axi_rid), .m_axi_rdata (m_axi_rdata), + .m_axi_rresp (m_axi_rresp), + .m_axi_rlast (m_axi_rlast), + .m_axi_rvalid (m_axi_rvalid), .m_axi_rready (m_axi_rready) ); diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index fcfae197..d6aaf890 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -8,8 +8,8 @@ module VX_avs_wrapper #( parameter REQ_TAG_WIDTH = 1, parameter RD_QUEUE_SIZE = 1, - localparam AVS_BYTEENW = (AVS_DATA_WIDTH / 8), - localparam RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1) + parameter AVS_BYTEENW = (AVS_DATA_WIDTH / 8), + parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1) ) ( input wire clk, input wire reset, diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 1656ed17..4801bee9 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -851,7 +851,7 @@ begin cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data); if (cci_wr_req_fire) begin - assert(cci_wr_req_ctr != 0); + `ASSERT(cci_wr_req_ctr != 0, ("runtime error")); cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1); if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin cci_wr_req_done <= 1; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index d67355bb..1f05ae04 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -39,8 +39,8 @@ module VX_bank #( // bank offset from beginning of index range parameter BANK_ADDR_OFFSET = 0, - localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE), - localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) + parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE), + parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) ) ( `SCOPE_IO_VX_bank @@ -86,8 +86,7 @@ module VX_bank #( input wire mem_req_ready, // Memory response - input wire mem_rsp_valid, - input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr, + input wire mem_rsp_valid, input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id, input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data, output wire mem_rsp_ready, @@ -130,8 +129,12 @@ module VX_bank #( .ready_out (creq_ready), .valid_out (creq_valid) ); - + wire mreq_alm_full; + wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr; + wire crsq_valid, crsq_ready; + wire crsq_stall; + wire mshr_valid; wire mshr_ready; wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id; @@ -161,9 +164,6 @@ module VX_bank #( wire is_flush_st0; wire mshr_pending_st0, mshr_pending_st1; - wire crsq_valid, crsq_ready, crsq_stall; - wire mreq_alm_full; - // prevent read-during-write hazard when accessing tags/data block RAMs wire rdw_fill_hazard = valid_st0 && is_fill_st0; wire rdw_write_hazard = valid_st0 && is_write_st0 && ~creq_rw; @@ -398,6 +398,7 @@ module VX_bank #( // fill .fill_valid (mem_rsp_fire), .fill_id (mem_rsp_id), + .fill_addr (mem_rsp_addr), // dequeue .dequeue_valid (mshr_valid), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 79d9a7ae..29e14892 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -46,13 +46,13 @@ module VX_cache #( // enable bypass for non-cacheable addresses parameter NC_ENABLE = 0, - localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) + parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) ) ( `SCOPE_IO_VX_cache // PERF `ifdef PERF_ENABLE - VX_perf_cache_if perf_cache_if, + VX_perf_cache_if.master perf_cache_if, `endif input wire clk, @@ -94,7 +94,7 @@ module VX_cache #( `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value")) localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE); - localparam MEM_TAG_IN_WIDTH = `MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH; + localparam MEM_TAG_IN_WIDTH = `BANK_SELECT_BITS + MSHR_ADDR_WIDTH; localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE; localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS != 0) ? (CORE_TAG_ID_BITS - NC_ENABLE) : CORE_TAG_ID_BITS; @@ -444,7 +444,6 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready; if (NUM_BANKS == 1) begin - `UNUSED_VAR (mem_rsp_tag_qual) assign mrsq_out_ready = per_bank_mem_rsp_ready; end else begin assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)]; @@ -515,8 +514,7 @@ module VX_cache #( wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_mem_req_data; wire curr_bank_mem_req_ready; - wire curr_bank_mem_rsp_valid; - wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr; + wire curr_bank_mem_rsp_valid; wire [MSHR_ADDR_WIDTH-1:0] curr_bank_mem_rsp_id; wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data; wire curr_bank_mem_rsp_ready; @@ -558,11 +556,9 @@ module VX_cache #( // Memory response if (NUM_BANKS == 1) begin - assign curr_bank_mem_rsp_valid = mrsq_out_valid; - assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual); + assign curr_bank_mem_rsp_valid = mrsq_out_valid; end else begin assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i); - assign curr_bank_mem_rsp_addr = `MEM_TAG_TO_LINE_ADDR(mem_rsp_tag_qual); end assign curr_bank_mem_rsp_id = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual); assign curr_bank_mem_rsp_data = mem_rsp_data_qual; @@ -633,7 +629,6 @@ module VX_cache #( // Memory response .mem_rsp_valid (curr_bank_mem_rsp_valid), - .mem_rsp_addr (curr_bank_mem_rsp_addr), .mem_rsp_id (curr_bank_mem_rsp_id), .mem_rsp_data (curr_bank_mem_rsp_data), .mem_rsp_ready (curr_bank_mem_rsp_ready), @@ -668,7 +663,7 @@ module VX_cache #( .core_rsp_ready (core_rsp_ready_nc) ); - wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in; + wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in; for (genvar i = 0; i < NUM_BANKS; ++i) begin assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]}; end @@ -692,33 +687,42 @@ module VX_cache #( .ready_out (mem_req_ready_nc) ); - assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({mem_req_addr_nc, mem_req_id}); + if (NUM_BANKS == 1) begin + assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'(mem_req_id); + end else begin + assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_nc), mem_req_id}); + end `ifdef PERF_ENABLE // per cycle: core_reads, core_writes - reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle; - reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle; - reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle; + wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; + wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; + wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; - assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw); - assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); + wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw; + wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw; + + `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask); + `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask); if (CORE_TAG_ID_BITS != 0) begin - assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}}); + wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}}; + `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask); end else begin - assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); + wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready; + `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask); end // per cycle: read misses, write misses, msrq stalls, pipeline stalls - reg [($clog2(NUM_BANKS+1)-1):0] perf_read_miss_per_cycle; - reg [($clog2(NUM_BANKS+1)-1):0] perf_write_miss_per_cycle; - reg [($clog2(NUM_BANKS+1)-1):0] perf_mshr_stall_per_cycle; - reg [($clog2(NUM_BANKS+1)-1):0] perf_pipe_stall_per_cycle; + wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle; + wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle; + wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle; + wire [$clog2(NUM_BANKS+1)-1:0] perf_pipe_stall_per_cycle; - assign perf_read_miss_per_cycle = $countones(perf_read_miss_per_bank); - assign perf_write_miss_per_cycle = $countones(perf_write_miss_per_bank); - assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank); - assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank); + `POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank); + `POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank); + `POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank); + `POP_COUNT(perf_pipe_stall_per_cycle, perf_pipe_stall_per_bank); reg [`PERF_CTR_BITS-1:0] perf_core_reads; reg [`PERF_CTR_BITS-1:0] perf_core_writes; diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index ecc78015..fc1864e9 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -61,12 +61,12 @@ `define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)} +`define MEM_ADDR_TO_BANK_ID(x) x[0 +: `BANK_SELECT_BITS] + `define MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0] `define MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `BANK_SELECT_BITS] -`define MEM_TAG_TO_LINE_ADDR(x) x[(MSHR_ADDR_WIDTH+`BANK_SELECT_BITS) +: `LINE_ADDR_WIDTH] - `define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))} `define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)} diff --git a/hw/rtl/cache/VX_core_req_bank_sel.v b/hw/rtl/cache/VX_core_req_bank_sel.v index ce65d5e6..01c9f12b 100644 --- a/hw/rtl/cache/VX_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_core_req_bank_sel.v @@ -291,12 +291,16 @@ module VX_core_req_bank_sel #( end reg [`PERF_CTR_BITS-1:0] bank_stalls_r; + wire [$clog2(NUM_REQS+1)-1:0] bank_stall_cnt; + + wire [NUM_REQS-1:0] bank_stall_mask = core_req_sel_r & ~core_req_ready; + `POP_COUNT(bank_stall_cnt, bank_stall_mask); always @(posedge clk) begin if (reset) begin bank_stalls_r <= 0; end else begin - bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready)); + bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'(bank_stall_cnt); end end diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index b6ea877d..5b81140d 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -16,7 +16,7 @@ module VX_data_access #( // Enable cache writeable parameter WRITE_ENABLE = 1, - localparam WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) + parameter WORD_SELECT_BITS = `UP(`WORD_SELECT_BITS) ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 0f7b0819..152a6702 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -20,7 +20,7 @@ module VX_miss_resrv #( // core request tag size parameter CORE_TAG_WIDTH = 1, - localparam MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE) + parameter MSHR_ADDR_WIDTH = $clog2(MSHR_SIZE) ) ( input wire clk, input wire reset, @@ -46,6 +46,7 @@ module VX_miss_resrv #( // fill input wire fill_valid, input wire [MSHR_ADDR_WIDTH-1:0] fill_id, + output wire [`LINE_ADDR_WIDTH-1:0] fill_addr, // lookup input wire lookup_valid, @@ -161,8 +162,8 @@ module VX_miss_resrv #( dequeue_id_r <= dequeue_id_n; allocate_id_r <= allocate_id_n; - assert(!allocate_fire || !valid_table[allocate_id_r]); - assert(!release_valid || valid_table[release_id]); + `ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error")); + `ASSERT(!release_valid || valid_table[release_id], ("runtime error")); end `RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id]), ("%t: *** cache%0d:%0d in-use allocation: addr=%0h, id=%0d", $time, CACHE_ID, BANK_ID, @@ -184,6 +185,8 @@ module VX_miss_resrv #( .rdata (dequeue_data) ); + assign fill_addr = addr_table[fill_id]; + assign allocate_ready = allocate_rdy_r; assign allocate_id = allocate_id_r; @@ -206,8 +209,8 @@ module VX_miss_resrv #( dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc); if (fill_valid) - dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID, - `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id); + dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID, + `LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID)); if (dequeue_fire) dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc); diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index ca6f6be0..21eb440a 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -15,10 +15,10 @@ module VX_nc_bypass #( parameter MEM_TAG_IN_WIDTH = 1, parameter MEM_TAG_OUT_WIDTH = 1, - localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8, - localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8, - localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1, - localparam MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE)) + parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8, + parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8, + parameter CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1, + parameter MEM_SELECT_BITS = `UP(`CLOG2(MEM_DATA_SIZE / CORE_DATA_SIZE)) ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 765f9916..2a8f7c47 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -31,7 +31,7 @@ module VX_shared_mem #( // PERF `ifdef PERF_ENABLE - VX_perf_cache_if perf_cache_if, + VX_perf_cache_if.master perf_cache_if, `endif // Core request @@ -337,16 +337,22 @@ module VX_shared_mem #( `ifdef PERF_ENABLE // per cycle: core_reads, core_writes - reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle; - reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle; + wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle; + wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle; + wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; - assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw); - assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); + wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw; + wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw; + + `POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask); + `POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask); if (CORE_TAG_ID_BITS != 0) begin - assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}}); + wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}}; + `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask); end else begin - assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); + wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready; + `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask); end reg [`PERF_CTR_BITS-1:0] perf_core_reads; diff --git a/hw/rtl/interfaces/VX_alu_req_if.v b/hw/rtl/interfaces/VX_alu_req_if.v index 8ae714af..2c6ffd5e 100644 --- a/hw/rtl/interfaces/VX_alu_req_if.v +++ b/hw/rtl/interfaces/VX_alu_req_if.v @@ -22,6 +22,44 @@ interface VX_alu_req_if (); wire wb; wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output next_PC, + output op_type, + output op_mod, + output use_PC, + output use_imm, + output imm, + output tid, + output rs1_data, + output rs2_data, + output rd, + output wb, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input next_PC, + input op_type, + input op_mod, + input use_PC, + input use_imm, + input imm, + input tid, + input rs1_data, + input rs2_data, + input rd, + input wb, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_branch_ctl_if.v b/hw/rtl/interfaces/VX_branch_ctl_if.v index 5e5e840a..f71c43fe 100644 --- a/hw/rtl/interfaces/VX_branch_ctl_if.v +++ b/hw/rtl/interfaces/VX_branch_ctl_if.v @@ -10,6 +10,20 @@ interface VX_branch_ctl_if (); wire taken; wire [31:0] dest; + modport master ( + output valid, + output wid, + output taken, + output dest + ); + + modport slave ( + input valid, + input wid, + input taken, + input dest + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_cmt_to_csr_if.v b/hw/rtl/interfaces/VX_cmt_to_csr_if.v index ce2549d6..800d428d 100644 --- a/hw/rtl/interfaces/VX_cmt_to_csr_if.v +++ b/hw/rtl/interfaces/VX_cmt_to_csr_if.v @@ -8,6 +8,16 @@ interface VX_cmt_to_csr_if (); wire valid; wire [$clog2(`NUM_THREADS+1)-1:0] commit_size; + modport master ( + output valid, + output commit_size + ); + + modport slave ( + input valid, + input commit_size + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_commit_if.v b/hw/rtl/interfaces/VX_commit_if.v index 05d0f11c..4b6844d6 100644 --- a/hw/rtl/interfaces/VX_commit_if.v +++ b/hw/rtl/interfaces/VX_commit_if.v @@ -13,7 +13,31 @@ interface VX_commit_if (); wire [`NR_BITS-1:0] rd; wire wb; wire eop; - wire ready; + wire ready; + + modport master ( + output valid, + output wid, + output tmask, + output PC, + output data, + output rd, + output wb, + output eop, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input data, + input rd, + input wb, + input eop, + output ready + ); endinterface diff --git a/hw/rtl/interfaces/VX_csr_req_if.v b/hw/rtl/interfaces/VX_csr_req_if.v index d4374bab..23345d53 100644 --- a/hw/rtl/interfaces/VX_csr_req_if.v +++ b/hw/rtl/interfaces/VX_csr_req_if.v @@ -17,6 +17,36 @@ interface VX_csr_req_if (); wire [`NR_BITS-1:0] rd; wire wb; wire ready; + + modport master ( + output valid, + output wid, + output tmask, + output PC, + output op_type, + output addr, + output rs1_data, + output use_imm, + output imm, + output rd, + output wb, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input op_type, + input addr, + input rs1_data, + input use_imm, + input imm, + input rd, + input wb, + output ready + ); endinterface diff --git a/hw/rtl/interfaces/VX_dcache_req_if.v b/hw/rtl/interfaces/VX_dcache_req_if.v index c922ea64..13f3b00b 100644 --- a/hw/rtl/interfaces/VX_dcache_req_if.v +++ b/hw/rtl/interfaces/VX_dcache_req_if.v @@ -17,6 +17,26 @@ interface VX_dcache_req_if #( wire [NUM_REQS-1:0][TAG_WIDTH-1:0] tag; wire [NUM_REQS-1:0] ready; + modport master ( + output valid, + output rw, + output byteen, + output addr, + output data, + output tag, + input ready + ); + + modport slave ( + input valid, + input rw, + input byteen, + input addr, + input data, + input tag, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_dcache_rsp_if.v b/hw/rtl/interfaces/VX_dcache_rsp_if.v index df72c1e3..0f424501 100644 --- a/hw/rtl/interfaces/VX_dcache_rsp_if.v +++ b/hw/rtl/interfaces/VX_dcache_rsp_if.v @@ -15,6 +15,22 @@ interface VX_dcache_rsp_if #( wire [TAG_WIDTH-1:0] tag; wire ready; + modport master ( + output valid, + output tmask, + output data, + output tag, + input ready + ); + + modport slave ( + input valid, + input tmask, + input data, + input tag, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_decode_if.v b/hw/rtl/interfaces/VX_decode_if.v index 70ec9fcd..90c5d70e 100644 --- a/hw/rtl/interfaces/VX_decode_if.v +++ b/hw/rtl/interfaces/VX_decode_if.v @@ -22,6 +22,44 @@ interface VX_decode_if (); wire [`NR_BITS-1:0] rs3; wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output ex_type, + output op_type, + output op_mod, + output wb, + output use_PC, + output use_imm, + output imm, + output rd, + output rs1, + output rs2, + output rs3, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input ex_type, + input op_type, + input op_mod, + input wb, + input use_PC, + input use_imm, + input imm, + input rd, + input rs1, + input rs2, + input rs3, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_fetch_to_csr_if.v b/hw/rtl/interfaces/VX_fetch_to_csr_if.v index 12e89f98..1c2e3ddb 100644 --- a/hw/rtl/interfaces/VX_fetch_to_csr_if.v +++ b/hw/rtl/interfaces/VX_fetch_to_csr_if.v @@ -7,6 +7,14 @@ interface VX_fetch_to_csr_if (); wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks; + modport master ( + output thread_masks + ); + + modport slave ( + input thread_masks + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_fpu_req_if.v b/hw/rtl/interfaces/VX_fpu_req_if.v index 92be96cf..25867e42 100644 --- a/hw/rtl/interfaces/VX_fpu_req_if.v +++ b/hw/rtl/interfaces/VX_fpu_req_if.v @@ -18,6 +18,36 @@ interface VX_fpu_req_if (); wire wb; wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output op_type, + output op_mod, + output rs1_data, + output rs2_data, + output rs3_data, + output rd, + output wb, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input op_type, + input op_mod, + input rs1_data, + input rs2_data, + input rs3_data, + input rd, + input wb, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_fpu_to_csr_if.v b/hw/rtl/interfaces/VX_fpu_to_csr_if.v index 3f27fa02..865c8cfa 100644 --- a/hw/rtl/interfaces/VX_fpu_to_csr_if.v +++ b/hw/rtl/interfaces/VX_fpu_to_csr_if.v @@ -12,6 +12,22 @@ interface VX_fpu_to_csr_if (); wire [`NW_BITS-1:0] read_wid; wire [`INST_FRM_BITS-1:0] read_frm; + modport master ( + output write_enable, + output write_wid, + output write_fflags, + output read_wid, + input read_frm + ); + + modport slave ( + input write_enable, + input write_wid, + input write_fflags, + input read_wid, + output read_frm + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_gpr_req_if.v b/hw/rtl/interfaces/VX_gpr_req_if.v index 0f818ed7..d34448f4 100644 --- a/hw/rtl/interfaces/VX_gpr_req_if.v +++ b/hw/rtl/interfaces/VX_gpr_req_if.v @@ -8,7 +8,21 @@ interface VX_gpr_req_if (); wire [`NW_BITS-1:0] wid; wire [`NR_BITS-1:0] rs1; wire [`NR_BITS-1:0] rs2; - wire [`NR_BITS-1:0] rs3; + wire [`NR_BITS-1:0] rs3; + + modport master ( + output wid, + output rs1, + output rs2, + output rs3 + ); + + modport slave ( + input wid, + input rs1, + input rs2, + input rs3 + ); endinterface diff --git a/hw/rtl/interfaces/VX_gpr_rsp_if.v b/hw/rtl/interfaces/VX_gpr_rsp_if.v index b8e6f0df..c323555c 100644 --- a/hw/rtl/interfaces/VX_gpr_rsp_if.v +++ b/hw/rtl/interfaces/VX_gpr_rsp_if.v @@ -9,6 +9,18 @@ interface VX_gpr_rsp_if (); wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs3_data; + modport master ( + output rs1_data, + output rs2_data, + output rs3_data + ); + + modport slave ( + input rs1_data, + input rs2_data, + input rs3_data + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_gpu_req_if.v b/hw/rtl/interfaces/VX_gpu_req_if.v index 27ee1316..e3511043 100644 --- a/hw/rtl/interfaces/VX_gpu_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_req_if.v @@ -20,6 +20,36 @@ interface VX_gpu_req_if(); wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output next_PC, + output op_type, + output tid, + output rs1_data, + output rs2_data, + output rd, + output wb, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input next_PC, + input op_type, + input tid, + input rs1_data, + input rs2_data, + input rd, + input wb, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_ibuffer_if.v b/hw/rtl/interfaces/VX_ibuffer_if.v index c0d0f465..bb791737 100644 --- a/hw/rtl/interfaces/VX_ibuffer_if.v +++ b/hw/rtl/interfaces/VX_ibuffer_if.v @@ -20,14 +20,62 @@ interface VX_ibuffer_if (); wire [`NR_BITS-1:0] rs1; wire [`NR_BITS-1:0] rs2; wire [`NR_BITS-1:0] rs3; - wire ready; - - // scoreboard forwarding + wire [`NR_BITS-1:0] rd_n; wire [`NR_BITS-1:0] rs1_n; wire [`NR_BITS-1:0] rs2_n; wire [`NR_BITS-1:0] rs3_n; wire [`NW_BITS-1:0] wid_n; + + wire ready; + + modport master ( + output valid, + output wid, + output tmask, + output PC, + output ex_type, + output op_type, + output op_mod, + output wb, + output use_PC, + output use_imm, + output imm, + output rd, + output rs1, + output rs2, + output rs3, + output rd_n, + output rs1_n, + output rs2_n, + output rs3_n, + output wid_n, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input ex_type, + input op_type, + input op_mod, + input wb, + input use_PC, + input use_imm, + input imm, + input rd, + input rs1, + input rs2, + input rs3, + input rd_n, + input rs1_n, + input rs2_n, + input rs3_n, + input wid_n, + output ready + ); endinterface diff --git a/hw/rtl/interfaces/VX_icache_req_if.v b/hw/rtl/interfaces/VX_icache_req_if.v index c60632f3..1decc6a5 100644 --- a/hw/rtl/interfaces/VX_icache_req_if.v +++ b/hw/rtl/interfaces/VX_icache_req_if.v @@ -13,6 +13,20 @@ interface VX_icache_req_if #( wire [TAG_WIDTH-1:0] tag; wire ready; + modport master ( + output valid, + output addr, + output tag, + input ready + ); + + modport slave ( + input valid, + input addr, + input tag, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_icache_rsp_if.v b/hw/rtl/interfaces/VX_icache_rsp_if.v index 9bab8b72..71cee32b 100644 --- a/hw/rtl/interfaces/VX_icache_rsp_if.v +++ b/hw/rtl/interfaces/VX_icache_rsp_if.v @@ -11,7 +11,21 @@ interface VX_icache_rsp_if #( wire valid; wire [`WORD_WIDTH-1:0] data; wire [TAG_WIDTH-1:0] tag; - wire ready; + wire ready; + + modport master ( + output valid, + output data, + output tag, + input ready + ); + + modport slave ( + input valid, + input data, + input tag, + output ready + ); endinterface diff --git a/hw/rtl/interfaces/VX_ifetch_req_if.v b/hw/rtl/interfaces/VX_ifetch_req_if.v index b99ed5da..3d75e736 100644 --- a/hw/rtl/interfaces/VX_ifetch_req_if.v +++ b/hw/rtl/interfaces/VX_ifetch_req_if.v @@ -11,6 +11,22 @@ interface VX_ifetch_req_if (); wire [31:0] PC; wire ready; + modport master ( + output valid, + output tmask, + output wid, + output PC, + input ready + ); + + modport slave ( + input valid, + input tmask, + input wid, + input PC, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_ifetch_rsp_if.v b/hw/rtl/interfaces/VX_ifetch_rsp_if.v index 78706577..a2f04fe4 100644 --- a/hw/rtl/interfaces/VX_ifetch_rsp_if.v +++ b/hw/rtl/interfaces/VX_ifetch_rsp_if.v @@ -12,6 +12,24 @@ interface VX_ifetch_rsp_if (); wire [31:0] data; wire ready; + modport master ( + output valid, + output tmask, + output wid, + output PC, + output data, + input ready + ); + + modport slave ( + input valid, + input tmask, + input wid, + input PC, + input data, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_join_if.v b/hw/rtl/interfaces/VX_join_if.v index 0ee163ab..d39ed9c0 100644 --- a/hw/rtl/interfaces/VX_join_if.v +++ b/hw/rtl/interfaces/VX_join_if.v @@ -8,6 +8,16 @@ interface VX_join_if (); wire valid; wire [`NW_BITS-1:0] wid; + modport master ( + output valid, + output wid + ); + + modport slave ( + input valid, + input wid + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_lsu_req_if.v b/hw/rtl/interfaces/VX_lsu_req_if.v index fed1f270..36b4e778 100644 --- a/hw/rtl/interfaces/VX_lsu_req_if.v +++ b/hw/rtl/interfaces/VX_lsu_req_if.v @@ -18,6 +18,36 @@ interface VX_lsu_req_if (); wire wb; wire ready; + modport master ( + output valid, + output wid, + output tmask, + output PC, + output op_type, + output is_fence, + output store_data, + output base_addr, + output offset, + output rd, + output wb, + input ready + ); + + modport slave ( + input valid, + input wid, + input tmask, + input PC, + input op_type, + input is_fence, + input store_data, + input base_addr, + input offset, + input rd, + input wb, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_mem_req_if.v b/hw/rtl/interfaces/VX_mem_req_if.v index a1a9040f..50bde8a2 100644 --- a/hw/rtl/interfaces/VX_mem_req_if.v +++ b/hw/rtl/interfaces/VX_mem_req_if.v @@ -18,6 +18,26 @@ interface VX_mem_req_if #( wire [TAG_WIDTH-1:0] tag; wire ready; + modport master ( + output valid, + output rw, + output byteen, + output addr, + output data, + output tag, + input ready + ); + + modport slave ( + input valid, + input rw, + input byteen, + input addr, + input data, + input tag, + output ready + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_mem_rsp_if.v b/hw/rtl/interfaces/VX_mem_rsp_if.v index afc19989..3ee69d88 100644 --- a/hw/rtl/interfaces/VX_mem_rsp_if.v +++ b/hw/rtl/interfaces/VX_mem_rsp_if.v @@ -11,7 +11,21 @@ interface VX_mem_rsp_if #( wire valid; wire [DATA_WIDTH-1:0] data; wire [TAG_WIDTH-1:0] tag; - wire ready; + wire ready; + + modport master ( + output valid, + output data, + output tag, + input ready + ); + + modport slave ( + input valid, + input data, + input tag, + output ready + ); endinterface diff --git a/hw/rtl/interfaces/VX_perf_cache_if.v b/hw/rtl/interfaces/VX_perf_cache_if.v index 35004368..d9efb2cc 100644 --- a/hw/rtl/interfaces/VX_perf_cache_if.v +++ b/hw/rtl/interfaces/VX_perf_cache_if.v @@ -14,6 +14,28 @@ interface VX_perf_cache_if (); wire [`PERF_CTR_BITS-1:0] pipe_stalls; wire [`PERF_CTR_BITS-1:0] crsp_stalls; + modport master ( + output reads, + output writes, + output read_misses, + output write_misses, + output bank_stalls, + output mshr_stalls, + output pipe_stalls, + output crsp_stalls + ); + + modport slave ( + input reads, + input writes, + input read_misses, + input write_misses, + input bank_stalls, + input mshr_stalls, + input pipe_stalls, + input crsp_stalls + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_perf_memsys_if.v b/hw/rtl/interfaces/VX_perf_memsys_if.v index a2ef4835..f0e27ed6 100644 --- a/hw/rtl/interfaces/VX_perf_memsys_if.v +++ b/hw/rtl/interfaces/VX_perf_memsys_if.v @@ -28,6 +28,50 @@ interface VX_perf_memsys_if (); wire [`PERF_CTR_BITS-1:0] mem_stalls; wire [`PERF_CTR_BITS-1:0] mem_latency; + modport master ( + output icache_reads, + output icache_read_misses, + output icache_pipe_stalls, + output icache_crsp_stalls, + output dcache_reads, + output dcache_writes, + output dcache_read_misses, + output dcache_write_misses, + output dcache_bank_stalls, + output dcache_mshr_stalls, + output dcache_pipe_stalls, + output dcache_crsp_stalls, + output smem_reads, + output smem_writes, + output smem_bank_stalls, + output mem_reads, + output mem_writes, + output mem_stalls, + output mem_latency + ); + + modport slave ( + input icache_reads, + input icache_read_misses, + input icache_pipe_stalls, + input icache_crsp_stalls, + input dcache_reads, + input dcache_writes, + input dcache_read_misses, + input dcache_write_misses, + input dcache_bank_stalls, + input dcache_mshr_stalls, + input dcache_pipe_stalls, + input dcache_crsp_stalls, + input smem_reads, + input smem_writes, + input smem_bank_stalls, + input mem_reads, + input mem_writes, + input mem_stalls, + input mem_latency + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_perf_pipeline_if.v b/hw/rtl/interfaces/VX_perf_pipeline_if.v index 12d76d9c..19cc15c3 100644 --- a/hw/rtl/interfaces/VX_perf_pipeline_if.v +++ b/hw/rtl/interfaces/VX_perf_pipeline_if.v @@ -4,15 +4,41 @@ `include "VX_define.vh" interface VX_perf_pipeline_if (); + wire [`PERF_CTR_BITS-1:0] ibf_stalls; wire [`PERF_CTR_BITS-1:0] scb_stalls; wire [`PERF_CTR_BITS-1:0] lsu_stalls; wire [`PERF_CTR_BITS-1:0] csr_stalls; wire [`PERF_CTR_BITS-1:0] alu_stalls; - wire [`PERF_CTR_BITS-1:0] gpu_stalls; `ifdef EXT_F_ENABLE wire [`PERF_CTR_BITS-1:0] fpu_stalls; `endif + wire [`PERF_CTR_BITS-1:0] gpu_stalls; + + modport master ( + output ibf_stalls, + output scb_stalls, + output lsu_stalls, + output csr_stalls, + output alu_stalls, + `ifdef EXT_F_ENABLE + output fpu_stalls, + `endif + output gpu_stalls + ); + + modport slave ( + input ibf_stalls, + input scb_stalls, + input lsu_stalls, + input csr_stalls, + input alu_stalls, + `ifdef EXT_F_ENABLE + input fpu_stalls, + `endif + input gpu_stalls + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_warp_ctl_if.v b/hw/rtl/interfaces/VX_warp_ctl_if.v index 2a53c714..d2117941 100644 --- a/hw/rtl/interfaces/VX_warp_ctl_if.v +++ b/hw/rtl/interfaces/VX_warp_ctl_if.v @@ -12,6 +12,24 @@ interface VX_warp_ctl_if (); gpu_barrier_t barrier; gpu_split_t split; + modport master ( + output valid, + output wid, + output tmc, + output wspawn, + output barrier, + output split + ); + + modport slave ( + input valid, + input wid, + input tmc, + input wspawn, + input barrier, + input split + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_writeback_if.v b/hw/rtl/interfaces/VX_writeback_if.v index 1f64d10d..8f05fc7a 100644 --- a/hw/rtl/interfaces/VX_writeback_if.v +++ b/hw/rtl/interfaces/VX_writeback_if.v @@ -6,16 +6,36 @@ interface VX_writeback_if (); wire valid; - wire [`NUM_THREADS-1:0] tmask; wire [`NW_BITS-1:0] wid; wire [31:0] PC; wire [`NR_BITS-1:0] rd; wire [`NUM_THREADS-1:0][31:0] data; - wire eop; - + wire eop; wire ready; + modport master ( + output valid, + output tmask, + output wid, + output PC, + output rd, + output data, + output eop, + input ready + ); + + modport slave ( + input valid, + input tmask, + input wid, + input PC, + input rd, + input data, + input eop, + output ready + ); + endinterface `endif diff --git a/hw/rtl/interfaces/VX_wstall_if.v b/hw/rtl/interfaces/VX_wstall_if.v index e8e0e249..cff00327 100644 --- a/hw/rtl/interfaces/VX_wstall_if.v +++ b/hw/rtl/interfaces/VX_wstall_if.v @@ -9,6 +9,18 @@ interface VX_wstall_if(); wire [`NW_BITS-1:0] wid; wire stalled; + modport master ( + output valid, + output wid, + output stalled + ); + + modport slave ( + input valid, + input wid, + input stalled + ); + endinterface `endif \ No newline at end of file diff --git a/hw/rtl/libs/VX_axi_adapter.v b/hw/rtl/libs/VX_axi_adapter.v index cf2be72a..2788c315 100644 --- a/hw/rtl/libs/VX_axi_adapter.v +++ b/hw/rtl/libs/VX_axi_adapter.v @@ -1,15 +1,15 @@ `include "VX_define.vh" module VX_axi_adapter #( - parameter VX_DATA_WIDTH = 512, - parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)), - parameter VX_TAG_WIDTH = 8, - parameter AXI_DATA_WIDTH = VX_DATA_WIDTH, - parameter AXI_ADDR_WIDTH = 32, - parameter AXI_TID_WIDTH = VX_TAG_WIDTH, + parameter VX_DATA_WIDTH = 512, + parameter VX_ADDR_WIDTH = (32 - $clog2(VX_DATA_WIDTH/8)), + parameter VX_TAG_WIDTH = 8, + parameter AXI_DATA_WIDTH = VX_DATA_WIDTH, + parameter AXI_ADDR_WIDTH = 32, + parameter AXI_TID_WIDTH = VX_TAG_WIDTH, - localparam VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8), - localparam AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) + parameter VX_BYTEEN_WIDTH = (VX_DATA_WIDTH / 8), + parameter AXI_STROBE_WIDTH = (AXI_DATA_WIDTH / 8) ) ( input wire clk, input wire reset, @@ -29,8 +29,7 @@ module VX_axi_adapter #( output wire [VX_TAG_WIDTH-1:0] mem_rsp_tag, output wire mem_req_ready, - // AXI write address channel - output wire m_axi_awvalid, + // AXI write request address channel output wire [AXI_TID_WIDTH-1:0] m_axi_awid, output wire [AXI_ADDR_WIDTH-1:0] m_axi_awaddr, output wire [7:0] m_axi_awlen, @@ -39,18 +38,24 @@ module VX_axi_adapter #( output wire m_axi_awlock, output wire [3:0] m_axi_awcache, output wire [2:0] m_axi_awprot, - output wire [3:0] m_axi_awqos, + output wire [3:0] m_axi_awqos, + output wire m_axi_awvalid, input wire m_axi_awready, - // AXI write data channel - output wire m_axi_wvalid, + // AXI write request data channel output wire [AXI_DATA_WIDTH-1:0] m_axi_wdata, output wire [AXI_STROBE_WIDTH-1:0] m_axi_wstrb, - output wire m_axi_wlast, + output wire m_axi_wlast, + output wire m_axi_wvalid, input wire m_axi_wready, + + // AXI write response channel + input wire [AXI_TID_WIDTH-1:0] m_axi_bid, + input wire [1:0] m_axi_bresp, + input wire m_axi_bvalid, + output wire m_axi_bready, // AXI read address channel - output wire m_axi_arvalid, output wire [AXI_TID_WIDTH-1:0] m_axi_arid, output wire [AXI_ADDR_WIDTH-1:0] m_axi_araddr, output wire [7:0] m_axi_arlen, @@ -60,12 +65,15 @@ module VX_axi_adapter #( output wire [3:0] m_axi_arcache, output wire [2:0] m_axi_arprot, output wire [3:0] m_axi_arqos, + output wire m_axi_arvalid, input wire m_axi_arready, - // AXI read data channel - input wire m_axi_rvalid, + // AXI read response channel input wire [AXI_TID_WIDTH-1:0] m_axi_rid, input wire [AXI_DATA_WIDTH-1:0] m_axi_rdata, + input wire [1:0] m_axi_rresp, + input wire m_axi_rlast, + input wire m_axi_rvalid, output wire m_axi_rready ); localparam AXSIZE = $clog2(VX_DATA_WIDTH/8); @@ -73,6 +81,8 @@ module VX_axi_adapter #( `STATIC_ASSERT((AXI_DATA_WIDTH == VX_DATA_WIDTH), ("invalid parameter")) `STATIC_ASSERT((AXI_TID_WIDTH == VX_TAG_WIDTH), ("invalid parameter")) + //`UNUSED_VAR () + reg awvalid_ack; reg wvalid_ack; @@ -95,7 +105,7 @@ module VX_axi_adapter #( wire axi_write_ready = (m_axi_awready || awvalid_ack) && (m_axi_wready || wvalid_ack); - // AXI write address channel + // AXI write request address channel assign m_axi_awvalid = mem_req_valid && mem_req_rw && !awvalid_ack; assign m_axi_awid = mem_req_tag; assign m_axi_awaddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE; @@ -107,13 +117,18 @@ module VX_axi_adapter #( assign m_axi_awprot = 3'b0; assign m_axi_awqos = 4'b0; - // AXI write data channel + // AXI write request data channel assign m_axi_wvalid = mem_req_valid && mem_req_rw && !wvalid_ack; assign m_axi_wdata = mem_req_data; assign m_axi_wstrb = mem_req_byteen; assign m_axi_wlast = 1'b1; - // AXI read address channel + // AXI write response channel + `UNUSED_VAR (m_axi_bid); + `RUNTIME_ASSERT(~m_axi_bvalid || m_axi_bresp == 0, ("AXI response error")); + assign m_axi_bready = 1'b1; + + // AXI read request channel assign m_axi_arvalid = mem_req_valid && !mem_req_rw; assign m_axi_arid = mem_req_tag; assign m_axi_araddr = AXI_ADDR_WIDTH'(mem_req_addr) << AXSIZE; @@ -125,10 +140,12 @@ module VX_axi_adapter #( assign m_axi_arprot = 3'b0; assign m_axi_arqos = 4'b0; - // AXI read data channel + // AXI read response channel assign mem_rsp_valid = m_axi_rvalid; assign mem_rsp_tag = m_axi_rid; assign mem_rsp_data = m_axi_rdata; + `RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rresp == 0, ("AXI response error")); + `UNUSED_VAR (m_axi_rlast); assign m_axi_rready = mem_rsp_ready; // Vortex request ack diff --git a/hw/rtl/libs/VX_bypass_buffer.v b/hw/rtl/libs/VX_bypass_buffer.v index 84fcfd98..170c2a88 100644 --- a/hw/rtl/libs/VX_bypass_buffer.v +++ b/hw/rtl/libs/VX_bypass_buffer.v @@ -31,7 +31,7 @@ module VX_bypass_buffer #( buffer_valid <= 0; end if (valid_in && ~ready_out) begin - assert(!buffer_valid); + `ASSERT(!buffer_valid, "runtime error"); buffer_valid <= 1; end end diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index db381485..7b39246f 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -28,7 +28,9 @@ module VX_dp_ram #( if (INIT_FILE != "") begin \ initial $readmemh(INIT_FILE, ram); \ end else begin \ - initial ram = '{default: INIT_VALUE}; \ + initial \ + for (integer i = 0; i < SIZE; ++i)\ + ram[i] = INIT_VALUE; \ end \ end diff --git a/hw/rtl/libs/VX_fifo_queue.v b/hw/rtl/libs/VX_fifo_queue.v index fcff5ac9..42f36885 100644 --- a/hw/rtl/libs/VX_fifo_queue.v +++ b/hw/rtl/libs/VX_fifo_queue.v @@ -35,8 +35,8 @@ module VX_fifo_queue #( head_r <= 0; size_r <= 0; end else begin - assert(!push || !full); - assert(!pop || !empty); + `ASSERT(!push || !full, ("runtime error")); + `ASSERT(!pop || !empty, ("runtime error")); if (push) begin if (!pop) begin size_r <= 1; @@ -71,8 +71,8 @@ module VX_fifo_queue #( alm_full_r <= 0; used_r <= 0; end else begin - assert(!push || !full); - assert(!pop || !empty); + `ASSERT(!push || !full, ("runtime error")); + `ASSERT(!pop || !empty, ("runtime error")); if (push) begin if (!pop) begin empty_r <= 0; diff --git a/hw/rtl/libs/VX_find_first.v b/hw/rtl/libs/VX_find_first.v index 3ff8c8a9..048a803c 100644 --- a/hw/rtl/libs/VX_find_first.v +++ b/hw/rtl/libs/VX_find_first.v @@ -5,7 +5,7 @@ module VX_find_first #( parameter N = 1, parameter DATAW = 1, parameter REVERSE = 0, - localparam LOGN = $clog2(N) + parameter LOGN = $clog2(N) ) ( input wire [N-1:0][DATAW-1:0] data_i, input wire [N-1:0] valid_i, diff --git a/hw/rtl/libs/VX_index_buffer.v b/hw/rtl/libs/VX_index_buffer.v index b8fe7729..19efefcf 100644 --- a/hw/rtl/libs/VX_index_buffer.v +++ b/hw/rtl/libs/VX_index_buffer.v @@ -55,10 +55,10 @@ module VX_index_buffer #( full_r <= 1'b0; end else begin if (release_slot) begin - assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr); + `ASSERT(0 == free_slots[release_addr], ("%t: releasing invalid slot at port %d", $time, release_addr)); end if (acquire_slot) begin - assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr); + `ASSERT(1 == free_slots[write_addr], ("%t: acquiring used slot at port %d", $time, write_addr)); end write_addr_r <= free_index; free_slots <= free_slots_n; diff --git a/hw/rtl/libs/VX_index_queue.v b/hw/rtl/libs/VX_index_queue.v index ef7f49b4..66307d74 100644 --- a/hw/rtl/libs/VX_index_queue.v +++ b/hw/rtl/libs/VX_index_queue.v @@ -32,10 +32,8 @@ module VX_index_queue #( assign enqueue = push; assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid - always @(*) begin - assert(!push || !full); - end - + `RUNTIME_ASSERT(!push || !full, ("invalid inputs")); + always @(posedge clk) begin if (reset) begin rd_ptr <= 0; diff --git a/hw/rtl/libs/VX_lzc.v b/hw/rtl/libs/VX_lzc.v index 5b97028b..cf89f586 100644 --- a/hw/rtl/libs/VX_lzc.v +++ b/hw/rtl/libs/VX_lzc.v @@ -2,9 +2,9 @@ `TRACING_OFF module VX_lzc #( - parameter N = 2, - parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero - localparam LOGN = $clog2(N) + parameter N = 2, + parameter MODE = 0, // 0 -> trailing zero, 1 -> leading zero + parameter LOGN = $clog2(N) ) ( input wire [N-1:0] in_i, output wire [LOGN-1:0] cnt_o, diff --git a/hw/rtl/libs/VX_pending_size.v b/hw/rtl/libs/VX_pending_size.v index 96d82e60..2964fdd0 100644 --- a/hw/rtl/libs/VX_pending_size.v +++ b/hw/rtl/libs/VX_pending_size.v @@ -25,7 +25,7 @@ module VX_pending_size #( empty_r <= 1; full_r <= 0; end else begin - assert(!incr || !full); + `ASSERT(!incr || !full, ("runtime error")); if (incr) begin if (!decr) begin empty_r <= 0; diff --git a/hw/rtl/libs/VX_skid_buffer.v b/hw/rtl/libs/VX_skid_buffer.v index b96857e3..ba6c8b6c 100644 --- a/hw/rtl/libs/VX_skid_buffer.v +++ b/hw/rtl/libs/VX_skid_buffer.v @@ -30,9 +30,7 @@ module VX_skid_buffer #( end else if (NOBACKPRESSURE) begin - always @(posedge clk) begin - assert(ready_out) else $error("ready_out should always be asserted"); - end + `RUNTIME_ASSERT(ready_out, ("ready_out should always be asserted")) wire stall = valid_out && ~ready_out; diff --git a/hw/rtl/libs/VX_sp_ram.v b/hw/rtl/libs/VX_sp_ram.v index 9485e09c..2ed01d0d 100644 --- a/hw/rtl/libs/VX_sp_ram.v +++ b/hw/rtl/libs/VX_sp_ram.v @@ -27,7 +27,9 @@ module VX_sp_ram #( if (INIT_FILE != "") begin \ initial $readmemh(INIT_FILE, ram); \ end else begin \ - initial ram = '{default: INIT_VALUE}; \ + initial \ + for (integer i = 0; i < SIZE; ++i)\ + ram[i] = INIT_VALUE; \ end \ end diff --git a/hw/rtl/libs/VX_stream_demux.v b/hw/rtl/libs/VX_stream_demux.v index e55007b8..282a2212 100644 --- a/hw/rtl/libs/VX_stream_demux.v +++ b/hw/rtl/libs/VX_stream_demux.v @@ -5,7 +5,7 @@ module VX_stream_demux #( parameter LANES = 1, parameter DATAW = 1, parameter BUFFERED = 0, - localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS) + parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS) ) ( input wire clk, input wire reset, diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 69a78505..7fb0de8a 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -35,8 +35,8 @@ TOP = Vortex RTL_DIR=../rtl DPI_DIR=../dpi -FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src -RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) +FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src +RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) SRCS = simulator.cpp main.cpp SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 6adf457c..5dc59405 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -113,7 +113,8 @@ void Simulator::reset() { mem_rsp_vec_[b].clear(); } last_mem_rsp_bank_ = 0; - mem_rsp_active_ = false; + mem_rd_rsp_active_ = false; + mem_wr_rsp_active_ = false; #ifdef AXI_BUS this->reset_axi_bus(); @@ -182,9 +183,11 @@ void Simulator::reset_axi_bus() { void Simulator::eval_axi_bus(bool clk) { if (!clk) { - mem_rsp_ready_ = vortex_->m_axi_rready; + mem_rd_rsp_ready_ = vortex_->m_axi_rready; + mem_wr_rsp_ready_ = vortex_->m_axi_bready; return; } + if (ram_ == nullptr) { vortex_->m_axi_wready = 0; vortex_->m_axi_awready = 0; @@ -200,44 +203,71 @@ void Simulator::eval_axi_bus(bool clk) { } } - bool has_response = false; + bool has_rd_response = false; + bool has_wr_response = false; // schedule memory responses that are ready for (int i = 0; i < MEMORY_BANKS; ++i) { uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS; - if (!mem_rsp_vec_[b].empty() - && (mem_rsp_vec_[b].begin()->cycles_left) <= 0) { - has_response = true; - last_mem_rsp_bank_ = b; - break; + if (!mem_rsp_vec_[b].empty()) { + auto mem_rsp_it = mem_rsp_vec_[b].begin(); + if (mem_rsp_it->cycles_left <= 0) { + has_rd_response = !mem_rsp_it->write; + has_wr_response = mem_rsp_it->write; + last_mem_rsp_bank_ = b; + break; + } } } - // send memory response - if (mem_rsp_active_ - && vortex_->m_axi_rvalid && mem_rsp_ready_) { - mem_rsp_active_ = false; + // send memory read response + if (mem_rd_rsp_active_ + && vortex_->m_axi_rvalid && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; } - if (!mem_rsp_active_) { - if (has_response) { - vortex_->m_axi_rvalid = 1; - std::list::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); + if (!mem_rd_rsp_active_) { + if (has_rd_response) { + auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); /* - printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); + printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { printf("%02x", mem_rsp_it->block[(MEM_BLOCK_SIZE-1)-i]); } printf("\n"); - */ + */ + vortex_->m_axi_rvalid = 1; + vortex_->m_axi_rid = mem_rsp_it->tag; + vortex_->m_axi_rresp = 0; + vortex_->m_axi_rlast = 1; memcpy((uint8_t*)vortex_->m_axi_rdata, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); - vortex_->m_axi_rid = mem_rsp_it->tag; mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); - mem_rsp_active_ = true; + mem_rd_rsp_active_ = true; } else { vortex_->m_axi_rvalid = 0; } } + // send memory write response + if (mem_wr_rsp_active_ + && vortex_->m_axi_bvalid && mem_wr_rsp_ready_) { + mem_wr_rsp_active_ = false; + } + if (!mem_wr_rsp_active_) { + if (has_wr_response) { + auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); + /* + printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); + */ + vortex_->m_axi_bvalid = 1; + vortex_->m_axi_bid = mem_rsp_it->tag; + vortex_->m_axi_bresp = 0; + mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); + mem_wr_rsp_active_ = true; + } else { + vortex_->m_axi_bvalid = 0; + } + } + // select the memory bank uint32_t req_addr = vortex_->m_axi_wvalid ? vortex_->m_axi_awaddr : vortex_->m_axi_araddr; uint32_t req_bank = (MEMORY_BANKS >= 2) ? ((req_addr / MEM_BLOCK_SIZE) % MEMORY_BANKS) : 0; @@ -260,6 +290,8 @@ void Simulator::eval_axi_bus(bool clk) { uint64_t byteen = vortex_->m_axi_wstrb; unsigned base_addr = vortex_->m_axi_awaddr; uint8_t* data = (uint8_t*)(vortex_->m_axi_wdata); + + // detect stdout write if (base_addr >= IO_COUT_ADDR && base_addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) { for (int i = 0; i < MEM_BLOCK_SIZE; i++) { @@ -286,13 +318,20 @@ void Simulator::eval_axi_bus(bool clk) { (*ram_)[base_addr + i] = data[i]; } } - } + mem_req_t mem_req; + mem_req.tag = vortex_->m_axi_arid; + mem_req.addr = vortex_->m_axi_araddr; + mem_req.cycles_left = 0; + mem_req.write = 1; + mem_rsp_vec_[req_bank].emplace_back(mem_req); + } } else { mem_req_t mem_req; mem_req.tag = vortex_->m_axi_arid; mem_req.addr = vortex_->m_axi_araddr; ram_->read(vortex_->m_axi_araddr, MEM_BLOCK_SIZE, mem_req.block.data()); mem_req.cycles_left = MEM_LATENCY; + mem_req.write = 0; for (auto& rsp : mem_rsp_vec_[req_bank]) { if (mem_req.addr == rsp.addr) { // duplicate requests receive the same cycle delay @@ -319,7 +358,7 @@ void Simulator::reset_mem_bus() { void Simulator::eval_mem_bus(bool clk) { if (!clk) { - mem_rsp_ready_ = vortex_->mem_rsp_ready; + mem_rd_rsp_ready_ = vortex_->mem_rsp_ready; return; } @@ -350,14 +389,14 @@ void Simulator::eval_mem_bus(bool clk) { } // send memory response - if (mem_rsp_active_ - && vortex_->mem_rsp_valid && mem_rsp_ready_) { - mem_rsp_active_ = false; + if (mem_rd_rsp_active_ + && vortex_->mem_rsp_valid && mem_rd_rsp_ready_) { + mem_rd_rsp_active_ = false; } - if (!mem_rsp_active_) { + if (!mem_rd_rsp_active_) { if (has_response) { vortex_->mem_rsp_valid = 1; - std::list::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); + auto mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin(); /* printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp_it->addr); for (int i = 0; i < MEM_BLOCK_SIZE; i++) { @@ -368,7 +407,7 @@ void Simulator::eval_mem_bus(bool clk) { memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE); vortex_->mem_rsp_tag = mem_rsp_it->tag; mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it); - mem_rsp_active_ = true; + mem_rd_rsp_active_ = true; } else { vortex_->mem_rsp_valid = 0; } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index d867ea83..ca618b6a 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -54,11 +54,12 @@ public: private: - typedef struct { + typedef struct { int cycles_left; std::array block; uint64_t addr; uint64_t tag; + bool write; } mem_req_t; std::unordered_map print_bufs_; @@ -80,9 +81,11 @@ private: std::list mem_rsp_vec_ [MEMORY_BANKS]; uint32_t last_mem_rsp_bank_; - bool mem_rsp_active_; + bool mem_rd_rsp_active_; + bool mem_rd_rsp_ready_; - bool mem_rsp_ready_; + bool mem_wr_rsp_active_; + bool mem_wr_rsp_ready_; RAM *ram_; diff --git a/hw/syn/opae/Makefile b/hw/syn/opae/Makefile index 98401114..72b39d44 100644 --- a/hw/syn/opae/Makefile +++ b/hw/syn/opae/Makefile @@ -26,11 +26,11 @@ DBG_FLAGS += -DDBG_CACHE_REQ_INFO CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) -CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2CACHE_SIZE=131072 $(CONFIGS) -CONFIG8 := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2CACHE_SIZE=131072 $(CONFIGS) -CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=262144 $(CONFIGS) -CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=262144 $(CONFIGS) -CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3CACHE_SIZE=524288 $(CONFIGS) +CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2_CACHE_SIZE=131072 $(CONFIGS) +CONFIG8 := -DNUM_CLUSTERS=1 -DNUM_CORES=8 -DL2_ENABLE=1 -DL3_ENABLE=0 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL2_CACHE_SIZE=131072 $(CONFIGS) +CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=262144 $(CONFIGS) +CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=262144 $(CONFIGS) +CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=1 -DICACHE_SIZE=8192 -DDCACHE_SIZE=8192 -DL3_CACHE_SIZE=524288 $(CONFIGS) FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY) RTL_INCLUDE = -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/afu diff --git a/hw/syn/quartus/top16/Makefile b/hw/syn/quartus/top16/Makefile index cdd7980c..a15a3582 100644 --- a/hw/syn/quartus/top16/Makefile +++ b/hw/syn/quartus/top16/Makefile @@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=262144" + quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=262144" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top32/Makefile b/hw/syn/quartus/top32/Makefile index dbd97d63..89ce5340 100644 --- a/hw/syn/quartus/top32/Makefile +++ b/hw/syn/quartus/top32/Makefile @@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=262144" + quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=262144" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top4/Makefile b/hw/syn/quartus/top4/Makefile index a41aa95e..3b71cdd2 100644 --- a/hw/syn/quartus/top4/Makefile +++ b/hw/syn/quartus/top4/Makefile @@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2CACHE_SIZE=65536" + quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2_CACHE_SIZE=65536" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top64/Makefile b/hw/syn/quartus/top64/Makefile index 29d3a3fe..95ebb30d 100644 --- a/hw/syn/quartus/top64/Makefile +++ b/hw/syn/quartus/top64/Makefile @@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3CACHE_SIZE=524288" + quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8" -set "L2_ENABLE=0" -set "L3_ENABLE=1" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L3_CACHE_SIZE=524288" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top8/Makefile b/hw/syn/quartus/top8/Makefile index a2de1384..07b0a46e 100644 --- a/hw/syn/quartus/top8/Makefile +++ b/hw/syn/quartus/top8/Makefile @@ -58,7 +58,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2CACHE_SIZE=131072" + quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=1" -set "L2_ENABLE=1" -set "L3_ENABLE=0" -set "ICACHE_SIZE=8192" -set "DCACHE_SIZE=8192" -set "L2_CACHE_SIZE=131072" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/yosys/Makefile b/hw/syn/yosys/Makefile index e87914aa..0aabcef4 100644 --- a/hw/syn/yosys/Makefile +++ b/hw/syn/yosys/Makefile @@ -3,15 +3,18 @@ TOP_LEVEL_ENTITY = Vortex SRC_FILE = Vortex.v RTL_DIR = ../../rtl -DEFINES = -DNDEBUG -DSYNTHESIS -DEXT_F_DISABLE -DNUM_CORES=1 -DNUM_THREADS=2 -DNUM_WARPS=2 +DEFINES = -DNDEBUG -DSYNTHESIS -DEXT_F_DISABLE -DNUM_CORES=1 -DNUM_THREADS=2 -DNUM_WARPS=2 -DMEM_BLOCK_SIZE=64 RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache # Build targets all: build -build: - ./synth.sh -t$(TOP_LEVEL_ENTITY) -s$(SRC_FILE) $(DEFINES) $(RTL_INCLUDE) +output.v: + ./sv2v.sh $(DEFINES) $(RTL_INCLUDE) -ooutput.v + +build: output.v + ./synth.sh -t$(TOP_LEVEL_ENTITY) -soutput.v clean: - rm -rf sources.v *.ys *.log + rm -rf output.v *.ys *.log diff --git a/hw/syn/yosys/diagram.ys b/hw/syn/yosys/diagram.ys deleted file mode 100644 index 7e1e5440..00000000 --- a/hw/syn/yosys/diagram.ys +++ /dev/null @@ -1,5 +0,0 @@ -# load design -read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/pipe_regs -I../../rtl/cache ../../rtl/Vortex.v - -# dump diagram -show diff --git a/hw/syn/yosys/sv2v.sh b/hw/syn/yosys/sv2v.sh new file mode 100755 index 00000000..cf5abaaf --- /dev/null +++ b/hw/syn/yosys/sv2v.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# this script uses sv2v and yosys tools to run. +# sv2v: https://github.com/zachjs/sv2v +# yosys: http://www.clifford.at/yosys/ + +# exit when any command fails +set -e + +source="" +includes=() +macro_args="" +output_file=out.v + +usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; } +[ $# -eq 0 ] && usage +while getopts "o:I:D:h" arg; do + case $arg in + s) # source + source=${OPTARG} + ;; + o) # output-file + output_file=${OPTARG} + ;; + I) # include directory + includes+=(${OPTARG}) + ;; + D) # macro definition + macro_args="$macro_args -D${OPTARG}" + ;; + h | *) + usage + exit 0 + ;; + esac +done + +# process include paths +inc_args="" +for dir in "${includes[@]}" +do + inc_args="$inc_args -I$dir" +done + +# process source files +file_args=$source +for dir in "${includes[@]}" +do + for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f) + do + echo "file: $file" + file_args="$file_args $file" + done +done + +# system-verilog to verilog conversion +sv2v $macro_args $inc_args $file_args -v -w $output_file \ No newline at end of file diff --git a/hw/syn/yosys/synth.sh b/hw/syn/yosys/synth.sh index c2425f2e..07528757 100755 --- a/hw/syn/yosys/synth.sh +++ b/hw/syn/yosys/synth.sh @@ -10,11 +10,12 @@ set -e source="" top_level="" dir_list=() -defines="" +inc_args="" +macro_args="" usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; } [ $# -eq 0 ] && usage -while getopts "hs:t:I:D:" arg; do +while getopts "s:t:I:D:h" arg; do case $arg in s) # source source=${OPTARG} @@ -24,9 +25,10 @@ while getopts "hs:t:I:D:" arg; do ;; I) # include directory dir_list+=(${OPTARG}) + inc_args="$inc_args -I${OPTARG}" ;; D) # macro definition - defines="$defines -D${OPTARG}" + macro_args="$macro_args -D${OPTARG}" ;; h | *) usage @@ -35,41 +37,29 @@ while getopts "hs:t:I:D:" arg; do esac done -echo "top_level=$top_level, source=$source, defines=$defines" - -# process include paths -inc_list="" -for dir in "${dir_list[@]}" -do - echo "include: $dir" >> synth.log - inc_list="$inc_list -I$dir" -done - -# process source files -file_list="" -for dir in "${dir_list[@]}" -do - for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f) +{ + # read design sources + for dir in "${dir_list[@]}" do - echo "file: $file" >> synth.log - file_list="$file_list $file" + for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f) + do + echo "read_verilog $macro_args $inc_args -sv $file" + done done -done + if [ -n "$source" ]; then + echo "read_verilog $macro_args $inc_args -sv $source" + fi -# system-verilog to verilog conversion -sv2v $defines -w output.v $inc_list $file_list + # generic synthesis + echo "synth -top $top_level" -{ - echo "read_verilog -sv output.v" - echo "hierarchy -check -top $top_level" + # mapping to mycells.lib + echo "dfflibmap -liberty mycells.lib" + echo "abc -liberty mycells.lib" + echo "clean" - # insertation of global reset - echo "add -global_input reset 1" - echo "proc -global_arst reset" - - echo "synth -run coarse; opt -fine" - echo "tee -o brams.log memory_bram -rules scripts/brams.txt;;" - echo "write_verilog -noexpr -noattr synth.v" + # write synthesized design + echo "write_verilog synth.v" } > synth.ys yosys -l yosys.log synth.ys \ No newline at end of file diff --git a/hw/unit_tests/cache/Makefile b/hw/unit_tests/cache/Makefile index b6552316..de775d4f 100644 --- a/hw/unit_tests/cache/Makefile +++ b/hw/unit_tests/cache/Makefile @@ -1,4 +1,4 @@ -PARAM += -DCACHE_SIZE=4096 -DWORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DNUM_BANKS=4 -DCREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4 +PARAM += -DCACHE_SIZE=4096 -DCACHE_WORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DCACHE_NUM_BANKS=4 -DCACHE_CREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4