From 5d58bf3d1106ee1a4d4c53be060087ccdf7f4192 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 15 Nov 2020 06:36:32 -0800 Subject: [PATCH] fixed l3cache hang using memory arbiter in afu --- driver/opae/vlsim/Makefile | 17 +- driver/rtlsim/Makefile | 7 +- driver/tests/dogfood/testcases.h | 68 +++-- hw/opae/VX_avs_wrapper.v | 128 ++++++++ hw/opae/sources_1c.txt | 4 +- hw/opae/vortex_afu.sv | 451 ++++++++++++++--------------- hw/rtl/VX_cluster.v | 50 ++-- hw/rtl/VX_define.vh | 10 +- hw/rtl/VX_gpr_stage.v | 4 - hw/rtl/VX_icache_stage.v | 2 +- hw/rtl/VX_lsu_unit.v | 2 +- hw/rtl/VX_mem_arb.v | 96 +++--- hw/rtl/VX_scoreboard.v | 20 +- hw/rtl/cache/VX_bank.v | 16 +- hw/rtl/cache/VX_cache_config.vh | 2 +- hw/rtl/cache/VX_cache_miss_resrv.v | 2 +- hw/rtl/cache/VX_data_access.v | 2 +- hw/rtl/cache/VX_tag_access.v | 4 +- hw/simulate/Makefile | 13 +- hw/unit_tests/cache/Makefile | 4 +- 20 files changed, 514 insertions(+), 388 deletions(-) create mode 100644 hw/opae/VX_avs_wrapper.v diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 279d4b8f..49886330 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -4,20 +4,21 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors CFLAGS += -I../../../../hw # control RTL debug print states +DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK -DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA -DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM -DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP +DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE -DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO +DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) -DBG_FLAGS += -DDBG_CORE_REQ_INFO 
+DBG_FLAGS += -DDBG_CACHE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 @@ -59,7 +60,7 @@ VL_FLAGS += verilator.vlt # Debugigng ifdef DEBUG - VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS) + VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs $(DBG_FLAGS) CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS) else VL_FLAGS += -DNDEBUG @@ -78,7 +79,7 @@ VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE # use DPI FPU -#VL_FLAGS += -DFPU_FAST +VL_FLAGS += -DFPU_FAST RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip @@ -94,7 +95,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json $(PROJECT): $(SRCS) $(SCOPE_VH) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) - OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk + make -j -C obj_dir -f V$(TOP).mk clean: rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 821f1bb4..17a96c6e 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -9,15 +9,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE -DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO +DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) -DBG_FLAGS += -DDBG_CORE_REQ_INFO +DBG_FLAGS += -DDBG_CACHE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1 @@ -70,7 +71,7 @@ all: $(PROJECT) $(PROJECT): $(SRCS) verilator --exe --cc $(TOP) --top-module 
$(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) - OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk + make -j -C obj_dir -f V$(TOP).mk clean: rm -rf $(PROJECT) obj_dir diff --git a/driver/tests/dogfood/testcases.h b/driver/tests/dogfood/testcases.h index e0df4ec8..4e1301bd 100644 --- a/driver/tests/dogfood/testcases.h +++ b/driver/tests/dogfood/testcases.h @@ -14,17 +14,29 @@ union Float_t { } parts; }; +inline float fround(float x, int32_t precision = 4) { + auto power_of_10 = std::pow(10, precision); + return std::round(x * power_of_10) / power_of_10; +} + inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) { - auto tolerance = std::max(fabs(a), fabs(b)) * eps; + auto tolerance = std::min(fabs(a), fabs(b)) * eps; return fabs(a - b) <= tolerance; } -inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) { +inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) { Float_t fa{a}, fb{b}; - return std::abs(fa.i - fb.i) <= ulp; + auto d = std::abs(fa.i - fb.i); + if (d > ulp) { + std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl; + return false; + } + return true; } inline bool almost_equal(float a, float b) { + if (almost_equal_eps(a, b)) + return true; return almost_equal_ulp(a, b); } @@ -158,8 +170,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -186,8 +198,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -214,8 +226,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) 
* (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -242,8 +254,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -270,8 +282,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -298,8 +310,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -326,8 +338,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -354,8 +366,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -384,8 +396,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -412,8 +424,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n - i) * (1.0f/n); - b[i] = (n + i) * (1.0f/n); + a[i] = fround((n - i) * (1.0f/n)); + b[i] = fround((n + i) * (1.0f/n)); } } @@ -442,7 +454,7 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - int q = 1.0f + (i % 64); + float q = 1.0f + (i % 64); a[i] = q; b[i] = q; } @@ -471,8 +483,8 
@@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = (n/2 - i) * (1.0f/n); - b[i] = (n/2 - i) * (1.0f/n); + a[i] = fround((n/2 - i) * (1.0f/n)); + b[i] = fround((n/2 - i) * (1.0f/n)); } } @@ -500,8 +512,8 @@ public: auto a = (float*)src1; auto b = (float*)src2; for (int i = 0; i < n; ++i) { - a[i] = i * (1.0f/n); - b[i] = i * (1.0f/n); + a[i] = fround(i * (1.0f/n)); + b[i] = fround(i * (1.0f/n)); } } diff --git a/hw/opae/VX_avs_wrapper.v b/hw/opae/VX_avs_wrapper.v new file mode 100644 index 00000000..2888ead0 --- /dev/null +++ b/hw/opae/VX_avs_wrapper.v @@ -0,0 +1,128 @@ +`include "VX_platform.vh" + +module VX_avs_wrapper #( + parameter AVS_DATAW = 1, + parameter AVS_ADDRW = 1, + parameter AVS_BURSTW = 1, + parameter AVS_BANKS = 1, + parameter REQ_TAGW = 1, + parameter RD_QUEUE_SIZE = 1, + + parameter AVS_BYTEENW = (AVS_DATAW / 8), + parameter RD_QUEUE_ADDRW= $clog2(RD_QUEUE_SIZE+1), + parameter AVS_BANKS_BITS= $clog2(AVS_BANKS) +) ( + input wire clk, + input wire reset, + + // AVS bus + output wire [AVS_DATAW-1:0] avs_writedata, + input wire [AVS_DATAW-1:0] avs_readdata, + output wire [AVS_ADDRW-1:0] avs_address, + input wire avs_waitrequest, + output wire avs_write, + output wire avs_read, + output wire [AVS_BYTEENW-1:0] avs_byteenable, + output wire [AVS_BURSTW-1:0] avs_burstcount, + input avs_readdatavalid, + output wire [AVS_BANKS_BITS-1:0] avs_bankselect, + + // DRAM request + input wire dram_req_valid, + input wire dram_req_rw, + input wire [AVS_BYTEENW-1:0] dram_req_byteen, + input wire [AVS_ADDRW-1:0] dram_req_addr, + input wire [AVS_DATAW-1:0] dram_req_data, + input wire [REQ_TAGW-1:0] dram_req_tag, + output wire dram_req_ready, + + // DRAM response + output wire dram_rsp_valid, + output wire [AVS_DATAW-1:0] dram_rsp_data, + output wire [REQ_TAGW-1:0] dram_rsp_tag, + input wire dram_rsp_ready +); + reg [AVS_BANKS_BITS-1:0] avs_bankselect_r; + reg [AVS_BURSTW-1:0] avs_burstcount_r; + + wire avs_rtq_push = 
!dram_req_rw && dram_req_valid && dram_req_ready; + wire avs_rtq_pop = dram_rsp_valid && dram_rsp_ready; + + wire avs_rdq_push = avs_readdatavalid; + wire avs_rdq_pop = avs_rtq_pop; + wire avs_rdq_empty; + + reg [RD_QUEUE_ADDRW-1:0] avs_pending_reads; + wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n; + + assign avs_pending_reads_n = avs_pending_reads + + RD_QUEUE_ADDRW'((avs_rtq_push && !avs_rdq_pop) ? 1 : + (avs_rdq_pop && !avs_rtq_push) ? -1 : 0); + + always @(posedge clk) begin + if (reset) begin + avs_burstcount_r <= 1; + avs_bankselect_r <= 0; + avs_pending_reads <= 0; + end else begin + avs_pending_reads <= avs_pending_reads_n; + end + end + + VX_generic_queue #( + .DATAW (REQ_TAGW), + .SIZE (RD_QUEUE_SIZE) + ) rd_req_queue ( + .clk (clk), + .reset (reset), + .push (avs_rtq_push), + .data_in (dram_req_tag), + .pop (avs_rtq_pop), + .data_out (dram_rsp_tag), + `UNUSED_PIN (empty), + `UNUSED_PIN (full), + `UNUSED_PIN (size) + ); + + VX_generic_queue #( + .DATAW (AVS_DATAW), + .SIZE (RD_QUEUE_SIZE) + ) rd_rsp_queue ( + .clk (clk), + .reset (reset), + .push (avs_rdq_push), + .data_in (avs_readdata), + .pop (avs_rdq_pop), + .data_out (dram_rsp_data), + .empty (avs_rdq_empty), + `UNUSED_PIN (full), + `UNUSED_PIN (size) + ); + + assign avs_read = dram_req_valid && !dram_req_rw; + assign avs_write = dram_req_valid && dram_req_rw; + assign avs_address = dram_req_addr; + assign avs_byteenable = dram_req_byteen; + assign avs_writedata = dram_req_data; + assign dram_req_ready = !avs_waitrequest + && (avs_pending_reads < RD_QUEUE_SIZE); + assign avs_burstcount = avs_burstcount_r; + assign avs_bankselect = avs_bankselect_r; + + assign dram_rsp_valid = !avs_rdq_empty; + +`ifdef DBG_PRINT_AVS + always @(posedge clk) begin + if (dram_req_valid && dram_req_ready) begin + if (dram_req_rw) + $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_writedata); + else + $display("%t: AVS Rd Req: 
addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_pending_reads_n); + end + if (dram_rsp_valid && dram_rsp_ready) begin + $display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_n); + end + end +`endif + +endmodule \ No newline at end of file diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index d36f5e7c..90fade61 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -10,12 +10,14 @@ #+define+DBG_PRINT_CACHE_BANK #+define+DBG_PRINT_CACHE_SNP #+define+DBG_PRINT_CACHE_MSRQ +#+define+DBG_PRINT_CACHE_TAG #+define+DBG_PRINT_CACHE_DATA #+define+DBG_PRINT_DRAM #+define+DBG_PRINT_PIPELINE #+define+DBG_PRINT_OPAE -#+define+DBG_CORE_REQ_INFO +#+define+DBG_PRINT_AVS #+define+DBG_PRINT_SCOPE +#+define+DBG_CACHE_REQ_INFO vortex_afu.json QI:vortex_afu.qsf diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 1c3972b3..be90f49f 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -39,11 +39,13 @@ module vortex_afu #( localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr); localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data); - localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH); + localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH); +localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW); localparam AVS_RD_QUEUE_SIZE = 16; +localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX; localparam CCI_RD_WINDOW_SIZE = 8; localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; @@ -134,28 +136,12 @@ wire [31:0] vx_csr_io_rsp_data; wire vx_csr_io_rsp_ready; reg vx_reset; +reg vx_enabled; wire vx_busy; -// AVS Queues ///////////////////////////////////////////////////////////////// - -wire avs_rtq_push; -wire avs_rtq_pop; -`DEBUG_BEGIN -wire avs_rtq_empty; -wire avs_rtq_full; -`DEBUG_BEGIN - -wire avs_rdq_push; -wire avs_rdq_pop; -t_local_mem_data avs_rdq_dout; -wire avs_rdq_empty; -`DEBUG_BEGIN -wire avs_rdq_full; 
-`DEBUG_END - // CMD variables ////////////////////////////////////////////////////////////// -t_ccip_clAddr cmd_io_addr; +t_ccip_clAddr cmd_io_addr; reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; @@ -167,9 +153,9 @@ wire cmd_scope_write; `endif reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; -reg [11:0] cmd_csr_addr; -reg [31:0] cmd_csr_rdata; -reg [31:0] cmd_csr_wdata; +reg [11:0] cmd_csr_addr; +reg [31:0] cmd_csr_rdata; +reg [31:0] cmd_csr_wdata; // MMIO controller //////////////////////////////////////////////////////////// @@ -189,6 +175,7 @@ assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmi assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address); `endif +/* `DEBUG_BEGIN wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; @@ -201,6 +188,7 @@ wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata; `DEBUG_END +*/ wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 
3'(cp2af_sRxPort.c0.data) : 3'h0; @@ -220,13 +208,8 @@ always @(posedge clk) begin `ifndef VERILATOR $asserton; // enable assertions `endif - mmio_tx.hdr <= 0; - mmio_tx.data <= 0; mmio_tx.mmioRdValid <= 0; - cmd_io_addr <= 0; - cmd_mem_addr <= 0; - cmd_data_size <= 0; `ifdef SCOPE scope_start <= 0; `endif @@ -359,10 +342,10 @@ wire cmd_run_done; always @(posedge clk) begin if (reset) begin - state <= STATE_IDLE; - vx_reset <= 0; - end - else begin + state <= STATE_IDLE; + vx_reset <= 0; + vx_enabled <= 0; + end else begin vx_reset <= 0; @@ -385,7 +368,8 @@ always @(posedge clk) begin `ifdef DBG_PRINT_OPAE $display("%t: STATE START", $time); `endif - vx_reset <= 1; + vx_reset <= 1; + vx_enabled <= 1; state <= STATE_START; end CMD_CLFLUSH: begin @@ -480,215 +464,186 @@ end // AVS Controller ///////////////////////////////////////////////////////////// -wire vortex_enabled; -wire cci_rdq_empty; +wire dram_req_valid; +wire dram_req_rw; +t_local_mem_byte_mask dram_req_byteen; +t_local_mem_addr dram_req_addr; +t_local_mem_data dram_req_data; +wire [AVS_REQ_TAGW:0] dram_req_tag; +wire dram_req_ready; + +wire dram_rsp_valid; +t_local_mem_data dram_rsp_data; +wire [AVS_REQ_TAGW:0] dram_rsp_tag; +wire dram_rsp_ready; + +wire cci_dram_req_valid; +wire cci_dram_req_rw; +t_local_mem_byte_mask cci_dram_req_byteen; +t_local_mem_addr cci_dram_req_addr; +t_local_mem_data cci_dram_req_data; +wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag; +wire cci_dram_req_ready; + +wire cci_dram_rsp_valid; +t_local_mem_data cci_dram_rsp_data; +wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag; +wire cci_dram_rsp_ready; + +wire vx_dram_req_valid_qual; +t_local_mem_addr vx_dram_req_addr_qual; +t_local_mem_byte_mask vx_dram_req_byteen_qual; +t_local_mem_data vx_dram_req_data_qual; +wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual; + +wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual; +wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual; + +wire cci_dram_rd_req_valid, cci_dram_wr_req_valid; 
+wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout; +wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx, vx_dram_rsp_idx; -wire cci_dram_rd_req_fire; -wire cci_dram_wr_req_fire; -wire vx_dram_rd_req_fire; -`DEBUG_BEGIN -wire vx_dram_wr_req_fire; -`DEBUG_END -wire vx_dram_rd_rsp_fire; +//-- -t_local_mem_byte_mask vx_dram_req_byteen_; -reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads; -wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next; -wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr; +assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid; -wire cci_dram_rd_req_enable, cci_dram_wr_req_enable; -wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable; +assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr; -reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr; +assign cci_dram_req_rw = (CMD_MEM_WRITE == state); -assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state); +assign cci_dram_req_byteen = {64{1'b1}}; -assign cci_dram_rd_req_enable = (state == STATE_READ) - && (avs_pending_reads < AVS_RD_QUEUE_SIZE) - && (cci_dram_rd_req_ctr != 0); +assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; -assign cci_dram_wr_req_enable = (state == STATE_WRITE) - && !cci_rdq_empty - && (cci_dram_wr_req_ctr < cmd_data_size); +assign cci_dram_req_tag = AVS_REQ_TAGW'(0); -assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE); -assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && !vx_dram_req_rw; -assign vx_dram_wr_req_enable = vx_dram_req_enable && vx_dram_req_valid && vx_dram_req_rw; +`UNUSED_VAR (cci_dram_rsp_tag) -assign cci_dram_rd_req_fire = cci_dram_rd_req_enable && !avs_waitrequest; -assign cci_dram_wr_req_fire = 
cci_dram_wr_req_enable && !avs_waitrequest; +//-- -assign vx_dram_rd_req_fire = vx_dram_rd_req_enable && !avs_waitrequest; -assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest; +assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled; -assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready; - -assign avs_pending_reads_next = avs_pending_reads - + $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 : - (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0); +assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin - assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW; - assign vx_dram_req_byteen_ = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]) << (VX_DRAM_LINE_LW - 3)); + assign vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]; + assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3)); + assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW); + assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx}; + assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx]; end else begin - assign vx_dram_req_offset = 0; - assign vx_dram_req_byteen_ = vx_dram_req_byteen; + assign vx_dram_req_idx = VX_DRAM_LINE_IDX'(0); + assign vx_dram_req_byteen_qual = vx_dram_req_byteen; + assign vx_dram_req_tag_qual = vx_dram_req_tag; + assign vx_dram_req_data_qual = vx_dram_req_data; + assign vx_dram_rsp_data = vx_dram_rsp_data_unqual; end -always @(*) begin - case (state) - CMD_MEM_READ: avs_address = cci_dram_rd_req_addr; - CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); 
- default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; - endcase +assign vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0]; +assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX]; - case (state) - CMD_MEM_READ: avs_byteenable = 64'hffffffffffffffff; - CMD_MEM_WRITE: avs_byteenable = 64'hffffffffffffffff; - default: avs_byteenable = vx_dram_req_byteen_; - endcase +//-- - case (state) - CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; - default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset; - endcase -end +VX_mem_arb #( + .NUM_REQUESTS (2), + .DATA_WIDTH ($bits(t_local_mem_data)), + .ADDR_WIDTH ($bits(t_local_mem_addr)), + .TAG_IN_WIDTH (AVS_REQ_TAGW), + .TAG_OUT_WIDTH (AVS_REQ_TAGW+1) +) vx_cci_avs_arb ( + .clk (clk), + .reset (reset), -assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable; -assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable; + // Source request + .req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}), + .req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}), + .req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}), + .req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}), + .req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}), + .req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}), + .req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}), -assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size); + // Source response + .rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}), + .rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}), + .rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}), + .rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}), -always @(posedge clk) begin - if (reset) - begin - mem_bank_select <= 0; - avs_burstcount <= 1; - cci_dram_rd_req_addr <= 0; - cci_dram_wr_req_addr <= 0; - cci_dram_rd_req_ctr 
<= 0; - cci_dram_wr_req_ctr <= 0; - avs_pending_reads <= 0; - end - else begin - - if (state == STATE_IDLE) begin - if (CMD_MEM_READ == cmd_type) begin - cci_dram_rd_req_addr <= cmd_mem_addr; - cci_dram_rd_req_ctr <= cmd_data_size; - end - else if (CMD_MEM_WRITE == cmd_type) begin - cci_dram_wr_req_addr <= cmd_mem_addr; - cci_dram_wr_req_ctr <= 0; - end - end - - if (cci_dram_rd_req_fire) begin - cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1); - cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1); - `ifdef DBG_PRINT_OPAE - $display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next); - `endif - end - - if (cci_dram_wr_req_fire) begin - cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); - cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); - `ifdef DBG_PRINT_OPAE - $display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1)); - `endif - end - - `ifdef DBG_PRINT_OPAE - if (vx_dram_rd_req_fire) begin - $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_pending_reads_next); - end - - if (vx_dram_wr_req_fire) begin - $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_writedata); - end - - if (avs_readdatavalid) begin - $display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_next); - end - `endif - - avs_pending_reads <= avs_pending_reads_next; - end -end - -// Vortex DRAM requests - -assign vx_dram_req_ready = vx_dram_req_enable && !avs_waitrequest; - -// Vortex DRAM fill response - -assign 
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty; -if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin - assign vx_dram_rsp_data = (`VX_DRAM_LINE_WIDTH)'(avs_rdq_dout >> vx_dram_rsp_offset); -end else begin - assign vx_dram_rsp_data = avs_rdq_dout; -end - -// AVS address read request queue ///////////////////////////////////////////// - -assign avs_rtq_push = vx_dram_rd_req_fire; -assign avs_rtq_pop = vx_dram_rd_rsp_fire; - -VX_generic_queue #( - .DATAW (`VX_DRAM_TAG_WIDTH + DRAM_LINE_LW), - .SIZE (AVS_RD_QUEUE_SIZE) -) avs_rd_req_queue ( - .clk (clk), - .reset (reset), - .push (avs_rtq_push), - .data_in ({vx_dram_req_tag, vx_dram_req_offset}), - .pop (avs_rtq_pop), - .data_out ({vx_dram_rsp_tag, vx_dram_rsp_offset}), - .empty (avs_rtq_empty), - .full (avs_rtq_full), - `UNUSED_PIN (size) + // DRAM request + .req_valid_out (dram_req_valid), + .req_rw_out (dram_req_rw), + .req_byteen_out (dram_req_byteen), + .req_addr_out (dram_req_addr), + .req_data_out (dram_req_data), + .req_tag_out (dram_req_tag), + .req_ready_out (dram_req_ready), + + // DRAM response + .rsp_valid_in (dram_rsp_valid), + .rsp_tag_in (dram_rsp_tag), + .rsp_data_in (dram_rsp_data), + .rsp_ready_in (dram_rsp_ready) ); -// AVS data read response queue /////////////////////////////////////////////// +//-- -wire cci_wr_req_fire; +VX_avs_wrapper #( + .AVS_DATAW ($bits(t_local_mem_data)), + .AVS_ADDRW ($bits(t_local_mem_addr)), + .AVS_BURSTW ($bits(t_local_mem_burst_cnt)), + .AVS_BANKS (NUM_LOCAL_MEM_BANKS), + .REQ_TAGW (AVS_REQ_TAGW+1), + .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE) +) avs_wrapper ( + .clk (clk), + .reset (reset), -assign avs_rdq_push = avs_readdatavalid; -assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire; + // AVS bus + .avs_writedata (avs_writedata), + .avs_readdata (avs_readdata), + .avs_address (avs_address), + .avs_waitrequest (avs_waitrequest), + .avs_write (avs_write), + .avs_read (avs_read), + .avs_byteenable (avs_byteenable), + .avs_burstcount (avs_burstcount), + 
.avs_readdatavalid (avs_readdatavalid), + .avs_bankselect (mem_bank_select), -VX_generic_queue #( - .DATAW (DRAM_LINE_WIDTH), - .SIZE (AVS_RD_QUEUE_SIZE) -) avs_rd_rsp_queue ( - .clk (clk), - .reset (reset), - .push (avs_rdq_push), - .data_in (avs_readdata), - .pop (avs_rdq_pop), - .data_out (avs_rdq_dout), - .empty (avs_rdq_empty), - .full (avs_rdq_full), - `UNUSED_PIN (size) + // DRAM request + .dram_req_valid (dram_req_valid), + .dram_req_rw (dram_req_rw), + .dram_req_byteen (dram_req_byteen), + .dram_req_addr (dram_req_addr), + .dram_req_data (dram_req_data), + .dram_req_tag (dram_req_tag), + .dram_req_ready (dram_req_ready), + + // DRAM response + .dram_rsp_valid (dram_rsp_valid), + .dram_rsp_data (dram_rsp_data), + .dram_rsp_tag (dram_rsp_tag), + .dram_rsp_ready (dram_rsp_ready) ); // CCI-P Read Request /////////////////////////////////////////////////////////// reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads; wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr; reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr; wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual; wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag; reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr; t_ccip_clAddr cci_rd_req_addr; -wire cci_rd_req_fire, cci_rd_rsp_fire; reg cci_rd_req_enable, cci_rd_req_wait; wire cci_rdq_push, cci_rdq_pop; wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din; +wire cci_rdq_empty; always @(*) begin af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0); @@ -696,8 +651,10 @@ always @(*) begin af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag); end -assign cci_rd_req_fire = af2cp_sTxPort.c0.valid; -assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; +wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready; + +wire cci_rd_req_fire = af2cp_sTxPort.c0.valid; +wire cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; 
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); @@ -712,28 +669,36 @@ assign cci_pending_reads_next = cci_pending_reads + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 : (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0); +assign cci_dram_wr_req_valid = !cci_rdq_empty; + +assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); + assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait; +assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size); + // Send read requests to CCI always @(posedge clk) begin if (reset) begin - cci_rd_req_addr <= 0; - cci_rd_req_ctr <= 0; - cci_rd_rsp_ctr <= 0; - cci_pending_reads <= 0; - cci_rd_req_enable <= 0; - cci_rd_req_wait <= 0; + cci_rd_req_addr <= 0; + cci_rd_req_ctr <= 0; + cci_rd_rsp_ctr <= 0; + cci_pending_reads <= 0; + cci_rd_req_enable <= 0; + cci_rd_req_wait <= 0; + cci_dram_wr_req_ctr <= 0; end - else begin - + else begin if ((STATE_IDLE == state) && (CMD_MEM_WRITE == cmd_type)) begin - cci_rd_req_addr <= cmd_io_addr; - cci_rd_req_ctr <= 0; - cci_rd_rsp_ctr <= 0; - cci_pending_reads <= 0; - cci_rd_req_enable <= (cmd_data_size != 0); - cci_rd_req_wait <= 0; + cci_rd_req_addr <= cmd_io_addr; + cci_rd_req_ctr <= 0; + cci_rd_rsp_ctr <= 0; + cci_pending_reads <= 0; + cci_rd_req_enable <= (cmd_data_size != 0); + cci_rd_req_wait <= 0; + cci_dram_wr_req_ctr <= 0; + cci_dram_wr_req_addr_unqual <= cmd_mem_addr; end cci_rd_req_enable <= (STATE_WRITE == state) @@ -768,6 +733,11 @@ always @(posedge clk) begin `endif end + if (cci_dram_wr_req_fire) begin + cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? 
DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0)); + cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1); + end + cci_pending_reads <= cci_pending_reads_next; end end @@ -811,57 +781,61 @@ VX_generic_queue #( reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr; reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr; +reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_unqual; t_ccip_clAddr cci_wr_req_addr; -reg cci_wr_req_enable; -wire cci_wr_rsp_fire; always @(*) begin af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0); af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr; af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode - af2cp_sTxPort.c1.data = t_ccip_clData'(avs_rdq_dout); + af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data); end -assign cci_wr_req_fire = af2cp_sTxPort.c1.valid; -assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; +wire cci_wr_req_fire = af2cp_sTxPort.c1.valid; +wire cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; + +wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready; assign cci_pending_writes_next = cci_pending_writes + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 : (!cci_wr_req_fire && cci_wr_rsp_fire) ? 
-1 : 0); -assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); +assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0); -assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty; +assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_unqual; + +assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid; +assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull; + +assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); // Send write requests to CCI always @(posedge clk) begin if (reset) begin - cci_wr_req_addr <= 0; - cci_wr_req_ctr <= 0; - cci_wr_req_enable <= 0; - cci_pending_writes <= 0; + cci_wr_req_addr <= 0; + cci_wr_req_ctr <= 0; + cci_pending_writes <= 0; + cci_dram_rd_req_ctr <= 0; end - else begin - + else begin if ((STATE_IDLE == state) && (CMD_MEM_READ == cmd_type)) begin - cci_wr_req_addr <= cmd_io_addr; - cci_wr_req_ctr <= cmd_data_size; - cci_pending_writes <= 0; - end - - cci_wr_req_enable <= (STATE_READ == state) - && (cci_pending_writes_next < CCI_RW_QUEUE_SIZE) - && !cp2af_sRxPort.c1TxAlmFull; + cci_wr_req_addr <= cmd_io_addr; + cci_wr_req_ctr <= cmd_data_size; + cci_pending_writes <= 0; + cci_dram_rd_req_ctr <= cmd_data_size; + cci_dram_rd_req_addr_unqual <= cmd_mem_addr; + end if (cci_wr_req_fire) begin assert(cci_wr_req_ctr != 0); cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1); cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1); `ifdef DBG_PRINT_OPAE - $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout); + $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next); `endif end @@ -871,6 +845,11 @@ begin end `endif + if (cci_dram_rd_req_fire) begin + cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1); + cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1); + end + 
cci_pending_writes <= cci_pending_writes_next; end end diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 5bcb7c4f..c4138aca 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -553,42 +553,42 @@ module VX_cluster #( VX_mem_arb #( .NUM_REQUESTS (`L2NUM_REQUESTS), - .WORD_SIZE (`L2BANK_LINE_SIZE), + .DATA_WIDTH (`L2DRAM_LINE_WIDTH), .TAG_IN_WIDTH (`DDRAM_TAG_WIDTH), .TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH) ) dram_arb ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), // Core request - .mem_req_valid_in (core_dram_req_valid), - .mem_req_rw_in (core_dram_req_rw), - .mem_req_byteen_in (core_dram_req_byteen), - .mem_req_addr_in (core_dram_req_addr), - .mem_req_data_in (core_dram_req_data), - .mem_req_tag_in (core_dram_req_tag), - .mem_req_ready_in (core_dram_req_ready), + .req_valid_in (core_dram_req_valid), + .req_rw_in (core_dram_req_rw), + .req_byteen_in (core_dram_req_byteen), + .req_addr_in (core_dram_req_addr), + .req_data_in (core_dram_req_data), + .req_tag_in (core_dram_req_tag), + .req_ready_in (core_dram_req_ready), // Core response - .mem_rsp_valid_in (core_dram_rsp_valid), - .mem_rsp_data_in (core_dram_rsp_data), - .mem_rsp_tag_in (core_dram_rsp_tag), - .mem_rsp_ready_in (core_dram_rsp_ready), + .rsp_valid_out (core_dram_rsp_valid), + .rsp_data_out (core_dram_rsp_data), + .rsp_tag_out (core_dram_rsp_tag), + .rsp_ready_out (core_dram_rsp_ready), // DRAM request - .mem_req_valid_out (dram_req_valid), - .mem_req_rw_out (dram_req_rw), - .mem_req_byteen_out (dram_req_byteen), - .mem_req_addr_out (dram_req_addr), - .mem_req_data_out (dram_req_data), - .mem_req_tag_out (dram_req_tag), - .mem_req_ready_out (dram_req_ready), + .req_valid_out (dram_req_valid), + .req_rw_out (dram_req_rw), + .req_byteen_out (dram_req_byteen), + .req_addr_out (dram_req_addr), + .req_data_out (dram_req_data), + .req_tag_out (dram_req_tag), + .req_ready_out (dram_req_ready), // DRAM response - .mem_rsp_valid_out (dram_rsp_valid), - .mem_rsp_tag_out 
(dram_rsp_tag), - .mem_rsp_data_out (dram_rsp_data), - .mem_rsp_ready_out (dram_rsp_ready) + .rsp_valid_in (dram_rsp_valid), + .rsp_tag_in (dram_rsp_tag), + .rsp_data_in (dram_rsp_data), + .rsp_ready_in (dram_rsp_ready) ); end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index a77a4407..84b39f7d 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -234,10 +234,10 @@ /////////////////////////////////////////////////////////////////////////////// -`ifdef DBG_CORE_REQ_INFO // pc, rd, wid -`define DBG_CORE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS) +`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid +`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS) `else -`define DBG_CORE_REQ_MDATAW 0 +`define DBG_CACHE_REQ_MDATAW 0 `endif ////////////////////////// Dcache Configurable Knobs ////////////////////////// @@ -249,7 +249,7 @@ `define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE) // Core request tag bits -`define DCORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `DCORE_TAG_ID_BITS) +`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS) // DRAM request data bits `define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8) @@ -287,7 +287,7 @@ `define ICORE_TAG_ID_BITS `NW_BITS // Core request tag bits -`define ICORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `ICORE_TAG_ID_BITS) +`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS) // DRAM request data bits `define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8) diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 17e84694..1b996d3d 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -39,10 +39,6 @@ module VX_gpr_stage #( always @(posedge clk) begin if (reset) begin rsp_valid <= 0; - rsp_wid <= 0; - rsp_pc <= 0; - rs1_is_zero <= 0; - rs2_is_zero <= 0; end else begin rsp_valid <= gpr_req_if.valid; rsp_wid <= gpr_req_if.wid; diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 13776c55..f65eb75d 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -45,7 +45,7 @@ module 
VX_icache_stage #( // Can accept new request? assign ifetch_req_if.ready = icache_req_if.ready; -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag}; `else assign icache_req_if.tag = req_tag; diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 6f442a0e..0c89ec7d 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -144,7 +144,7 @@ module VX_lsu_unit #( assign dcache_req_if.addr = req_addr; assign dcache_req_if.data = req_data; -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag}; `else assign dcache_req_if.tag = req_tag; diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index 08d84b11..8f7b6a46 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -2,46 +2,46 @@ module VX_mem_arb #( parameter NUM_REQUESTS = 1, - parameter WORD_SIZE = 1, + parameter DATA_WIDTH = 1, parameter TAG_IN_WIDTH = 1, parameter TAG_OUT_WIDTH = 1, - - parameter WORD_WIDTH = WORD_SIZE * 8, - parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), + + parameter DATA_SIZE = (DATA_WIDTH / 8), + parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), parameter REQS_BITS = `CLOG2(NUM_REQUESTS) ) ( input wire clk, input wire reset, // input requests - input wire [NUM_REQUESTS-1:0] mem_req_valid_in, - input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in, - input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in, - input wire [NUM_REQUESTS-1:0] mem_req_rw_in, - input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in, - input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in, - output wire [NUM_REQUESTS-1:0] mem_req_ready_in, + input wire [NUM_REQUESTS-1:0] req_valid_in, + input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] req_tag_in, + input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] req_addr_in, + input wire [NUM_REQUESTS-1:0] req_rw_in, + input wire [NUM_REQUESTS-1:0][DATA_SIZE-1:0] req_byteen_in, + input wire 
[NUM_REQUESTS-1:0][DATA_WIDTH-1:0] req_data_in, + output wire [NUM_REQUESTS-1:0] req_ready_in, // input response - output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in, - output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in, - output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in, - input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in, + output wire [NUM_REQUESTS-1:0] rsp_valid_out, + output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out, + output wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] rsp_data_out, + input wire [NUM_REQUESTS-1:0] rsp_ready_out, // output request - output wire mem_req_valid_out, - output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out, - output wire [ADDR_WIDTH-1:0] mem_req_addr_out, - output wire mem_req_rw_out, - output wire [WORD_SIZE-1:0] mem_req_byteen_out, - output wire [WORD_WIDTH-1:0] mem_req_data_out, - input wire mem_req_ready_out, + output wire req_valid_out, + output wire [TAG_OUT_WIDTH-1:0] req_tag_out, + output wire [ADDR_WIDTH-1:0] req_addr_out, + output wire req_rw_out, + output wire [DATA_SIZE-1:0] req_byteen_out, + output wire [DATA_WIDTH-1:0] req_data_out, + input wire req_ready_out, // output response - input wire mem_rsp_valid_out, - input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out, - input wire [WORD_WIDTH-1:0] mem_rsp_data_out, - output wire mem_rsp_ready_out + input wire rsp_valid_in, + input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in, + input wire [DATA_WIDTH-1:0] rsp_data_in, + output wire rsp_ready_in ); if (NUM_REQUESTS > 1) begin @@ -53,59 +53,59 @@ module VX_mem_arb #( ) req_arb ( .clk (clk), .reset (reset), - .requests (mem_req_valid_in), + .requests (req_valid_in), `UNUSED_PIN (grant_valid), .grant_index (req_idx), .grant_onehot (req_1hot) ); - wire stall = ~mem_req_ready_out && mem_req_valid_out; + wire stall = ~req_ready_out && req_valid_out; VX_generic_register #( - .N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH), + .N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH), 
.PASSTHRU(NUM_REQUESTS <= 2) ) pipe_reg ( .clk (clk), .reset (reset), .stall (stall), .flush (1'b0), - .in ({mem_req_valid_in[req_idx], {mem_req_tag_in[req_idx], REQS_BITS'(req_idx)}, mem_req_addr_in[req_idx], mem_req_rw_in[req_idx], mem_req_byteen_in[req_idx], mem_req_data_in[req_idx]}), - .out ({mem_req_valid_out, mem_req_tag_out, mem_req_addr_out, mem_req_rw_out, mem_req_byteen_out, mem_req_data_out}) + .in ({req_valid_in[req_idx], {req_tag_in[req_idx], REQS_BITS'(req_idx)}, req_addr_in[req_idx], req_rw_in[req_idx], req_byteen_in[req_idx], req_data_in[req_idx]}), + .out ({req_valid_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}) ); for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign mem_req_ready_in[i] = req_1hot[i] && ~stall; + assign req_ready_in[i] = req_1hot[i] && ~stall; end /////////////////////////////////////////////////////////////////////// - wire [REQS_BITS-1:0] rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0]; + wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in[REQS_BITS-1:0]; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (rsp_sel == REQS_BITS'(i)); - assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH]; - assign mem_rsp_data_in[i] = mem_rsp_data_out; + assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i)); + assign rsp_tag_out[i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH]; + assign rsp_data_out[i] = rsp_data_in; end - assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel]; + assign rsp_ready_in = rsp_ready_out[rsp_sel]; end else begin `UNUSED_VAR (clk) `UNUSED_VAR (reset) - assign mem_req_valid_out = mem_req_valid_in; - assign mem_req_tag_out = mem_req_tag_in; - assign mem_req_addr_out = mem_req_addr_in; - assign mem_req_rw_out = mem_req_rw_in; - assign mem_req_byteen_out = mem_req_byteen_in; - assign mem_req_data_out = mem_req_data_in; - assign mem_req_ready_in = mem_req_ready_out; + assign req_valid_out = req_valid_in; + assign req_tag_out = 
req_tag_in; + assign req_addr_out = req_addr_in; + assign req_rw_out = req_rw_in; + assign req_byteen_out = req_byteen_in; + assign req_data_out = req_data_in; + assign req_ready_in = req_ready_out; - assign mem_rsp_valid_in = mem_rsp_valid_out; - assign mem_rsp_tag_in = mem_rsp_tag_out; - assign mem_rsp_data_in = mem_rsp_data_out; - assign mem_rsp_ready_out = mem_rsp_ready_in; + assign rsp_valid_out = rsp_valid_in; + assign rsp_tag_out = rsp_tag_in; + assign rsp_data_out = rsp_data_in; + assign rsp_ready_in = rsp_ready_out; end diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index 63e7b1d1..6fca200f 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -64,23 +64,27 @@ module VX_scoreboard #( assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay); `ifdef DBG_PRINT_PIPELINE + always @(posedge clk) begin + if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin + $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", + $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, + inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); + end + end +`endif + reg [31:0] stall_ctr; always @(posedge clk) begin if (reset) begin stall_ctr <= 0; end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin - $display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", + stall_ctr <= stall_ctr + 1; + assert(stall_ctr < 100000) else $error("%t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b", $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay); - stall_ctr <= stall_ctr + 1; - if (stall_ctr >= 2000) begin - $fflush(); - assert(0); - end end else if (ibuf_deq_if.valid && 
ibuf_deq_if.ready) begin stall_ctr <= 0; end - end -`endif + end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 27d58a09..31fc23fd 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -100,7 +100,7 @@ module VX_bank #( output wire misses ); -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire[31:0] debug_pc_st0; wire[`NR_BITS-1:0] debug_rd_st0; @@ -352,7 +352,7 @@ module VX_bank #( wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0 || ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0; end else begin @@ -371,7 +371,7 @@ module VX_bank #( .out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; end else begin @@ -420,7 +420,7 @@ module VX_bank #( .clk (clk), .reset (reset), - `ifdef DBG_CORE_REQ_INFO + `ifdef DBG_CACHE_REQ_INFO .debug_pc (debug_pc_st1), .debug_rd (debug_rd_st1), .debug_wid (debug_wid_st1), @@ -474,7 +474,7 @@ module VX_bank #( .out ({is_msrq_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) ); -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = 
inst_meta_st2; end else begin @@ -498,7 +498,7 @@ module VX_bank #( .clk (clk), .reset (reset), - `ifdef DBG_CORE_REQ_INFO + `ifdef DBG_CACHE_REQ_INFO .debug_pc (debug_pc_st2), .debug_rd (debug_rd_st2), .debug_wid (debug_wid_st2), @@ -562,7 +562,7 @@ module VX_bank #( .out ({is_msrq_st3, send_core_rsp_st3, send_fill_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_invalidate_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) ); -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3; end else begin @@ -623,7 +623,7 @@ module VX_bank #( .clk (clk), .reset (reset), - `ifdef DBG_CORE_REQ_INFO + `ifdef DBG_CACHE_REQ_INFO .debug_pc_st0 (debug_pc_st0), .debug_rd_st0 (debug_rd_st0), .debug_wid_st0 (debug_wid_st0), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index b8fe1d46..050c0b32 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -3,7 +3,7 @@ `include "VX_platform.vh" -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO `include "VX_define.vh" `endif diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 7e65bb8e..cf97625e 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -24,7 +24,7 @@ module VX_cache_miss_resrv #( input wire clk, input wire reset, -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN input wire[31:0] debug_pc_st0, input wire[`NR_BITS-1:0] debug_rd_st0, diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 5aa70c4d..3b78271d 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -25,7 +25,7 @@ module VX_data_access #( input wire clk, input wire reset, -`ifdef 
DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN input wire[31:0] debug_pc, input wire[`NR_BITS-1:0] debug_rd, diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 3d75ffd0..2745cffc 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -25,7 +25,7 @@ module VX_tag_access #( input wire clk, input wire reset, -`ifdef DBG_CORE_REQ_INFO +`ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN input wire[31:0] debug_pc, input wire[`NR_BITS-1:0] debug_rd, @@ -122,7 +122,7 @@ module VX_tag_access #( assign readtag_out = use_read_tag; assign writeen_out = (use_do_write || use_do_fill); -`ifdef DBG_PRINT_CACHE_DATA +`ifdef DBG_PRINT_CACHE_TAG always @(posedge clk) begin if (valid_in && !stall) begin if (use_do_fill && tags_match) begin diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index dfa3891d..f2836aab 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -10,15 +10,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ +DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE -DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO +DBG_PRINT_FLAGS += -DDBG_PRINT_AVS DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE DBG_FLAGS += $(DBG_PRINT_FLAGS) -DBG_FLAGS += -DDBG_CORE_REQ_INFO +DBG_FLAGS += -DDBG_CACHE_REQ_INFO FPU_INCLUDE = -I../rtl/fp_cores -I../rtl/fp_cores/svdpi -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate $(FPU_INCLUDE) @@ -45,7 +46,7 @@ gen-s: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' gen-sd: - verilator 
$(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs --trace-threads 1 $(DBG) + verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG) gen-st: verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) @@ -54,7 +55,7 @@ gen-m: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' gen-md: - verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs --trace-threads 1 $(DBG) + verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG) gen-mt: verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) @@ -63,7 +64,7 @@ build-s: gen-s (cd obj_dir && make -j -f VVortex.mk) build-sd: gen-sd - (cd obj_dir && OPT_FAST="-O0 -g" make -j -f VVortex.mk) + (cd obj_dir && make -j -f VVortex.mk) build-st: gen-st (cd obj_dir && make -j -f VVortex.mk) @@ -72,7 +73,7 @@ build-m: gen-m (cd obj_dir && make -j -f VVortex.mk) build-md: gen-md - (cd obj_dir && OPT_FAST="-O0 -g" make -j -f VVortex.mk) + (cd obj_dir && make -j -f VVortex.mk) build-mt: gen-mt (cd obj_dir && make -j -f VVortex.mk) diff --git a/hw/unit_tests/cache/Makefile b/hw/unit_tests/cache/Makefile index 93f1fe64..aa2b4000 100644 --- a/hw/unit_tests/cache/Makefile +++ b/hw/unit_tests/cache/Makefile @@ -8,9 +8,11 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ -DDBG_PRINT_CACHE_BANK \ -DDBG_PRINT_CACHE_SNP \ -DDBG_PRINT_CACHE_MSRQ \ + -DDBG_PRINT_CACHE_TAG \ -DDBG_PRINT_CACHE_DATA \ -DDBG_PRINT_DRAM \ - -DDBG_PRINT_OPAE + -DDBG_PRINT_OPAE \ + -DDBG_PRINT_AVS #DBG_PRINT=$(DBG_PRINT_FLAGS)