From 36602cfa6a391e702a74870a191130f3530ed174 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 1 Jan 2021 11:46:30 -0800 Subject: [PATCH 1/3] buffering core reset signal --- hw/rtl/VX_cluster.v | 8 ++++++- hw/rtl/libs/VX_pipe_register.v | 41 +++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index f06fa702..274bc8e3 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -68,13 +68,19 @@ module VX_cluster #( wire [`NUM_CORES-1:0] per_core_ebreak; for (genvar i = 0; i < `NUM_CORES; i++) begin + + reg core_reset; + always @(posedge clk) begin + core_reset <= reset; + end + VX_core #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) core ( `SCOPE_BIND_VX_cluster_core(i) .clk (clk), - .reset (reset), + .reset (core_reset), .dram_req_valid (per_core_dram_req_valid[i]), .dram_req_rw (per_core_dram_req_rw [i]), diff --git a/hw/rtl/libs/VX_pipe_register.v b/hw/rtl/libs/VX_pipe_register.v index 60a52d7c..d1f12d1f 100644 --- a/hw/rtl/libs/VX_pipe_register.v +++ b/hw/rtl/libs/VX_pipe_register.v @@ -18,24 +18,43 @@ module VX_pipe_register #( `UNUSED_VAR (enable) assign data_out = data_in; end else if (DEPTH == 1) begin - reg [DATAW-1:0] value; - if (RESETW != 0) begin - always @(posedge clk) begin - if (reset) begin - value[DATAW-1:DATAW-RESETW] <= RESETW'(0); - end else if (enable) begin - value <= data_in; - end - end - end else begin + if (RESETW == 0) begin `UNUSED_VAR (reset) + reg [DATAW-1:0] value; + always @(posedge clk) begin if (enable) begin value <= data_in; end end + assign data_out = value; + end else if (RESETW == DATAW) begin + reg [DATAW-1:0] value; + + always @(posedge clk) begin + if (reset) begin + value <= RESETW'(0); + end else if (enable) begin + value <= data_in; + end + end + assign data_out = value; + end else begin + reg [DATAW-RESETW-1:0] value_d; + reg [RESETW-1:0] value_r; + + always @(posedge clk) begin + if (reset) begin + value_r <= RESETW'(0); + end else if (enable) begin + value_r <= data_in[DATAW-1:DATAW-RESETW]; + end + if (enable) begin + value_d <= data_in[DATAW-RESETW-1:0]; + end + end + assign data_out = {value_r, value_d}; end - assign data_out = value; end else begin VX_shift_register #( .DATAW (DATAW), From da9649c2a3d1230cb669e469524bc72f658a8b62 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 1 Jan 2021 14:54:18 -0800 Subject: [PATCH 2/3] fixed pipe register reset issue in synthesis --- hw/rtl/VX_cluster.v | 10 ++-------- hw/rtl/libs/VX_pipe_register.v | 7 +++++-- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 274bc8e3..e6d35554 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -67,20 +67,14 @@ module VX_cluster #( wire [`NUM_CORES-1:0] per_core_busy; wire [`NUM_CORES-1:0] per_core_ebreak; - for (genvar i = 0; i < `NUM_CORES; i++) begin - - reg core_reset; - always @(posedge clk) begin - core_reset <= reset; - end - + for (genvar i = 0; i < `NUM_CORES; i++) begin VX_core #( .CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) ) core ( `SCOPE_BIND_VX_cluster_core(i) .clk (clk), - .reset (core_reset), + .reset (reset), .dram_req_valid (per_core_dram_req_valid[i]), .dram_req_rw (per_core_dram_req_rw [i]), diff --git a/hw/rtl/libs/VX_pipe_register.v b/hw/rtl/libs/VX_pipe_register.v index d1f12d1f..1e503ebd 100644 --- a/hw/rtl/libs/VX_pipe_register.v +++ b/hw/rtl/libs/VX_pipe_register.v @@ -17,7 +17,7 @@ module VX_pipe_register #( `UNUSED_VAR (reset) `UNUSED_VAR (enable) assign data_out = data_in; - end else if (DEPTH == 1) begin + end else if (DEPTH == 1) begin if (RESETW == 0) begin `UNUSED_VAR (reset) reg [DATAW-1:0] value; @@ -39,7 +39,7 @@ module VX_pipe_register #( end end assign data_out = value; - end else begin + end else begin reg [DATAW-RESETW-1:0] value_d; reg [RESETW-1:0] value_r; @@ -49,6 +49,9 @@ module VX_pipe_register #( end else if (enable) begin value_r <= data_in[DATAW-1:DATAW-RESETW]; end + end + + always @(posedge clk) begin if (enable) begin value_d <= data_in[DATAW-RESETW-1:0]; end From 93c36273fa5f312eb207b8a192893e69848125b8 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 1 Jan 2021 20:24:18 -0800 Subject: [PATCH 3/3] minor update --- hw/rtl/VX_config.vh | 2 +- hw/rtl/VX_gpr_ram_f.v | 2 ++ hw/rtl/VX_gpr_stage.v | 62 ++++++++++++------------------------ hw/rtl/cache/VX_bank.v | 49 ++++++++++++---------------- hw/rtl/cache/VX_miss_resrv.v | 2 +- 5 files changed, 45 insertions(+), 72 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 15568c7b..b5166c98 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -248,7 +248,7 @@ // Size of LSU Request Queue `ifndef LSUQ_SIZE -`define LSUQ_SIZE 8 +`define LSUQ_SIZE (`NUM_WARPS * `NUM_THREADS) `endif // Size of FPU Request Queue diff --git a/hw/rtl/VX_gpr_ram_f.v b/hw/rtl/VX_gpr_ram_f.v index 3b800993..68c2a69f 100644 --- a/hw/rtl/VX_gpr_ram_f.v +++ b/hw/rtl/VX_gpr_ram_f.v @@ -20,6 +20,8 @@ module VX_gpr_ram_f #( ); reg [DATAW-1:0] mem [DEPTH-1:0]; + initial mem = '{default: 0}; + always @(posedge clk) begin if (wren) begin mem [waddr] <= wdata; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 39e81b2a..ce4783e1 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -16,36 +16,14 @@ module VX_gpr_stage #( `UNUSED_VAR (reset) `ifdef EXT_F_ENABLE - localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2); - wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f; + localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3; - - wire waddr_is_fp = writeback_if.rd[`NR_BITS-1]; - wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1]; - wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1]; - wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1]; - `UNUSED_VAR (raddr3_is_fp) - assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]}; - assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]}; - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - VX_gpr_ram_i #( - .DATAW (32), - .DEPTH (RAM_DEPTH) - ) gpr_ram_i ( - .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp), - .waddr (waddr), - .wdata (writeback_if.data[i]), - .raddr1 (raddr1), - .raddr2 (raddr2), - .rdata1 (rdata1_i[i]), - .rdata2 (rdata2_i[i]) - ); - end + assign waddr = {writeback_if.wid, writeback_if.rd}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; for (genvar i = 0; i < `NUM_THREADS; i++) begin VX_gpr_ram_f #( @@ -53,29 +31,29 @@ module VX_gpr_stage #( .DEPTH (RAM_DEPTH) ) gpr_ram_f ( .clk (clk), - .wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp), + .wren (writeback_if.valid && writeback_if.tmask[i]), .waddr (waddr), .wdata (writeback_if.data[i]), .raddr1 (raddr1), .raddr2 (raddr2), .raddr3 (raddr3), - .rdata1 (rdata1_f[i]), - .rdata2 (rdata2_f[i]), - .rdata3 (rdata3_f[i]) + .rdata1 (rdata1[i]), + .rdata2 (rdata2[i]), + .rdata3 (rdata3[i]) ); end - assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i; - assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i; - assign gpr_rsp_if.rs3_data = rdata3_f; + assign gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; + assign gpr_rsp_if.rs3_data = rdata3; `else localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; - wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i; + wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2; wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2; assign waddr = {writeback_if.wid, writeback_if.rd}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; `UNUSED_VAR (gpr_req_if.rs3) for (genvar i = 0; i < `NUM_THREADS; i++) begin @@ -89,13 +67,13 @@ module VX_gpr_stage #( .wdata (writeback_if.data[i]), .raddr1 (raddr1), .raddr2 (raddr2), - .rdata1 (rdata1_i[i]), - .rdata2 (rdata2_i[i]) + .rdata1 (rdata1[i]), + .rdata2 (rdata2[i]) ); end - assign gpr_rsp_if.rs1_data = rdata1_i; - assign gpr_rsp_if.rs2_data = rdata2_i; + assign gpr_rsp_if.rs1_data = rdata1; + assign gpr_rsp_if.rs2_data = rdata2; assign gpr_rsp_if.rs3_data = 0; `endif diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 4660ef3d..ad6115d2 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -85,6 +85,9 @@ module VX_bank #( input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, output wire dram_rsp_ready ); + + localparam MSHR_SIZE_BITS = $clog2(MSHR_SIZE+1); + `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0; @@ -172,8 +175,8 @@ module VX_bank #( ); wire mshr_pop; - reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; - wire [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size_n; + reg [MSHR_SIZE_BITS-1:0] mshr_pending_size; + wire [MSHR_SIZE_BITS-1:0] mshr_pending_size_n; reg mshr_going_full; wire mshr_valid_st0; @@ -264,7 +267,7 @@ module VX_bank #( wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); - wire creq_commit = valid_st2 + wire creq_commit = valid_st2 && !is_fill_st2 && (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2)) && !pipeline_stall; @@ -287,7 +290,7 @@ module VX_bank #( mshr_going_full <= 0; end else begin mshr_pending_size <= mshr_pending_size_n; - mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE); + mshr_going_full <= (mshr_pending_size_n == MSHR_SIZE_BITS'(MSHR_SIZE)); end end @@ -298,13 +301,10 @@ module VX_bank #( assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : drsq_pop_unqual ? drsq_addr_st0 : - creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] : - 0; + creq_addr_st0[`LINE_SELECT_ADDR_RNG]; if (`WORD_SELECT_WIDTH != 0) begin - assign wsel_st0 = creq_pop_unqual ? creq_addr_st0[`WORD_SELECT_WIDTH-1:0] : - mshr_pop_unqual ? mshr_wsel_st0 : - 0; + assign wsel_st0 = creq_pop_unqual ? creq_addr_st0[`WORD_SELECT_WIDTH-1:0] : mshr_wsel_st0; end else begin `UNUSED_VAR (mshr_wsel_st0) assign wsel_st0 = 0; @@ -312,25 +312,15 @@ module VX_bank #( assign writedata_st0 = drsq_filldata_st0; - assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : - creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : - 0; + assign tag_st0 = mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag_st0) : `REQ_TAG_WIDTH'(creq_tag_st0); - assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : - creq_pop_unqual ? creq_rw_st0 : - 0; + assign mem_rw_st0 = mshr_pop_unqual ? mshr_rw_st0 : creq_rw_st0; - assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : - creq_pop_unqual ? creq_byteen_st0 : - 0; + assign byteen_st0 = mshr_pop_unqual ? mshr_byteen_st0 : creq_byteen_st0; - assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : - creq_pop_unqual ? creq_tid_st0 : - 0; + assign req_tid_st0 = mshr_pop_unqual ? mshr_tid_st0 : creq_tid_st0; - assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : - creq_pop_unqual ? creq_writeword_st0 : - 0; + assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : creq_writeword_st0; `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -426,13 +416,13 @@ if (DRAM_ENABLE) begin || (is_mshr_st1 && addr_st1 != addr_st2)) && !incoming_fill_st1; - assign do_writeback_st1 = (WRITE_THROUGH && mem_rw_st1) - || (!WRITE_THROUGH && dirty_st1 && is_fill_st1); + assign do_writeback_st1 = (WRITE_THROUGH && !is_fill_st1 && mem_rw_st1) + || (!WRITE_THROUGH && is_fill_st1 && dirty_st1); assign dreq_push_st1 = do_fill_req_st1 || do_writeback_st1; assign mshr_push_st1 = (miss_st1 || force_miss_st1) - && !(WRITE_THROUGH && mem_rw_st1); + && !(WRITE_THROUGH && !is_fill_st1 && mem_rw_st1); assign crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; @@ -591,6 +581,9 @@ end // or the fill request is comming for this block wire mshr_init_ready_state_st2 = valid_st2 && (!miss_st2 || incoming_fill_qual_st2); + // use dram rsp or core req address to lookup the mshr + wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = drsq_pop_unqual ? drsq_addr_st0 : creq_addr_st0[`LINE_SELECT_ADDR_RNG]; + VX_miss_resrv #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), @@ -622,7 +615,7 @@ end // lookup .lookup_ready (update_ready_st0), - .lookup_addr (addr_st0), + .lookup_addr (lookup_addr), .lookup_match (mshr_pending_hazard_unqual_st0), // schedule diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 1c07d14d..c35444ab 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -53,7 +53,7 @@ module VX_miss_resrv #( // dequeue input wire dequeue ); - `USE_FAST_BRAM reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; + `USE_FAST_BRAM reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table;