From f3ba27b138a154fb68525bfa3339007f9bd46f0b Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sat, 28 Aug 2021 15:34:36 -0700 Subject: [PATCH] GPRs optimization - disabling BRAM's read-during-write bypass block. --- hw/rtl/VX_gpr_stage.v | 135 ++++++++++++++++++++++++---------------- hw/rtl/libs/VX_dp_ram.v | 79 ++++++++--------------- 2 files changed, 108 insertions(+), 106 deletions(-) diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 56f484bc..17bc317c 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -17,71 +17,100 @@ module VX_gpr_stage #( `UNUSED_PARAM (CORE_ID) `UNUSED_VAR (reset) + localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS; + // ensure r0 never gets written, which can happen before the reset wire write_enable = writeback_if.valid && (writeback_if.rd != 0); -`ifdef EXT_F_ENABLE - localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS; - wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; - wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2, raddr3; - - assign waddr = {writeback_if.wid, writeback_if.rd}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; - assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; + wire [(`NUM_THREADS * 4)-1:0] wren; + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}}; + end - for (genvar i = 0; i < `NUM_THREADS; i++) begin - VX_dp_ram #( - .RD_PORTS (3), - .DATAW (32), - .SIZE (RAM_SIZE), - .INIT_ENABLE (1), - .INIT_VALUE (0) - ) dp_ram ( - .clk (clk), - .wren (write_enable && writeback_if.tmask[i]), - .waddr (waddr), - .wdata (writeback_if.data[i]), - .rden (3'b111), - .raddr ({raddr3, raddr2, raddr1}), - .rdata ({rdata3[i], rdata2[i], rdata1[i]}) - ); + reg [`NUM_THREADS-1:0][31:0] last_wdata; + reg [$clog2(RAM_SIZE)-1:0] last_waddr; + reg [`NUM_THREADS-1:0] last_wmask; + + always @(posedge clk) begin + last_wdata <= writeback_if.data; + last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask; + last_waddr <= waddr; end - assign gpr_rsp_if.rs1_data = rdata1; - assign gpr_rsp_if.rs2_data = rdata2; - assign gpr_rsp_if.rs3_data = rdata3; -`else - localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS; wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2; wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2; - + assign waddr = {writeback_if.wid, writeback_if.rd}; - assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; - `UNUSED_VAR (gpr_req_if.rs3) + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; - for (genvar i = 0; i < `NUM_THREADS; i++) begin - VX_dp_ram #( - .RD_PORTS (2), - .DATAW (32), - .SIZE (RAM_SIZE), - .INIT_ENABLE (1), - .INIT_VALUE (0) - ) dp_ram ( - .clk (clk), - .wren (write_enable && writeback_if.tmask[i]), - .waddr (waddr), - .wdata (writeback_if.data[i]), - .rden (2'b11), - .raddr ({raddr2, raddr1}), - .rdata ({rdata2[i], rdata1[i]}) - ); + VX_dp_ram #( + .DATAW (32 * `NUM_THREADS), + .SIZE (RAM_SIZE), + .BYTEENW (`NUM_THREADS * 4), + .INIT_ENABLE (1), + .INIT_VALUE (0), + .NO_RWCHECK (1) + ) dp_ram1 ( + .clk (clk), + .wren (wren), + .waddr (waddr), + .wdata (writeback_if.data), + .rden (1'b1), + .raddr (raddr1), + .rdata (rdata1) + ); + + VX_dp_ram #( + .DATAW (32 * `NUM_THREADS), + .SIZE (RAM_SIZE), + .BYTEENW (`NUM_THREADS * 4), + .INIT_ENABLE (1), + .INIT_VALUE (0), + .NO_RWCHECK (1) + ) dp_ram2 ( + .clk (clk), + .wren (wren), + .waddr (waddr), + .wdata (writeback_if.data), + .rden (1'b1), + .raddr (raddr2), + .rdata (rdata2) + ); + + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i]; + assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i]; end + +`ifdef EXT_F_ENABLE + wire [`NUM_THREADS-1:0][31:0] rdata3; + wire [$clog2(RAM_SIZE)-1:0] raddr3; + assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; - assign gpr_rsp_if.rs1_data = rdata1; - assign gpr_rsp_if.rs2_data = rdata2; - assign gpr_rsp_if.rs3_data = 0; + VX_dp_ram #( + .DATAW (32 * `NUM_THREADS), + .SIZE (RAM_SIZE), + .BYTEENW (`NUM_THREADS * 4), + .INIT_ENABLE (1), + .INIT_VALUE (0), + .NO_RWCHECK (1) + ) dp_ram3 ( + .clk (clk), + .wren (wren), + .waddr (waddr), + .wdata (writeback_if.data), + .rden (1'b1), + .raddr (raddr3), + .rdata (rdata3) + ); + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i]; + end +`else + `UNUSED_VAR (gpr_req_if.rs3) + assign gpr_rsp_if.rs3_data = 'x; `endif assign writeback_if.ready = 1'b1; diff --git a/hw/rtl/libs/VX_dp_ram.v b/hw/rtl/libs/VX_dp_ram.v index e1d6defa..db8e99b8 100644 --- a/hw/rtl/libs/VX_dp_ram.v +++ b/hw/rtl/libs/VX_dp_ram.v @@ -2,7 +2,6 @@ `TRACING_OFF module VX_dp_ram #( - parameter RD_PORTS = 1, parameter DATAW = 1, parameter SIZE = 1, parameter BYTEENW = 1, @@ -14,18 +13,16 @@ module VX_dp_ram #( parameter INIT_FILE = "", parameter [DATAW-1:0] INIT_VALUE = 0 ) ( - input wire clk, - input wire [BYTEENW-1:0] wren, - input wire [ADDRW-1:0] waddr, - input wire [DATAW-1:0] wdata, - input wire [RD_PORTS-1:0] rden, - input wire [RD_PORTS-1:0][ADDRW-1:0] raddr, - output wire [RD_PORTS-1:0][DATAW-1:0] rdata + input wire clk, + input wire [BYTEENW-1:0] wren, + input wire [ADDRW-1:0] waddr, + input wire [DATAW-1:0] wdata, + input wire rden, + input wire [ADDRW-1:0] raddr, + output wire [DATAW-1:0] rdata ); `STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter")) - `STATIC_ASSERT(!LUTRAM || (RD_PORTS == 1), ("multi-porting not supported on LUTRAM")) - `define RAM_INITIALIZATION \ if (INIT_ENABLE) begin \ @@ -94,7 +91,7 @@ module VX_dp_ram #( end end else begin if (OUTPUT_REG) begin - reg [RD_PORTS-1:0][DATAW-1:0] rdata_r; + reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; @@ -106,10 +103,8 @@ module VX_dp_ram #( if (wren[i]) ram[waddr][i] <= wdata[i * 8 +: 8]; end - for (integer i = 0; i < RD_PORTS; ++i) begin - if (rden[i]) - rdata_r[i] <= ram[raddr[i]]; - end + if (rden) + rdata_r <= ram[raddr]; end end else begin reg [DATAW-1:0] ram [SIZE-1:0]; @@ -119,10 +114,8 @@ module VX_dp_ram #( always @(posedge clk) begin if (wren) ram[waddr] <= wdata; - for (integer i = 0; i < RD_PORTS; ++i) begin - if (rden[i]) - rdata_r[i] <= ram[raddr[i]]; - end + if (rden) + rdata_r <= ram[raddr]; end end assign rdata = rdata_r; @@ -140,9 +133,7 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * 8 +: 8]; end end - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = ram[raddr[i]]; - end + assign rdata = ram[raddr]; end else begin `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0]; @@ -152,9 +143,7 @@ module VX_dp_ram #( if (wren) ram[waddr] <= wdata; end - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = ram[raddr[i]]; - end + assign rdata = ram[raddr]; end end else begin if (BYTEENW > 1) begin @@ -168,9 +157,7 @@ module VX_dp_ram #( ram[waddr][i] <= wdata[i * 8 +: 8]; end end - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = ram[raddr[i]]; - end + assign rdata = ram[raddr]; end else begin reg [DATAW-1:0] ram [SIZE-1:0]; @@ -180,16 +167,14 @@ module VX_dp_ram #( if (wren) ram[waddr] <= wdata; end - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = ram[raddr[i]]; - end + assign rdata = ram[raddr]; end end end end `else if (OUTPUT_REG) begin - reg [RD_PORTS-1:0][DATAW-1:0] rdata_r; + reg [DATAW-1:0] rdata_r; if (BYTEENW > 1) begin reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; @@ -200,10 +185,8 @@ module VX_dp_ram #( if (wren[i]) ram[waddr][i] <= wdata[i * 8 +: 8]; end - for (integer i = 0; i < RD_PORTS; ++i) begin - if (rden[i]) - rdata_r[i] <= ram[raddr[i]]; - end + if (rden) + rdata_r <= ram[raddr]; end end else begin reg [DATAW-1:0] ram [SIZE-1:0]; @@ -213,10 +196,8 @@ module VX_dp_ram #( always @(posedge clk) begin if (wren) ram[waddr] <= wdata; - for (integer i = 0; i < RD_PORTS; ++i) begin - if (rden[i]) - rdata_r[i] <= ram[raddr[i]]; - end + if (rden) + rdata_r <= ram[raddr]; end end assign rdata = rdata_r; @@ -244,13 +225,9 @@ module VX_dp_ram #( `UNUSED_VAR (prev_write) `UNUSED_VAR (prev_data) `UNUSED_VAR (prev_waddr) - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = ram[raddr[i]]; - end + assign rdata = ram[raddr]; end else begin - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]]; - end + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; end end else begin reg [DATAW-1:0] ram [SIZE-1:0]; @@ -271,17 +248,13 @@ module VX_dp_ram #( `UNUSED_VAR (prev_write) `UNUSED_VAR (prev_data) `UNUSED_VAR (prev_waddr) - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = ram[raddr[i]]; - end + assign rdata = ram[raddr]; end else begin - for (genvar i = 0; i < RD_PORTS; ++i) begin - assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]]; - end + assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr]; end end end -`endif +`endif endmodule `TRACING_ON \ No newline at end of file