GPRs optimization - disabling BRAM's read-during-write bypass block.

This commit is contained in:
Blaise Tine
2021-08-28 15:34:36 -07:00
parent 12b8b4af24
commit f3ba27b138
2 changed files with 108 additions and 106 deletions

View File

@@ -17,71 +17,100 @@ module VX_gpr_stage #(
`UNUSED_PARAM (CORE_ID) `UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
// ensure r0 never gets written, which can happen before the reset // ensure r0 never gets written, which can happen before the reset
wire write_enable = writeback_if.valid && (writeback_if.rd != 0); wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
`ifdef EXT_F_ENABLE wire [(`NUM_THREADS * 4)-1:0] wren;
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS; for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}};
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2, raddr3; end
assign waddr = {writeback_if.wid, writeback_if.rd}; reg [`NUM_THREADS-1:0][31:0] last_wdata;
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; reg [$clog2(RAM_SIZE)-1:0] last_waddr;
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; reg [`NUM_THREADS-1:0] last_wmask;
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
always @(posedge clk) begin
for (genvar i = 0; i < `NUM_THREADS; i++) begin last_wdata <= writeback_if.data;
VX_dp_ram #( last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask;
.RD_PORTS (3), last_waddr <= waddr;
.DATAW (32),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)
) dp_ram (
.clk (clk),
.wren (write_enable && writeback_if.tmask[i]),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.rden (3'b111),
.raddr ({raddr3, raddr2, raddr1}),
.rdata ({rdata3[i], rdata2[i], rdata1[i]})
);
end end
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = rdata3;
`else
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2; wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2; wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
assign waddr = {writeback_if.wid, writeback_if.rd}; assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
`UNUSED_VAR (gpr_req_if.rs3)
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_dp_ram #( VX_dp_ram #(
.RD_PORTS (2), .DATAW (32 * `NUM_THREADS),
.DATAW (32),
.SIZE (RAM_SIZE), .SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1), .INIT_ENABLE (1),
.INIT_VALUE (0) .INIT_VALUE (0),
) dp_ram ( .NO_RWCHECK (1)
) dp_ram1 (
.clk (clk), .clk (clk),
.wren (write_enable && writeback_if.tmask[i]), .wren (wren),
.waddr (waddr), .waddr (waddr),
.wdata (writeback_if.data[i]), .wdata (writeback_if.data),
.rden (2'b11), .rden (1'b1),
.raddr ({raddr2, raddr1}), .raddr (raddr1),
.rdata ({rdata2[i], rdata1[i]}) .rdata (rdata1)
); );
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram2 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
.rden (1'b1),
.raddr (raddr2),
.rdata (rdata2)
);
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i];
assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i];
end end
assign gpr_rsp_if.rs1_data = rdata1; `ifdef EXT_F_ENABLE
assign gpr_rsp_if.rs2_data = rdata2; wire [`NUM_THREADS-1:0][31:0] rdata3;
assign gpr_rsp_if.rs3_data = 0; wire [$clog2(RAM_SIZE)-1:0] raddr3;
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram3 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
.rden (1'b1),
.raddr (raddr3),
.rdata (rdata3)
);
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i];
end
`else
`UNUSED_VAR (gpr_req_if.rs3)
assign gpr_rsp_if.rs3_data = 'x;
`endif `endif
assign writeback_if.ready = 1'b1; assign writeback_if.ready = 1'b1;

View File

@@ -2,7 +2,6 @@
`TRACING_OFF `TRACING_OFF
module VX_dp_ram #( module VX_dp_ram #(
parameter RD_PORTS = 1,
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter BYTEENW = 1, parameter BYTEENW = 1,
@@ -18,14 +17,12 @@ module VX_dp_ram #(
input wire [BYTEENW-1:0] wren, input wire [BYTEENW-1:0] wren,
input wire [ADDRW-1:0] waddr, input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata, input wire [DATAW-1:0] wdata,
input wire [RD_PORTS-1:0] rden, input wire rden,
input wire [RD_PORTS-1:0][ADDRW-1:0] raddr, input wire [ADDRW-1:0] raddr,
output wire [RD_PORTS-1:0][DATAW-1:0] rdata output wire [DATAW-1:0] rdata
); );
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter")) `STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
`STATIC_ASSERT(!LUTRAM || (RD_PORTS == 1), ("multi-porting not supported on LUTRAM"))
`define RAM_INITIALIZATION \ `define RAM_INITIALIZATION \
if (INIT_ENABLE) begin \ if (INIT_ENABLE) begin \
@@ -94,7 +91,7 @@ module VX_dp_ram #(
end end
end else begin end else begin
if (OUTPUT_REG) begin if (OUTPUT_REG) begin
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
@@ -106,10 +103,8 @@ module VX_dp_ram #(
if (wren[i]) if (wren[i])
ram[waddr][i] <= wdata[i * 8 +: 8]; ram[waddr][i] <= wdata[i * 8 +: 8];
end end
for (integer i = 0; i < RD_PORTS; ++i) begin if (rden)
if (rden[i]) rdata_r <= ram[raddr];
rdata_r[i] <= ram[raddr[i]];
end
end end
end else begin end else begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [SIZE-1:0];
@@ -119,10 +114,8 @@ module VX_dp_ram #(
always @(posedge clk) begin always @(posedge clk) begin
if (wren) if (wren)
ram[waddr] <= wdata; ram[waddr] <= wdata;
for (integer i = 0; i < RD_PORTS; ++i) begin if (rden)
if (rden[i]) rdata_r <= ram[raddr];
rdata_r[i] <= ram[raddr[i]];
end
end end
end end
assign rdata = rdata_r; assign rdata = rdata_r;
@@ -140,9 +133,7 @@ module VX_dp_ram #(
ram[waddr][i] <= wdata[i * 8 +: 8]; ram[waddr][i] <= wdata[i * 8 +: 8];
end end
end end
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = ram[raddr];
assign rdata[i] = ram[raddr[i]];
end
end else begin end else begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0]; `NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
@@ -152,9 +143,7 @@ module VX_dp_ram #(
if (wren) if (wren)
ram[waddr] <= wdata; ram[waddr] <= wdata;
end end
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = ram[raddr];
assign rdata[i] = ram[raddr[i]];
end
end end
end else begin end else begin
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
@@ -168,9 +157,7 @@ module VX_dp_ram #(
ram[waddr][i] <= wdata[i * 8 +: 8]; ram[waddr][i] <= wdata[i * 8 +: 8];
end end
end end
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = ram[raddr];
assign rdata[i] = ram[raddr[i]];
end
end else begin end else begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [SIZE-1:0];
@@ -180,16 +167,14 @@ module VX_dp_ram #(
if (wren) if (wren)
ram[waddr] <= wdata; ram[waddr] <= wdata;
end end
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = ram[raddr];
assign rdata[i] = ram[raddr[i]];
end
end end
end end
end end
end end
`else `else
if (OUTPUT_REG) begin if (OUTPUT_REG) begin
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r; reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0]; reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
@@ -200,10 +185,8 @@ module VX_dp_ram #(
if (wren[i]) if (wren[i])
ram[waddr][i] <= wdata[i * 8 +: 8]; ram[waddr][i] <= wdata[i * 8 +: 8];
end end
for (integer i = 0; i < RD_PORTS; ++i) begin if (rden)
if (rden[i]) rdata_r <= ram[raddr];
rdata_r[i] <= ram[raddr[i]];
end
end end
end else begin end else begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [SIZE-1:0];
@@ -213,10 +196,8 @@ module VX_dp_ram #(
always @(posedge clk) begin always @(posedge clk) begin
if (wren) if (wren)
ram[waddr] <= wdata; ram[waddr] <= wdata;
for (integer i = 0; i < RD_PORTS; ++i) begin if (rden)
if (rden[i]) rdata_r <= ram[raddr];
rdata_r[i] <= ram[raddr[i]];
end
end end
end end
assign rdata = rdata_r; assign rdata = rdata_r;
@@ -244,13 +225,9 @@ module VX_dp_ram #(
`UNUSED_VAR (prev_write) `UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data) `UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_waddr) `UNUSED_VAR (prev_waddr)
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = ram[raddr];
assign rdata[i] = ram[raddr[i]];
end
end else begin end else begin
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
end
end end
end else begin end else begin
reg [DATAW-1:0] ram [SIZE-1:0]; reg [DATAW-1:0] ram [SIZE-1:0];
@@ -271,13 +248,9 @@ module VX_dp_ram #(
`UNUSED_VAR (prev_write) `UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data) `UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_waddr) `UNUSED_VAR (prev_waddr)
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = ram[raddr];
assign rdata[i] = ram[raddr[i]];
end
end else begin end else begin
for (genvar i = 0; i < RD_PORTS; ++i) begin assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
end
end end
end end
end end