GPRs optimization - disabling BRAM's read-during-write bypass block.
This commit is contained in:
@@ -17,71 +17,100 @@ module VX_gpr_stage #(
|
|||||||
`UNUSED_PARAM (CORE_ID)
|
`UNUSED_PARAM (CORE_ID)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
|
|
||||||
|
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||||
|
|
||||||
// ensure r0 never gets written, which can happen before the reset
|
// ensure r0 never gets written, which can happen before the reset
|
||||||
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
wire [(`NUM_THREADS * 4)-1:0] wren;
|
||||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}};
|
||||||
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2, raddr3;
|
end
|
||||||
|
|
||||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
reg [`NUM_THREADS-1:0][31:0] last_wdata;
|
||||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
reg [$clog2(RAM_SIZE)-1:0] last_waddr;
|
||||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
reg [`NUM_THREADS-1:0] last_wmask;
|
||||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
|
||||||
|
always @(posedge clk) begin
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
last_wdata <= writeback_if.data;
|
||||||
VX_dp_ram #(
|
last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask;
|
||||||
.RD_PORTS (3),
|
last_waddr <= waddr;
|
||||||
.DATAW (32),
|
|
||||||
.SIZE (RAM_SIZE),
|
|
||||||
.INIT_ENABLE (1),
|
|
||||||
.INIT_VALUE (0)
|
|
||||||
) dp_ram (
|
|
||||||
.clk (clk),
|
|
||||||
.wren (write_enable && writeback_if.tmask[i]),
|
|
||||||
.waddr (waddr),
|
|
||||||
.wdata (writeback_if.data[i]),
|
|
||||||
.rden (3'b111),
|
|
||||||
.raddr ({raddr3, raddr2, raddr1}),
|
|
||||||
.rdata ({rdata3[i], rdata2[i], rdata1[i]})
|
|
||||||
);
|
|
||||||
end
|
end
|
||||||
|
|
||||||
assign gpr_rsp_if.rs1_data = rdata1;
|
|
||||||
assign gpr_rsp_if.rs2_data = rdata2;
|
|
||||||
assign gpr_rsp_if.rs3_data = rdata3;
|
|
||||||
`else
|
|
||||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
|
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
|
||||||
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
|
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
|
||||||
|
|
||||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||||
`UNUSED_VAR (gpr_req_if.rs3)
|
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
VX_dp_ram #(
|
VX_dp_ram #(
|
||||||
.RD_PORTS (2),
|
.DATAW (32 * `NUM_THREADS),
|
||||||
.DATAW (32),
|
|
||||||
.SIZE (RAM_SIZE),
|
.SIZE (RAM_SIZE),
|
||||||
|
.BYTEENW (`NUM_THREADS * 4),
|
||||||
.INIT_ENABLE (1),
|
.INIT_ENABLE (1),
|
||||||
.INIT_VALUE (0)
|
.INIT_VALUE (0),
|
||||||
) dp_ram (
|
.NO_RWCHECK (1)
|
||||||
|
) dp_ram1 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.wren (write_enable && writeback_if.tmask[i]),
|
.wren (wren),
|
||||||
.waddr (waddr),
|
.waddr (waddr),
|
||||||
.wdata (writeback_if.data[i]),
|
.wdata (writeback_if.data),
|
||||||
.rden (2'b11),
|
.rden (1'b1),
|
||||||
.raddr ({raddr2, raddr1}),
|
.raddr (raddr1),
|
||||||
.rdata ({rdata2[i], rdata1[i]})
|
.rdata (rdata1)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
VX_dp_ram #(
|
||||||
|
.DATAW (32 * `NUM_THREADS),
|
||||||
|
.SIZE (RAM_SIZE),
|
||||||
|
.BYTEENW (`NUM_THREADS * 4),
|
||||||
|
.INIT_ENABLE (1),
|
||||||
|
.INIT_VALUE (0),
|
||||||
|
.NO_RWCHECK (1)
|
||||||
|
) dp_ram2 (
|
||||||
|
.clk (clk),
|
||||||
|
.wren (wren),
|
||||||
|
.waddr (waddr),
|
||||||
|
.wdata (writeback_if.data),
|
||||||
|
.rden (1'b1),
|
||||||
|
.raddr (raddr2),
|
||||||
|
.rdata (rdata2)
|
||||||
|
);
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||||
|
assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i];
|
||||||
|
assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i];
|
||||||
end
|
end
|
||||||
|
|
||||||
assign gpr_rsp_if.rs1_data = rdata1;
|
`ifdef EXT_F_ENABLE
|
||||||
assign gpr_rsp_if.rs2_data = rdata2;
|
wire [`NUM_THREADS-1:0][31:0] rdata3;
|
||||||
assign gpr_rsp_if.rs3_data = 0;
|
wire [$clog2(RAM_SIZE)-1:0] raddr3;
|
||||||
|
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||||
|
|
||||||
|
VX_dp_ram #(
|
||||||
|
.DATAW (32 * `NUM_THREADS),
|
||||||
|
.SIZE (RAM_SIZE),
|
||||||
|
.BYTEENW (`NUM_THREADS * 4),
|
||||||
|
.INIT_ENABLE (1),
|
||||||
|
.INIT_VALUE (0),
|
||||||
|
.NO_RWCHECK (1)
|
||||||
|
) dp_ram3 (
|
||||||
|
.clk (clk),
|
||||||
|
.wren (wren),
|
||||||
|
.waddr (waddr),
|
||||||
|
.wdata (writeback_if.data),
|
||||||
|
.rden (1'b1),
|
||||||
|
.raddr (raddr3),
|
||||||
|
.rdata (rdata3)
|
||||||
|
);
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i];
|
||||||
|
end
|
||||||
|
`else
|
||||||
|
`UNUSED_VAR (gpr_req_if.rs3)
|
||||||
|
assign gpr_rsp_if.rs3_data = 'x;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign writeback_if.ready = 1'b1;
|
assign writeback_if.ready = 1'b1;
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
`TRACING_OFF
|
`TRACING_OFF
|
||||||
module VX_dp_ram #(
|
module VX_dp_ram #(
|
||||||
parameter RD_PORTS = 1,
|
|
||||||
parameter DATAW = 1,
|
parameter DATAW = 1,
|
||||||
parameter SIZE = 1,
|
parameter SIZE = 1,
|
||||||
parameter BYTEENW = 1,
|
parameter BYTEENW = 1,
|
||||||
@@ -18,14 +17,12 @@ module VX_dp_ram #(
|
|||||||
input wire [BYTEENW-1:0] wren,
|
input wire [BYTEENW-1:0] wren,
|
||||||
input wire [ADDRW-1:0] waddr,
|
input wire [ADDRW-1:0] waddr,
|
||||||
input wire [DATAW-1:0] wdata,
|
input wire [DATAW-1:0] wdata,
|
||||||
input wire [RD_PORTS-1:0] rden,
|
input wire rden,
|
||||||
input wire [RD_PORTS-1:0][ADDRW-1:0] raddr,
|
input wire [ADDRW-1:0] raddr,
|
||||||
output wire [RD_PORTS-1:0][DATAW-1:0] rdata
|
output wire [DATAW-1:0] rdata
|
||||||
);
|
);
|
||||||
|
|
||||||
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
|
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
|
||||||
`STATIC_ASSERT(!LUTRAM || (RD_PORTS == 1), ("multi-porting not supported on LUTRAM"))
|
|
||||||
|
|
||||||
|
|
||||||
`define RAM_INITIALIZATION \
|
`define RAM_INITIALIZATION \
|
||||||
if (INIT_ENABLE) begin \
|
if (INIT_ENABLE) begin \
|
||||||
@@ -94,7 +91,7 @@ module VX_dp_ram #(
|
|||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
if (OUTPUT_REG) begin
|
if (OUTPUT_REG) begin
|
||||||
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r;
|
reg [DATAW-1:0] rdata_r;
|
||||||
|
|
||||||
if (BYTEENW > 1) begin
|
if (BYTEENW > 1) begin
|
||||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||||
@@ -106,10 +103,8 @@ module VX_dp_ram #(
|
|||||||
if (wren[i])
|
if (wren[i])
|
||||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||||
end
|
end
|
||||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
if (rden)
|
||||||
if (rden[i])
|
rdata_r <= ram[raddr];
|
||||||
rdata_r[i] <= ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||||
@@ -119,10 +114,8 @@ module VX_dp_ram #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (wren)
|
if (wren)
|
||||||
ram[waddr] <= wdata;
|
ram[waddr] <= wdata;
|
||||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
if (rden)
|
||||||
if (rden[i])
|
rdata_r <= ram[raddr];
|
||||||
rdata_r[i] <= ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
assign rdata = rdata_r;
|
||||||
@@ -140,9 +133,7 @@ module VX_dp_ram #(
|
|||||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = ram[raddr];
|
||||||
assign rdata[i] = ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end else begin
|
end else begin
|
||||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
||||||
|
|
||||||
@@ -152,9 +143,7 @@ module VX_dp_ram #(
|
|||||||
if (wren)
|
if (wren)
|
||||||
ram[waddr] <= wdata;
|
ram[waddr] <= wdata;
|
||||||
end
|
end
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = ram[raddr];
|
||||||
assign rdata[i] = ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
if (BYTEENW > 1) begin
|
if (BYTEENW > 1) begin
|
||||||
@@ -168,9 +157,7 @@ module VX_dp_ram #(
|
|||||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = ram[raddr];
|
||||||
assign rdata[i] = ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end else begin
|
end else begin
|
||||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||||
|
|
||||||
@@ -180,16 +167,14 @@ module VX_dp_ram #(
|
|||||||
if (wren)
|
if (wren)
|
||||||
ram[waddr] <= wdata;
|
ram[waddr] <= wdata;
|
||||||
end
|
end
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = ram[raddr];
|
||||||
assign rdata[i] = ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`else
|
`else
|
||||||
if (OUTPUT_REG) begin
|
if (OUTPUT_REG) begin
|
||||||
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r;
|
reg [DATAW-1:0] rdata_r;
|
||||||
if (BYTEENW > 1) begin
|
if (BYTEENW > 1) begin
|
||||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||||
|
|
||||||
@@ -200,10 +185,8 @@ module VX_dp_ram #(
|
|||||||
if (wren[i])
|
if (wren[i])
|
||||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||||
end
|
end
|
||||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
if (rden)
|
||||||
if (rden[i])
|
rdata_r <= ram[raddr];
|
||||||
rdata_r[i] <= ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||||
@@ -213,10 +196,8 @@ module VX_dp_ram #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (wren)
|
if (wren)
|
||||||
ram[waddr] <= wdata;
|
ram[waddr] <= wdata;
|
||||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
if (rden)
|
||||||
if (rden[i])
|
rdata_r <= ram[raddr];
|
||||||
rdata_r[i] <= ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign rdata = rdata_r;
|
assign rdata = rdata_r;
|
||||||
@@ -244,13 +225,9 @@ module VX_dp_ram #(
|
|||||||
`UNUSED_VAR (prev_write)
|
`UNUSED_VAR (prev_write)
|
||||||
`UNUSED_VAR (prev_data)
|
`UNUSED_VAR (prev_data)
|
||||||
`UNUSED_VAR (prev_waddr)
|
`UNUSED_VAR (prev_waddr)
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = ram[raddr];
|
||||||
assign rdata[i] = ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end else begin
|
end else begin
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||||
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||||
@@ -271,13 +248,9 @@ module VX_dp_ram #(
|
|||||||
`UNUSED_VAR (prev_write)
|
`UNUSED_VAR (prev_write)
|
||||||
`UNUSED_VAR (prev_data)
|
`UNUSED_VAR (prev_data)
|
||||||
`UNUSED_VAR (prev_waddr)
|
`UNUSED_VAR (prev_waddr)
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = ram[raddr];
|
||||||
assign rdata[i] = ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end else begin
|
end else begin
|
||||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||||
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user