From 29cd2f5dffad583751853087a4e34dba72b0c1ba Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 10 Dec 2020 00:27:56 -0800 Subject: [PATCH] fixed register file initialization to zero synthesis inference --- driver/opae/Makefile | 2 +- driver/opae/vx_scope.h | 4 ++ hw/opae/README | 2 +- hw/opae/sources_1c.txt | 28 +++++------ hw/rtl/VX_csr_unit.v | 2 +- hw/rtl/VX_gpr_ram.v | 83 ------------------------------- hw/rtl/VX_gpr_ram_f.v | 35 +++++++++++++ hw/rtl/VX_gpr_ram_i.v | 34 +++++++++++++ hw/rtl/VX_gpr_stage.v | 109 ++++++++++++++++++++++++++++++----------- hw/rtl/VX_types.vh | 2 +- 10 files changed, 171 insertions(+), 130 deletions(-) delete mode 100644 hw/rtl/VX_gpr_ram.v create mode 100644 hw/rtl/VX_gpr_ram_f.v create mode 100644 hw/rtl/VX_gpr_ram_i.v diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 0756c6ea..a00a3853 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -85,7 +85,7 @@ vlsim-hw: $(SCOPE_H) fpga: $(SRCS) $(SCOPE_H) $(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT) -asesim: $(SRCS) $(ASE_DIR) +asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H) $(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE) vlsim: $(SRCS) vlsim-hw diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index 2bb09c4a..a3e13455 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -1,6 +1,10 @@ #pragma once +#if defined(USE_FPGA) #define HANG_TIMEOUT 60 +#else +#define HANG_TIMEOUT (30*60) +#endif int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); diff --git a/hw/opae/README b/hw/opae/README index eac34ad6..da93a61e 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -61,7 +61,7 @@ make ase # tests ./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16 -./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16 +./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n16 ./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16 ./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index 4671c87a..ad731859 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -4,21 +4,21 @@ +define+QUARTUS +define+FPU_FAST #+define+SCOPE -#+define+PERF_ENABLE ++define+PERF_ENABLE -#+define+DBG_PRINT_CORE_ICACHE -#+define+DBG_PRINT_CORE_DCACHE -#+define+DBG_PRINT_CACHE_BANK -#+define+DBG_PRINT_CACHE_SNP -#+define+DBG_PRINT_CACHE_MSRQ -#+define+DBG_PRINT_CACHE_TAG -#+define+DBG_PRINT_CACHE_DATA -#+define+DBG_PRINT_DRAM -#+define+DBG_PRINT_PIPELINE -#+define+DBG_PRINT_OPAE -#+define+DBG_PRINT_AVS -#+define+DBG_PRINT_SCOPE -#+define+DBG_CACHE_REQ_INFO ++define+DBG_PRINT_CORE_ICACHE ++define+DBG_PRINT_CORE_DCACHE ++define+DBG_PRINT_CACHE_BANK ++define+DBG_PRINT_CACHE_SNP ++define+DBG_PRINT_CACHE_MSRQ ++define+DBG_PRINT_CACHE_TAG ++define+DBG_PRINT_CACHE_DATA ++define+DBG_PRINT_DRAM ++define+DBG_PRINT_PIPELINE ++define+DBG_PRINT_OPAE ++define+DBG_PRINT_AVS ++define+DBG_PRINT_SCOPE ++define+DBG_CACHE_REQ_INFO vortex_afu.json QI:vortex_afu.qsf diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index 111a45d9..3b007046 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -102,7 +102,7 @@ module VX_csr_unit #( endcase end - wire stall_in = fpu_pending[csr_pipe_req_if.wid]; + wire stall_in = !csr_pipe_req_if.is_io && fpu_pending[csr_pipe_req_if.wid]; wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in; diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v deleted file mode 100644 index 3c25de66..00000000 --- a/hw/rtl/VX_gpr_ram.v +++ /dev/null @@ -1,83 +0,0 @@ -`include "VX_define.vh" - -`TRACING_OFF - -module VX_gpr_ram ( - input wire clk, - input wire wren, - input wire [`NUM_THREADS-1:0] tmask, - input wire [`NW_BITS+`NR_BITS-1:0] waddr, - input wire [`NUM_THREADS-1:0][31:0] wdata, - input wire [`NW_BITS+`NR_BITS-1:0] raddr1, - input wire [`NW_BITS+`NR_BITS-1:0] raddr2, - input wire [`NW_BITS+`NR_BITS-1:0] raddr3, - output wire [`NUM_THREADS-1:0][31:0] rdata1, - output wire [`NUM_THREADS-1:0][31:0] rdata2, - output wire [`NUM_THREADS-1:0][31:0] rdata3 -); - localparam RAM_DATAW = `NUM_THREADS * 32; - localparam RAM_ADDRW = `NW_BITS + `NR_BITS; - localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; - localparam RAM_BYTEEN = `NUM_THREADS * 4; - - `UNUSED_VAR (raddr3) - -`ifdef EXT_F_ENABLE - - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - - reg [31:0] mem_i [(RAM_DEPTH/2)-1:0]; - reg [31:0] mem_f [(RAM_DEPTH/2)-1:0]; - - initial mem_i = '{default: 0}; - - wire waddr_is_fp = waddr[RAM_ADDRW-1]; - wire raddr1_is_fp = raddr1[RAM_ADDRW-1]; - wire raddr2_is_fp = raddr2[RAM_ADDRW-1]; - - wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0]; - wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0]; - wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0]; - wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0]; - - always @(posedge clk) begin - if (wren && tmask[i] && !waddr_is_fp) begin - mem_i[waddr_qual] <= wdata[i]; - end - end - - always @(posedge clk) begin - if (wren && tmask[i] && waddr_is_fp) begin - mem_f[waddr_qual] <= wdata[i]; - end - end - - assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual]; - assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual]; - assign rdata3[i] = mem_f[raddr3_qual]; - end - -`else - - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - - reg [31:0] mem [RAM_DEPTH-1:0]; - - initial mem = '{default: 0}; - - always @(posedge clk) begin - if (wren && tmask[i]) begin - mem[waddr] <= wdata[i]; - end - end - - assign rdata1[i] = mem[raddr1]; - assign rdata2[i] = mem[raddr2]; - assign rdata3[i] = 0; - end - -`endif - -endmodule - -`TRACING_ON \ No newline at end of file diff --git a/hw/rtl/VX_gpr_ram_f.v b/hw/rtl/VX_gpr_ram_f.v new file mode 100644 index 00000000..3b800993 --- /dev/null +++ b/hw/rtl/VX_gpr_ram_f.v @@ -0,0 +1,35 @@ +`include "VX_define.vh" + +`TRACING_OFF + +module VX_gpr_ram_f #( + parameter DATAW = 1, + parameter DEPTH = 1, + parameter ADDRW = $clog2(DEPTH) +) ( + input wire clk, + input wire wren, + input wire [ADDRW-1:0] waddr, + input wire [DATAW-1:0] wdata, + input wire [ADDRW-1:0] raddr1, + input wire [ADDRW-1:0] raddr2, + input wire [ADDRW-1:0] raddr3, + output wire [DATAW-1:0] rdata1, + output wire [DATAW-1:0] rdata2, + output wire [DATAW-1:0] rdata3 +); + reg [DATAW-1:0] mem [DEPTH-1:0]; + + always @(posedge clk) begin + if (wren) begin + mem [waddr] <= wdata; + end + end + + assign rdata1 = mem [raddr1]; + assign rdata2 = mem [raddr2]; + assign rdata3 = mem [raddr3]; + +endmodule + +`TRACING_ON \ No newline at end of file diff --git a/hw/rtl/VX_gpr_ram_i.v b/hw/rtl/VX_gpr_ram_i.v new file mode 100644 index 00000000..6c96b871 --- /dev/null +++ b/hw/rtl/VX_gpr_ram_i.v @@ -0,0 +1,34 @@ +`include "VX_define.vh" + +`TRACING_OFF + +module VX_gpr_ram_i #( + parameter DATAW = 1, + parameter DEPTH = 1, + parameter ADDRW = $clog2(DEPTH) +) ( + input wire clk, + input wire wren, + input wire [ADDRW-1:0] waddr, + input wire [DATAW-1:0] wdata, + input wire [ADDRW-1:0] raddr1, + input wire [ADDRW-1:0] raddr2, + output wire [DATAW-1:0] rdata1, + output wire [DATAW-1:0] rdata2 +); + reg [DATAW-1:0] mem [DEPTH-1:0]; + + initial mem = '{default: 0}; + + always @(posedge clk) begin + if (wren) begin + mem [waddr] <= wdata; + end + end + + assign rdata1 = mem [raddr1]; + assign rdata2 = mem [raddr2]; + +endmodule + +`TRACING_ON \ No newline at end of file diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 5a0af76e..39e81b2a 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -13,41 +13,92 @@ module VX_gpr_stage #( // outputs VX_gpr_rsp_if gpr_rsp_if ); - `UNUSED_VAR (reset) - - wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3; - wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3; - + `UNUSED_VAR (reset) + `ifdef EXT_F_ENABLE - assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]}; - assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]}; - assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]}; - assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]}; + localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2); + wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f; + wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3; + + wire waddr_is_fp = writeback_if.rd[`NR_BITS-1]; + wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1]; + wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1]; + wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1]; + `UNUSED_VAR (raddr3_is_fp) + + assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]}; + assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]}; + assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]}; + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + VX_gpr_ram_i #( + .DATAW (32), + .DEPTH (RAM_DEPTH) + ) gpr_ram_i ( + .clk (clk), + .wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp), + .waddr (waddr), + .wdata (writeback_if.data[i]), + .raddr1 (raddr1), + .raddr2 (raddr2), + .rdata1 (rdata1_i[i]), + .rdata2 (rdata2_i[i]) + ); + end + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + VX_gpr_ram_f #( + .DATAW (32), + .DEPTH (RAM_DEPTH) + ) gpr_ram_f ( + .clk (clk), + .wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp), + .waddr (waddr), + .wdata (writeback_if.data[i]), + .raddr1 (raddr1), + .raddr2 (raddr2), + .raddr3 (raddr3), + .rdata1 (rdata1_f[i]), + .rdata2 (rdata2_f[i]), + .rdata3 (rdata3_f[i]) + ); + end + + assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i; + assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i; + assign gpr_rsp_if.rs3_data = rdata3_f; `else + localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS; + wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i; + wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2; + assign waddr = {writeback_if.wid, writeback_if.rd}; assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; - assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; - assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; + assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; + `UNUSED_VAR (gpr_req_if.rs3) + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + VX_gpr_ram_i #( + .DATAW (32), + .DEPTH (RAM_DEPTH) + ) gpr_ram_i ( + .clk (clk), + .wren (writeback_if.valid && writeback_if.tmask[i]), + .waddr (waddr), + .wdata (writeback_if.data[i]), + .raddr1 (raddr1), + .raddr2 (raddr2), + .rdata1 (rdata1_i[i]), + .rdata2 (rdata2_i[i]) + ); + end + + assign gpr_rsp_if.rs1_data = rdata1_i; + assign gpr_rsp_if.rs2_data = rdata2_i; + assign gpr_rsp_if.rs3_data = 0; `endif - - VX_gpr_ram gpr_ram ( - .clk (clk), - .wren (writeback_if.valid), - .tmask (writeback_if.tmask), - .waddr (waddr), - .wdata (writeback_if.data), - .raddr1 (raddr1), - .raddr2 (raddr2), - .raddr3 (raddr3), - .rdata1 (rdata1), - .rdata2 (rdata2), - .rdata3 (rdata3) - ); - assign gpr_rsp_if.rs1_data = rdata1; - assign gpr_rsp_if.rs2_data = rdata2; - assign gpr_rsp_if.rs3_data = rdata3; - assign writeback_if.ready = 1'b1; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 26d051ae..8874cdec 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -54,6 +54,6 @@ typedef struct packed { logic [`NW_BITS-1:0] size_m1; } gpu_barrier_t; -`define GPU_BARRIER_SIZE (1+`NB_BITS+`NB_BITS) +`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS) `endif \ No newline at end of file