gpr pipeline optimization

This commit is contained in:
Blaise Tine
2020-08-01 12:38:30 -04:00
parent 31ee824862
commit b8cd3b0b28
17 changed files with 121 additions and 140 deletions

View File

@@ -13,7 +13,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_FLAGS += $(DBG_PRINT_FLAGS) #DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 #CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1

View File

@@ -2,11 +2,6 @@
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores ../rtl/fp_cores/altera ../rtl/fp_cores/fpnew/src/common_cells/src ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl ../rtl/fp_cores/fpnew/src' dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores ../rtl/fp_cores/altera ../rtl/fp_cores/fpnew/src/common_cells/src ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl ../rtl/fp_cores/fpnew/src'
inc_list=""
for dir in $dir_list; do
inc_list="$inc_list -I$dir"
done
# read design sources # read design sources
for dir in $dir_list; do for dir in $dir_list; do
echo "+incdir+$dir" echo "+incdir+$dir"

View File

@@ -65,11 +65,6 @@
`define MULRQ_SIZE 8 `define MULRQ_SIZE 8
`endif `endif
// Size of FPU Request Queue Size
`ifndef FPURQ_SIZE
`define FPURQ_SIZE 8
`endif
// Size of issue queue // Size of issue queue
`ifndef ISSUEQ_SIZE `ifndef ISSUEQ_SIZE
`define ISSUEQ_SIZE (8 + `NUM_WARPS) `define ISSUEQ_SIZE (8 + `NUM_WARPS)

View File

@@ -237,7 +237,6 @@ module VX_decode #(
wire is_fl = 0; wire is_fl = 0;
wire is_fs = 0; wire is_fs = 0;
wire is_fci = 0; wire is_fci = 0;
wire is_fcmp = 0;
wire is_fcvti = 0; wire is_fcvti = 0;
wire is_fcvtf = 0; wire is_fcvtf = 0;
wire is_fmvcls = 0; wire is_fmvcls = 0;
@@ -287,10 +286,6 @@ module VX_decode #(
wire use_rs3 = is_fr4; wire use_rs3 = is_fr4;
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS));
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX)));
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
wire [4:0] rs1_qual = is_lui ? 5'h0 : rs1; wire [4:0] rs1_qual = is_lui ? 5'h0 : rs1;
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -324,6 +319,11 @@ module VX_decode #(
assign decode_tmp_if.wb = use_rd; assign decode_tmp_if.wb = use_rd;
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS));
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX)));
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
assign decode_tmp_if.rd = {rd_is_fp, rd}; assign decode_tmp_if.rd = {rd_is_fp, rd};
assign decode_tmp_if.rs1 = {rs1_is_fp, rs1_qual}; assign decode_tmp_if.rs1 = {rs1_is_fp, rs1_qual};
assign decode_tmp_if.rs2 = {rs2_is_fp, rs2}; assign decode_tmp_if.rs2 = {rs2_is_fp, rs2};

View File

@@ -110,11 +110,7 @@ module VX_execute #(
assign fpu_commit_if.issue_tag = 0; assign fpu_commit_if.issue_tag = 0;
assign fpu_commit_if.data = 0; assign fpu_commit_if.data = 0;
assign fpu_commit_if.has_fflags = 0; assign fpu_commit_if.has_fflags = 0;
assign fpu_commit_if.fflags_NV = 0; assign fpu_commit_if.fflags = 0;
assign fpu_commit_if.fflags_DZ = 0;
assign fpu_commit_if.fflags_OF = 0;
assign fpu_commit_if.fflags_UF = 0;
assign fpu_commit_if.fflags_NX = 0;
`endif `endif
VX_gpu_unit #( VX_gpu_unit #(

View File

@@ -10,7 +10,7 @@ module VX_gpr_fp_ctrl (
input wire [`NUM_THREADS-1:0][31:0] rs2_data, input wire [`NUM_THREADS-1:0][31:0] rs2_data,
// outputs // outputs
output wire [`NR_BITS-1:0] raddr1, output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
VX_gpr_read_if gpr_read_if VX_gpr_read_if gpr_read_if
); );
@@ -18,32 +18,30 @@ module VX_gpr_fp_ctrl (
reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data; reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data;
reg read_rs3; reg read_rs3;
wire gpr_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3; wire delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
wire gpr_fire = gpr_read_if.valid && gpr_read_if.ready; wire read_fire = gpr_read_if.valid && gpr_read_if.out_ready;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
read_rs3 <= 0; read_rs3 <= 0;
end else if (gpr_delay) begin end else if (delay) begin
read_rs3 <= 1; read_rs3 <= 1;
end else if (gpr_fire) begin end else if (read_fire) begin
read_rs3 <= 0; read_rs3 <= 0;
end end
end end
// backup original rs1 data // backup original rs1 data
always @(posedge clk) begin always @(posedge clk) begin
if (gpr_delay) begin if (delay) begin
tmp_rs1_data <= rs1_data; tmp_rs1_data <= rs1_data;
end end
end end
// outputs // outputs
assign raddr1 = {gpr_read_if.warp_num, (read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1)};
assign raddr1 = read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1; assign gpr_read_if.in_ready = ~delay;
assign gpr_read_if.ready = ~gpr_delay;
assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? tmp_rs1_data : rs1_data; assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? tmp_rs1_data : rs1_data;
assign gpr_read_if.rs2_data = rs2_data; assign gpr_read_if.rs2_data = rs2_data;
assign gpr_read_if.rs3_data = rs1_data; assign gpr_read_if.rs3_data = rs1_data;

View File

@@ -3,26 +3,28 @@
module VX_gpr_ram ( module VX_gpr_ram (
input wire clk, input wire clk,
input wire [`NUM_THREADS-1:0] we, input wire [`NUM_THREADS-1:0] we,
input wire [`NR_BITS-1:0] waddr, input wire [`NW_BITS+`NR_BITS-1:0] waddr,
input wire [`NUM_THREADS-1:0][31:0] wdata, input wire [`NUM_THREADS-1:0][31:0] wdata,
input wire [`NR_BITS-1:0] rs1, input wire [`NW_BITS+`NR_BITS-1:0] rs1,
input wire [`NR_BITS-1:0] rs2, input wire [`NW_BITS+`NR_BITS-1:0] rs2,
output wire [`NUM_THREADS-1:0][31:0] rs1_data, output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data output wire [`NUM_THREADS-1:0][31:0] rs2_data
); );
`ifndef ASIC `ifndef ASIC
reg [`NUM_THREADS-1:0][3:0][7:0] ram [`NUM_REGS-1:0]; reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
integer i; integer i, j;
initial begin initial begin
// initialize r0 to 0 // initialize r0 to 0
for (i = 0; i < `NUM_THREADS; i++) begin for (j = 0; j < `NUM_WARPS; j++) begin
ram[0][i][0] = 0; for (i = 0; i < `NUM_THREADS; i++) begin
ram[0][i][1] = 0; ram[j * `NUM_REGS][i][0] = 8'h0;
ram[0][i][2] = 0; ram[j * `NUM_REGS][i][1] = 8'h0;
ram[0][i][3] = 0; ram[j * `NUM_REGS][i][2] = 8'h0;
ram[j * `NUM_REGS][i][3] = 8'h0;
end
end end
end end

View File

@@ -14,52 +14,43 @@ module VX_gpr_stage #(
); );
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
wire [`NUM_THREADS-1:0][31:0] rs1_data [`NUM_WARPS-1:0]; wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data [`NUM_WARPS-1:0]; wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
wire [`NR_BITS-1:0] raddr1; VX_gpr_ram gpr_int_ram (
.clk (clk),
genvar i; .we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.thread_mask),
.waddr ({writeback_if.warp_num, writeback_if.rd}),
for (i = 0; i < `NUM_WARPS; i++) begin .wdata (writeback_if.data),
wire [`NUM_THREADS-1:0] we = writeback_if.thread_mask .rs1 (raddr1),
& {`NUM_THREADS{writeback_if.valid && (i == writeback_if.warp_num)}}; .rs2 ({gpr_read_if.warp_num, gpr_read_if.rs2}),
VX_gpr_ram gpr_int_ram ( .rs1_data (rs1_data),
.clk (clk), .rs2_data (rs2_data)
.we (we), );
.waddr (writeback_if.rd),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 (gpr_read_if.rs2),
.rs1_data (rs1_data[i]),
.rs2_data (rs2_data[i])
);
end
`ifdef EXT_F_ENABLE `ifdef EXT_F_ENABLE
VX_gpr_fp_ctrl VX_gpr_fp_ctrl ( VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.rs1_data (rs1_data),
//inputs .rs2_data (rs2_data),
.rs1_data (rs1_data[gpr_read_if.warp_num]),
.rs2_data (rs2_data[gpr_read_if.warp_num]),
// outputs
.raddr1 (raddr1), .raddr1 (raddr1),
.gpr_read_if(gpr_read_if) .gpr_read_if(gpr_read_if)
); );
`else `else
assign raddr1 = gpr_read_if.rs1; assign raddr1 = {gpr_read_if.warp_num, gpr_read_if.rs1};
assign gpr_read_if.rs1_data = rs1_data[gpr_read_if.warp_num]; assign gpr_read_if.rs1_data = rs1_data;
assign gpr_read_if.rs2_data = rs2_data[gpr_read_if.warp_num]; assign gpr_read_if.rs2_data = rs2_data;
assign gpr_read_if.rs3_data = 0; assign gpr_read_if.rs3_data = 0;
assign gpr_read_if.ready = 1; assign gpr_read_if.in_ready = 1;
wire valid = gpr_read_if.valid; wire valid = gpr_read_if.valid;
wire out_ready = gpr_read_if.out_ready;
wire use_rs3 = gpr_read_if.use_rs3; wire use_rs3 = gpr_read_if.use_rs3;
wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3; wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3;
`UNUSED_VAR (valid); `UNUSED_VAR (valid);
`UNUSED_VAR (out_ready);
`UNUSED_VAR (use_rs3); `UNUSED_VAR (use_rs3);
`UNUSED_VAR (rs3); `UNUSED_VAR (rs3);
`endif `endif

View File

@@ -24,10 +24,11 @@ module VX_issue #(
assign gpr_read_if.rs2 = decode_if.rs2; assign gpr_read_if.rs2 = decode_if.rs2;
assign gpr_read_if.rs3 = decode_if.rs3; assign gpr_read_if.rs3 = decode_if.rs3;
assign gpr_read_if.use_rs3 = decode_if.use_rs3; assign gpr_read_if.use_rs3 = decode_if.use_rs3;
assign gpr_read_if.out_ready = decode_if.ready;
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag; wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
wire gpr_busy = ~gpr_read_if.ready; wire gpr_busy = ~gpr_read_if.in_ready;
wire alu_busy = ~alu_req_if.ready; wire alu_busy = ~alu_req_if.ready;
wire lsu_busy = ~lsu_req_if.ready; wire lsu_busy = ~lsu_req_if.ready;
wire csr_busy = ~csr_req_if.ready; wire csr_busy = ~csr_req_if.ready;
@@ -63,7 +64,7 @@ module VX_issue #(
); );
VX_decode_if decode_tmp_if(); VX_decode_if decode_tmp_if();
VX_gpr_read_if gpr_data_tmp_if(); VX_gpr_read_if gpr_read_tmp_if();
wire stall = ~alu_req_if.ready || ~decode_if.ready; wire stall = ~alu_req_if.ready || ~decode_if.ready;
wire flush = alu_req_if.ready && ~decode_if.ready; wire flush = alu_req_if.ready && ~decode_if.ready;
@@ -75,13 +76,13 @@ module VX_issue #(
.reset (reset), .reset (reset),
.stall (stall), .stall (stall),
.flush (flush), .flush (flush),
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}), .in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}),
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data}) .out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, gpr_read_tmp_if.rs3_data})
); );
VX_issue_demux issue_demux ( VX_issue_demux issue_demux (
.decode_if (decode_tmp_if), .decode_if (decode_tmp_if),
.gpr_read_if (gpr_data_tmp_if), .gpr_read_if (gpr_read_tmp_if),
.issue_tag (issue_tmp_tag), .issue_tag (issue_tmp_tag),
.alu_req_if (alu_req_if), .alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),

View File

@@ -19,14 +19,15 @@ module VX_writeback #(
VX_wb_if writeback_if VX_wb_if writeback_if
); );
reg [`NUM_THREADS-1:0][31:0] wb_data [`ISSUEQ_SIZE-1:0]; reg [`NUM_THREADS-1:0][31:0] wb_data_table [`ISSUEQ_SIZE-1:0];
reg [`NW_BITS-1:0] wb_warp_num [`ISSUEQ_SIZE-1:0]; reg [`NW_BITS-1:0] wb_warp_num_table [`ISSUEQ_SIZE-1:0];
reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0]; reg [`NUM_THREADS-1:0] wb_thread_mask_table [`ISSUEQ_SIZE-1:0];
reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0]; reg [31:0] wb_curr_PC_table [`ISSUEQ_SIZE-1:0];
reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0]; reg [`NR_BITS-1:0] wb_rd_table [`ISSUEQ_SIZE-1:0];
reg [`ISSUEQ_SIZE-1:0] wb_pending; reg [`ISSUEQ_SIZE-1:0] wb_pending;
reg [`ISSUEQ_SIZE-1:0] wb_pending_n; reg [`ISSUEQ_SIZE-1:0] wb_pending_n;
reg [`ISTAG_BITS-1:0] wb_index; reg [`ISTAG_BITS-1:0] wb_index;
wire [`ISTAG_BITS-1:0] wb_index_n; wire [`ISTAG_BITS-1:0] wb_index_n;
@@ -72,39 +73,43 @@ module VX_writeback #(
wb_valid <= 0; wb_valid <= 0;
end else begin end else begin
if (alu_commit_if.valid) begin if (alu_commit_if.valid) begin
wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data; wb_data_table [alu_commit_if.issue_tag] <= alu_commit_if.data;
wb_warp_num [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num; wb_warp_num_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num;
wb_thread_mask [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask; wb_thread_mask_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask;
wb_curr_PC [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC; wb_curr_PC_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC;
wb_rd [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd; wb_rd_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd;
end end
if (lsu_commit_if.valid) begin if (lsu_commit_if.valid) begin
wb_data [lsu_commit_if.issue_tag] <= lsu_commit_if.data; wb_data_table [lsu_commit_if.issue_tag] <= lsu_commit_if.data;
wb_warp_num [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num; wb_warp_num_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num;
wb_thread_mask [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask; wb_thread_mask_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask;
wb_curr_PC [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC; wb_curr_PC_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC;
wb_rd [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd; wb_rd_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd;
end end
if (csr_commit_if.valid) begin if (csr_commit_if.valid) begin
wb_data [csr_commit_if.issue_tag] <= csr_commit_if.data; wb_data_table [csr_commit_if.issue_tag] <= csr_commit_if.data;
wb_warp_num [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num; wb_warp_num_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num;
wb_thread_mask [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask; wb_thread_mask_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask;
wb_curr_PC [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC; wb_curr_PC_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC;
wb_rd [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd; wb_rd_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd;
end end
if (mul_commit_if.valid) begin if (mul_commit_if.valid) begin
wb_data [mul_commit_if.issue_tag] <= mul_commit_if.data; wb_data_table [mul_commit_if.issue_tag] <= mul_commit_if.data;
wb_warp_num [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num; wb_warp_num_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num;
wb_thread_mask [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask; wb_thread_mask_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask;
wb_curr_PC [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC; wb_curr_PC_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC;
wb_rd [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd; wb_rd_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd;
end end
if (fpu_commit_if.valid) begin if (fpu_commit_if.valid) begin
wb_data [fpu_commit_if.issue_tag] <= fpu_commit_if.data; wb_data_table [fpu_commit_if.issue_tag] <= fpu_commit_if.data;
wb_warp_num [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num; wb_warp_num_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num;
wb_thread_mask [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask; wb_thread_mask_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask;
wb_curr_PC [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC; wb_curr_PC_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC;
wb_rd [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd; wb_rd_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd;
end end
wb_pending <= wb_pending_n; wb_pending <= wb_pending_n;
@@ -115,11 +120,11 @@ module VX_writeback #(
// writeback request // writeback request
assign writeback_if.valid = wb_valid; assign writeback_if.valid = wb_valid;
assign writeback_if.warp_num = wb_warp_num [wb_index]; assign writeback_if.warp_num = wb_warp_num_table [wb_index];
assign writeback_if.thread_mask = wb_thread_mask [wb_index]; assign writeback_if.thread_mask = wb_thread_mask_table [wb_index];
assign writeback_if.curr_PC = wb_curr_PC [wb_index]; assign writeback_if.curr_PC = wb_curr_PC_table [wb_index];
assign writeback_if.rd = wb_rd [wb_index]; assign writeback_if.rd = wb_rd_table [wb_index];
assign writeback_if.data = wb_data [wb_index]; assign writeback_if.data = wb_data_table [wb_index];
// commit back-pressure // commit back-pressure
assign alu_commit_if.ready = 1'b1; assign alu_commit_if.ready = 1'b1;

View File

@@ -38,8 +38,8 @@ module VX_fp_fpga (
VX_fpnew #( VX_fpnew #(
.FMULADD (0), .FMULADD (0),
.FDIVSQRT (1), .FDIVSQRT (0),
.FNONCOMP (1), .FNONCOMP (0),
.FCONV (1) .FCONV (1)
) fp_core ( ) fp_core (
.clk (clk), .clk (clk),
@@ -67,13 +67,15 @@ module VX_fp_fpga (
.out_valid (fpnew_out_valid) .out_valid (fpnew_out_valid)
); );
acl_fp_add fp_add ( for (i = 0; i < `NUM_THREADS; i++) begin
.clock (clk), acl_fp_add fp_add (
.dataa (dataa), .clock (clk),
.datab (datab), .dataa (dataa),
.enable (add_out_ready), .datab (datab),
.result (add_result) .enable (add_out_ready),
); .result (add_result[i])
);
end
assign in_reqady = fpnew_in_ready; assign in_reqady = fpnew_in_ready;
assign has_fflags = fpnew_has_fflags; assign has_fflags = fpnew_has_fflags;
@@ -81,6 +83,8 @@ module VX_fp_fpga (
assign out_tag = fpnew_out_tag; assign out_tag = fpnew_out_tag;
assign fpnew_out_ready = out_ready; assign fpnew_out_ready = out_ready;
assign add_out_ready = out_ready;
assign result = fpnew_out_valid ? fpnew_result : add_result; assign result = fpnew_out_valid ? fpnew_result : add_result;
assign out_valid = fpnew_out_valid; assign out_valid = fpnew_out_valid;

View File

@@ -66,7 +66,7 @@ module VX_fpnew #(
wire fpu_in_ready, fpu_in_valid; wire fpu_in_ready, fpu_in_valid;
wire fpu_out_ready, fpu_out_valid; wire fpu_out_ready, fpu_out_valid;
reg [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag; reg [`ISTAG_BITS-1:0] fpu_in_tag, fpu_out_tag;
reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands; reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
@@ -138,7 +138,7 @@ module VX_fpnew #(
fpnew_top #( fpnew_top #(
.Features (FPU_FEATURES), .Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION), .Implementation (FPU_IMPLEMENTATION),
.TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0]) .TagType (logic[`ISTAG_BITS+1+1-1:0])
) fpnew_core ( ) fpnew_core (
.clk_i (clk), .clk_i (clk),
.rst_ni (1'b1), .rst_ni (1'b1),

View File

@@ -19,7 +19,8 @@ interface VX_gpr_read_if ();
wire [`NUM_THREADS-1:0][31:0] rs2_data; wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data; wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire ready; wire in_ready;
wire out_ready;
endinterface endinterface

View File

@@ -58,13 +58,12 @@ if args.outc != 'none':
translation_rules = [ translation_rules = [
(re.compile(r'^$'), r''), (re.compile(r'^$'), r''),
(re.compile(r'^( *)`ifndef ([^ ]+)$'), r'\1#ifndef \2'), (re.compile(r'^(\s*)`ifndef\s+([^ ]+)'), r'\1#ifndef \2'),
(re.compile(r'^( *)`define ([^ ]+)$'), r'\1#define \2'), (re.compile(r'^(\s*)`define\s+([^ ]+)'), r'\1#define \2'),
# (re.compile(r'^( *)`include "\./VX_define_synth\.v"$'), r'\1#include "VX_define_synth.h"'), (re.compile(r'^(\s*)`include "VX_user_config\.vh"'), r''),
(re.compile(r'^( *)`include "VX_user_config\.vh"$'), r''), (re.compile(r'^(\s*)`define\s+([^ ]+) (.+)'), r'\1#define \2 \3'),
(re.compile(r'^( *)`define ([^ ]+) (.+)$'), r'\1#define \2 \3'), (re.compile(r'^(\s*)`endif\s+'), r'\1#endif'),
(re.compile(r'^( *)`endif$'), r'\1#endif'), (re.compile(r'^(\s*)//(.*)'), r'\1// \2'),
(re.compile(r'^( *)// (.*)$'), r'\1// \2'),
] ]
post_rules = [ post_rules = [
@@ -78,7 +77,6 @@ post_rules = [
(re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'), (re.compile(r"`([A-Za-z_][$_0-9A-Za-z]*)"), r'\1'),
] ]
def post_process_line(line): def post_process_line(line):
for pat, repl in post_rules: for pat, repl in post_rules:
line = pat.sub(repl, line) line = pat.sub(repl, line)

View File

@@ -14,7 +14,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_FLAGS += $(DBG_PRINT_FLAGS) #DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO DBG_FLAGS += -DDBG_CORE_REQ_INFO
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src

View File

@@ -25,7 +25,6 @@ Simulator::Simulator() {
#ifdef VCD_OUTPUT #ifdef VCD_OUTPUT
Verilated::traceEverOn(true); Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC(); trace_ = new VerilatedVcdC();
trace_->set_time_unit("1ns");
vortex_->trace(trace_, 99); vortex_->trace(trace_, 99);
trace_->open("trace.vcd"); trace_->open("trace.vcd");
#endif #endif

View File

@@ -3,9 +3,10 @@
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#define ALL_TESTS
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc == 1) { if (argc == 1) {
#define ALL_TESTS
#ifdef ALL_TESTS #ifdef ALL_TESTS
bool passed = true; bool passed = true;
@@ -137,12 +138,7 @@ int main(int argc, char **argv) {
simulator.load_ihex(test); simulator.load_ihex(test);
simulator.run(); simulator.run();
bool status = (1 == simulator.get_last_wb_value(3)); return 0;
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
return !status;
#endif #endif