From df25bae45697318d256b5c3113e96e4b455c6ced Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 24 Aug 2020 05:36:00 -0700 Subject: [PATCH] optimize warp_sched --- hw/rtl/VX_decode.v | 16 ++++++++-------- hw/rtl/VX_print_instr.vh | 17 +---------------- hw/rtl/VX_warp_sched.v | 30 +++++++----------------------- hw/rtl/interfaces/VX_join_if.v | 2 +- hw/rtl/interfaces/VX_wstall_if.v | 2 +- hw/simulate/testbench.cpp | 2 +- 6 files changed, 19 insertions(+), 50 deletions(-) diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 0e9b153d..5b53129d 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -371,13 +371,15 @@ module VX_decode #( wire decode_fire = decode_if.valid && decode_if.ready; - assign join_if.is_join = decode_fire && is_gpu && (gpu_op == `GPU_JOIN); + assign join_if.valid = decode_fire && is_gpu && (gpu_op == `GPU_JOIN); assign join_if.wid = ifetch_rsp_if.wid; - assign wstall_if.wstall = decode_fire && (is_btype || is_jal || is_jalr - || (is_gpu && (gpu_op == `GPU_TMC - || gpu_op == `GPU_SPLIT - || gpu_op == `GPU_BAR))); + assign wstall_if.valid = decode_fire && (is_btype + || is_jal + || is_jalr + || (is_gpu && (gpu_op == `GPU_TMC + || gpu_op == `GPU_SPLIT + || gpu_op == `GPU_BAR))); assign wstall_if.wid = ifetch_rsp_if.wid; /////////////////////////////////////////////////////////////////////////// @@ -391,9 +393,7 @@ module VX_decode #( print_ex_type(decode_if.ex_type); $write(", op="); print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod); - $write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm); - print_frm(decode_if.frm); - $write("\n"); + $write("mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm); end end `endif diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index 42320d87..68fd8756 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -21,7 +21,7 @@ endtask task print_ex_op; input [`EX_BITS-1:0] ex_type; input [`OP_BITS-1:0] op_type; - input [`OP_BITS-1:0] op_mod; + input [`MOD_BITS-1:0] op_mod; begin case (ex_type) `EX_ALU: begin @@ -141,19 +141,4 @@ task print_ex_op; end endtask -task print_frm; - input [`FRM_BITS-1:0] frm; - begin - case (frm) - `FRM_RNE: $write("RNE"); - `FRM_RTZ: $write("RTZ"); - `FRM_RDN: $write("RDN"); - `FRM_RUP: $write("RUP"); - `FRM_RMM: $write("RMM"); - `FRM_DYN: $write("DYN"); - default: $write("?"); - endcase - end -endtask - `endif diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index be11f1c2..16d65aa3 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -52,10 +52,7 @@ module VX_warp_sched #( && warp_ctl_if.tmc.valid && (0 == warp_ctl_if.tmc.thread_mask)) begin schedule_table_n[warp_ctl_if.wid] = 0; - end - if (wstall_if.wstall) begin - schedule_table_n[wstall_if.wid] = 0; - end + end if (scheduled_warp) begin // remove scheduled warp (round-robin) schedule_table_n[warp_to_schedule] = 0; end @@ -103,7 +100,7 @@ module VX_warp_sched #( if (0 == warp_ctl_if.tmc.thread_mask) begin active_warps[warp_ctl_if.wid] <= 0; end - end else if (join_if.is_join && !didnt_split) begin + end else if (join_if.valid && !didnt_split) begin if (!join_fall) begin warp_pcs[join_if.wid] <= join_pc; end @@ -125,15 +122,10 @@ module VX_warp_sched #( end // Stalling the scheduling of warps - if (wstall_if.wstall) begin + if (wstall_if.valid) begin stalled_warps[wstall_if.wid] <= 1; end - // Advance PC - if (scheduled_warp) begin - warp_pcs[warp_to_schedule] <= warp_pc + 4; - end - // Branch if (branch_ctl_if.valid) begin if (branch_ctl_if.taken) begin @@ -148,6 +140,7 @@ module VX_warp_sched #( end if (ifetch_rsp_fire) begin fetch_lock[ifetch_rsp_if.wid] <= 0; + warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.curr_PC + 4; end // reset 'schedule_table' when it goes to zero @@ -191,8 +184,7 @@ module VX_warp_sched #( && warp_ctl_if.split.diverged && (i == warp_ctl_if.wid); - wire pop = join_if.is_join - && (i == join_if.wid); + wire pop = join_if.valid && (i == join_if.wid); VX_ipdom_stack #( .WIDTH(1+32+`NUM_THREADS), @@ -232,17 +224,9 @@ module VX_warp_sched #( end end - wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid; - - wire branch_hazard = branch_ctl_if.valid - && branch_ctl_if.taken - && (branch_ctl_if.wid == warp_to_schedule); + wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid; - wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule); - - wire stall = stall_out || wstall_this_cycle || branch_hazard || join_if.is_join; - - assign scheduled_warp = schedule_valid && ~stall; + assign scheduled_warp = schedule_valid && ~stall_out; VX_generic_register #( .N(1 + `NUM_THREADS + 32 + `NW_BITS) diff --git a/hw/rtl/interfaces/VX_join_if.v b/hw/rtl/interfaces/VX_join_if.v index 6e96ad31..0ee163ab 100644 --- a/hw/rtl/interfaces/VX_join_if.v +++ b/hw/rtl/interfaces/VX_join_if.v @@ -5,7 +5,7 @@ interface VX_join_if (); - wire is_join; + wire valid; wire [`NW_BITS-1:0] wid; endinterface diff --git a/hw/rtl/interfaces/VX_wstall_if.v b/hw/rtl/interfaces/VX_wstall_if.v index 5b4e5039..2e984085 100644 --- a/hw/rtl/interfaces/VX_wstall_if.v +++ b/hw/rtl/interfaces/VX_wstall_if.v @@ -5,7 +5,7 @@ interface VX_wstall_if(); - wire wstall; + wire valid; wire [`NW_BITS-1:0] wid; endinterface diff --git a/hw/simulate/testbench.cpp b/hw/simulate/testbench.cpp index 554974a5..18c1c887 100644 --- a/hw/simulate/testbench.cpp +++ b/hw/simulate/testbench.cpp @@ -3,7 +3,7 @@ #include #include -//#define ALL_TESTS +#define ALL_TESTS int main(int argc, char **argv) { if (argc == 1) {