optimize warp_sched
This commit is contained in:
@@ -371,13 +371,15 @@ module VX_decode #(
|
|||||||
|
|
||||||
wire decode_fire = decode_if.valid && decode_if.ready;
|
wire decode_fire = decode_if.valid && decode_if.ready;
|
||||||
|
|
||||||
assign join_if.is_join = decode_fire && is_gpu && (gpu_op == `GPU_JOIN);
|
assign join_if.valid = decode_fire && is_gpu && (gpu_op == `GPU_JOIN);
|
||||||
assign join_if.wid = ifetch_rsp_if.wid;
|
assign join_if.wid = ifetch_rsp_if.wid;
|
||||||
|
|
||||||
assign wstall_if.wstall = decode_fire && (is_btype || is_jal || is_jalr
|
assign wstall_if.valid = decode_fire && (is_btype
|
||||||
|| (is_gpu && (gpu_op == `GPU_TMC
|
|| is_jal
|
||||||
|| gpu_op == `GPU_SPLIT
|
|| is_jalr
|
||||||
|| gpu_op == `GPU_BAR)));
|
|| (is_gpu && (gpu_op == `GPU_TMC
|
||||||
|
|| gpu_op == `GPU_SPLIT
|
||||||
|
|| gpu_op == `GPU_BAR)));
|
||||||
assign wstall_if.wid = ifetch_rsp_if.wid;
|
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -391,9 +393,7 @@ module VX_decode #(
|
|||||||
print_ex_type(decode_if.ex_type);
|
print_ex_type(decode_if.ex_type);
|
||||||
$write(", op=");
|
$write(", op=");
|
||||||
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
|
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
|
||||||
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
|
$write("mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
|
||||||
print_frm(decode_if.frm);
|
|
||||||
$write("\n");
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ endtask
|
|||||||
task print_ex_op;
|
task print_ex_op;
|
||||||
input [`EX_BITS-1:0] ex_type;
|
input [`EX_BITS-1:0] ex_type;
|
||||||
input [`OP_BITS-1:0] op_type;
|
input [`OP_BITS-1:0] op_type;
|
||||||
input [`OP_BITS-1:0] op_mod;
|
input [`MOD_BITS-1:0] op_mod;
|
||||||
begin
|
begin
|
||||||
case (ex_type)
|
case (ex_type)
|
||||||
`EX_ALU: begin
|
`EX_ALU: begin
|
||||||
@@ -141,19 +141,4 @@ task print_ex_op;
|
|||||||
end
|
end
|
||||||
endtask
|
endtask
|
||||||
|
|
||||||
task print_frm;
|
|
||||||
input [`FRM_BITS-1:0] frm;
|
|
||||||
begin
|
|
||||||
case (frm)
|
|
||||||
`FRM_RNE: $write("RNE");
|
|
||||||
`FRM_RTZ: $write("RTZ");
|
|
||||||
`FRM_RDN: $write("RDN");
|
|
||||||
`FRM_RUP: $write("RUP");
|
|
||||||
`FRM_RMM: $write("RMM");
|
|
||||||
`FRM_DYN: $write("DYN");
|
|
||||||
default: $write("?");
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
endtask
|
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -52,10 +52,7 @@ module VX_warp_sched #(
|
|||||||
&& warp_ctl_if.tmc.valid
|
&& warp_ctl_if.tmc.valid
|
||||||
&& (0 == warp_ctl_if.tmc.thread_mask)) begin
|
&& (0 == warp_ctl_if.tmc.thread_mask)) begin
|
||||||
schedule_table_n[warp_ctl_if.wid] = 0;
|
schedule_table_n[warp_ctl_if.wid] = 0;
|
||||||
end
|
end
|
||||||
if (wstall_if.wstall) begin
|
|
||||||
schedule_table_n[wstall_if.wid] = 0;
|
|
||||||
end
|
|
||||||
if (scheduled_warp) begin // remove scheduled warp (round-robin)
|
if (scheduled_warp) begin // remove scheduled warp (round-robin)
|
||||||
schedule_table_n[warp_to_schedule] = 0;
|
schedule_table_n[warp_to_schedule] = 0;
|
||||||
end
|
end
|
||||||
@@ -103,7 +100,7 @@ module VX_warp_sched #(
|
|||||||
if (0 == warp_ctl_if.tmc.thread_mask) begin
|
if (0 == warp_ctl_if.tmc.thread_mask) begin
|
||||||
active_warps[warp_ctl_if.wid] <= 0;
|
active_warps[warp_ctl_if.wid] <= 0;
|
||||||
end
|
end
|
||||||
end else if (join_if.is_join && !didnt_split) begin
|
end else if (join_if.valid && !didnt_split) begin
|
||||||
if (!join_fall) begin
|
if (!join_fall) begin
|
||||||
warp_pcs[join_if.wid] <= join_pc;
|
warp_pcs[join_if.wid] <= join_pc;
|
||||||
end
|
end
|
||||||
@@ -125,15 +122,10 @@ module VX_warp_sched #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Stalling the scheduling of warps
|
// Stalling the scheduling of warps
|
||||||
if (wstall_if.wstall) begin
|
if (wstall_if.valid) begin
|
||||||
stalled_warps[wstall_if.wid] <= 1;
|
stalled_warps[wstall_if.wid] <= 1;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Advance PC
|
|
||||||
if (scheduled_warp) begin
|
|
||||||
warp_pcs[warp_to_schedule] <= warp_pc + 4;
|
|
||||||
end
|
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
if (branch_ctl_if.valid) begin
|
if (branch_ctl_if.valid) begin
|
||||||
if (branch_ctl_if.taken) begin
|
if (branch_ctl_if.taken) begin
|
||||||
@@ -148,6 +140,7 @@ module VX_warp_sched #(
|
|||||||
end
|
end
|
||||||
if (ifetch_rsp_fire) begin
|
if (ifetch_rsp_fire) begin
|
||||||
fetch_lock[ifetch_rsp_if.wid] <= 0;
|
fetch_lock[ifetch_rsp_if.wid] <= 0;
|
||||||
|
warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.curr_PC + 4;
|
||||||
end
|
end
|
||||||
|
|
||||||
// reset 'schedule_table' when it goes to zero
|
// reset 'schedule_table' when it goes to zero
|
||||||
@@ -191,8 +184,7 @@ module VX_warp_sched #(
|
|||||||
&& warp_ctl_if.split.diverged
|
&& warp_ctl_if.split.diverged
|
||||||
&& (i == warp_ctl_if.wid);
|
&& (i == warp_ctl_if.wid);
|
||||||
|
|
||||||
wire pop = join_if.is_join
|
wire pop = join_if.valid && (i == join_if.wid);
|
||||||
&& (i == join_if.wid);
|
|
||||||
|
|
||||||
VX_ipdom_stack #(
|
VX_ipdom_stack #(
|
||||||
.WIDTH(1+32+`NUM_THREADS),
|
.WIDTH(1+32+`NUM_THREADS),
|
||||||
@@ -232,17 +224,9 @@ module VX_warp_sched #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||||
|
|
||||||
wire branch_hazard = branch_ctl_if.valid
|
|
||||||
&& branch_ctl_if.taken
|
|
||||||
&& (branch_ctl_if.wid == warp_to_schedule);
|
|
||||||
|
|
||||||
wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule);
|
assign scheduled_warp = schedule_valid && ~stall_out;
|
||||||
|
|
||||||
wire stall = stall_out || wstall_this_cycle || branch_hazard || join_if.is_join;
|
|
||||||
|
|
||||||
assign scheduled_warp = schedule_valid && ~stall;
|
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
interface VX_join_if ();
|
interface VX_join_if ();
|
||||||
|
|
||||||
wire is_join;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] wid;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
interface VX_wstall_if();
|
interface VX_wstall_if();
|
||||||
|
|
||||||
wire wstall;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] wid;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
|
||||||
//#define ALL_TESTS
|
#define ALL_TESTS
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
if (argc == 1) {
|
if (argc == 1) {
|
||||||
|
|||||||
Reference in New Issue
Block a user