Barriers impl + tested

This commit is contained in:
felsabbagh3
2019-10-22 01:47:39 -04:00
parent 31d3d51392
commit b3f464dd89
8 changed files with 70 additions and 21 deletions

Binary file not shown.

View File

@@ -8,7 +8,7 @@
_start:
# li a0, 4
# la a1, SPAWN
# .word 0x00b5106b
# .word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
# j SPAWN
# nop
# nop
@@ -21,8 +21,10 @@ _start:
# SPAWN:
# li a2, 7
# li a0, 0
# li a1, 4
# .word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
# .word 0x0005006b # tmc a0
###########################
##########################
# li a0, 4
# .word 0x0005006b # tmc a0
# csrr a1, 0x20 # read thread IDs

View File

@@ -10,6 +10,8 @@
// `define ONLY
`define NUM_BARRIERS 4
`define R_INST 7'd51
`define L_INST 7'd3
`define ALU_INST 7'd19

View File

@@ -30,6 +30,12 @@ module VX_fetch (
.clk (clk),
.reset (reset),
.stall (pipe_stall),
.is_barrier (VX_warp_ctl.is_barrier),
.barrier_id (VX_warp_ctl.barrier_id),
.num_warps (VX_warp_ctl.num_warps),
.barrier_warp_num (VX_warp_ctl.warp_num),
// Wspawn
.wspawn (VX_warp_ctl.wspawn),
.wsapwn_pc (VX_warp_ctl.wspawn_pc),

View File

@@ -37,6 +37,9 @@ module VX_gpgpu_inst (
end
assign VX_warp_ctl.is_barrier = VX_gpu_inst_req.is_barrier && valid_inst;
assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
assign VX_warp_ctl.num_warps = VX_gpu_inst_req.rd2 - 1;
assign VX_warp_ctl.wspawn = wspawn;
assign VX_warp_ctl.wspawn_pc = wspawn_pc;

View File

@@ -18,6 +18,11 @@ module VX_warp_scheduler (
input wire whalt,
input wire[`NW_M1:0] whalt_warp_num,
input wire is_barrier,
input wire[31:0] barrier_id,
input wire[`NW_M1:0] num_warps,
input wire[`NW_M1:0] barrier_warp_num,
// WSTALL
input wire wstall,
input wire[`NW_M1:0] wstall_warp_num,
@@ -72,10 +77,16 @@ module VX_warp_scheduler (
reg[`NW-1:0] visible_active;
wire[`NW-1:0] use_active;
wire wstall_this_cycle;
reg[`NT_M1:0] thread_masks[`NW-1:0];
reg[31:0] warp_pcs[`NW-1:0];
// barriers
reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
wire reached_barrier_limit;
wire[`NW-1:0] curr_barrier_mask;
wire[($clog2(`NUM_BARRIERS)-1):0] curr_barrier_count;
// wspawn
reg[31:0] use_wsapwn_pc;
@@ -91,27 +102,23 @@ module VX_warp_scheduler (
wire[31:0] new_pc;
reg[`NW-1:0] total_barrier_stall;
/* verilator lint_off UNUSED */
wire[`NW_M1:0] num_active;
/* verilator lint_on UNUSED */
reg[1:0] start;
// initial begin
// warp_pcs[0] = (32'h80000000 - 4);
// start = 0;
// warp_active[0] = 1; // Activating first warp
// visible_active[0] = 1; // Activating first warp
// thread_masks[0][0] = 1; // Activating first thread in first warp
// end
integer curr_w_help;
always @(posedge clk or posedge reset) begin
if (reset) begin
start <= 0;
warp_pcs[0] <= (32'h80000000 - 4);
warp_active[0] <= 1; // Activating first warp
visible_active[0] <= 1; // Activating first warp
thread_masks[0] <= 1; // Activating first thread in first warp
barrier_stall_mask[0] <= 0;
barrier_stall_mask[1] <= 0;
use_wsapwn_pc <= 0;
use_wsapwn <= 0;
warp_pcs[0] <= (32'h80000000 - 4);
warp_active[0] <= 1; // Activating first warp
visible_active[0] <= 1; // Activating first warp
thread_masks[0] <= 1; // Activating first thread in first warp
for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin
warp_pcs[curr_w_help] <= 0;
warp_active[curr_w_help] <= 0; // Activating first warp
@@ -127,6 +134,15 @@ module VX_warp_scheduler (
use_wsapwn <= wspawn_new_active & (~`NW'b1);
end
if (is_barrier) begin
warp_stalled[barrier_warp_num] <= 0;
if (reached_barrier_limit) begin
barrier_stall_mask[barrier_id] <= 0;
end else begin
barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
end
end
if (update_use_wspawn) begin
use_wsapwn[warp_to_schedule] <= 0;
end
@@ -162,7 +178,7 @@ module VX_warp_scheduler (
// Refilling active warps
if (update_visible_active) begin
visible_active <= warp_active & (~warp_stalled);
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall);
end
// Don't change state if stall
@@ -185,8 +201,23 @@ module VX_warp_scheduler (
end
end
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NW-1:0];
assign curr_barrier_count = $countones(curr_barrier_mask);
assign reached_barrier_limit = curr_barrier_count == (num_warps);
assign update_visible_active = ($countones(visible_active) < 1) && !(stall || wstall || hazard || is_join);
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
genvar curr_b;
always @(*) begin
total_barrier_stall = 0;
for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
begin
total_barrier_stall[`NW-1:0] = total_barrier_stall[`NW-1:0] | barrier_stall_mask[curr_b[($clog2(`NUM_BARRIERS)-1):0]][`NW-1:0];
end
end
assign update_visible_active = ($countones(visible_active) < 1) && !(stall || wstall_this_cycle || hazard || is_join);
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
@@ -221,9 +252,9 @@ module VX_warp_scheduler (
assign hazard = (should_jal || should_bra) && schedule;
assign real_schedule = schedule && !warp_stalled[warp_to_schedule];
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule];
assign global_stall = (stall || wstall || hazard || !real_schedule || is_join);
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
@@ -237,7 +268,7 @@ module VX_warp_scheduler (
assign new_pc = warp_pc + 4;
assign use_active = (num_active < 1) ? (warp_active & (~warp_stalled)) : visible_active;
assign use_active = (num_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall)) : visible_active;
// Choosing a warp to schedule
VX_priority_encoder choose_schedule(

View File

@@ -11,6 +11,7 @@ interface VX_gpu_inst_req_inter();
wire is_wspawn;
wire is_tmc;
wire is_split;
wire is_barrier;
wire[31:0] pc_next;

View File

@@ -17,6 +17,10 @@ interface VX_warp_ctl_inter ();
wire ebreak;
// barrier
wire is_barrier;
wire[31:0] barrier_id;
wire[`NW_M1:0] num_warps;
wire is_split;
wire[`NW_M1:0] split_warp_num;