From 143319d557c9850632a463b70ed15f3f71709536 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 18 Feb 2021 16:03:16 -0800 Subject: [PATCH] minor optimization --- hw/rtl/VX_ibuffer.v | 63 ++++++++++++++++--------------- hw/rtl/libs/VX_priority_encoder.v | 25 ++++++++---- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index afb8b402..6556da74 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -15,14 +15,13 @@ module VX_ibuffer #( ); localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS; localparam SIZE = `IBUF_SIZE; - localparam ADDRW = $clog2(SIZE); + localparam ADDRW = $clog2(SIZE+1); localparam NWARPSW = $clog2(`NUM_WARPS+1); reg [`NUM_WARPS-1:0][ADDRW-1:0] used_r; - reg [`NUM_WARPS-1:0] full_r, empty_r, sizeMany_r; + reg [`NUM_WARPS-1:0] full_r, empty_r, alm_empty_r; - wire [`NUM_WARPS-1:0] q_full, q_empty; - wire [`NUM_WARPS-1:0] q_sizeMany; + wire [`NUM_WARPS-1:0] q_full, q_empty, q_alm_empty; wire [DATAW-1:0] q_data_in; wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev; reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out; @@ -35,10 +34,10 @@ module VX_ibuffer #( wire writing = enq_fire && (i == ibuf_enq_if.wid); wire reading = deq_fire && (i == ibuf_deq_if.wid); - wire is_slot0 = (0 == used_r[i]) || ((1 == used_r[i]) && reading); + wire is_slot0 = empty_r[i] || (alm_empty_r[i] && reading); wire push = writing && !is_slot0; - wire pop = reading && sizeMany_r[i]; + wire pop = reading && !alm_empty_r[i]; VX_fifo_queue #( .DATAW (DATAW), @@ -60,25 +59,25 @@ module VX_ibuffer #( always @(posedge clk) begin if (reset) begin - used_r[i] <= 0; - full_r[i] <= 0; - empty_r[i] <= 1; - sizeMany_r[i] <= 0; + used_r[i] <= 0; + full_r[i] <= 0; + empty_r[i] <= 1; + alm_empty_r[i] <= 1; end else begin if (writing) begin if (!reading) begin empty_r[i] <= 0; - if (used_r[i] == ADDRW'(SIZE-1)) - full_r[i] <= 1; if (used_r[i] == 1) - sizeMany_r[i] <= 1; + alm_empty_r[i] <= 0; + if (used_r[i] == ADDRW'(SIZE)) + full_r[i] <= 1; end end else if (reading) begin full_r[i] <= 0; if (used_r[i] == ADDRW'(1)) empty_r[i] <= 1; if (used_r[i] == ADDRW'(2)) - sizeMany_r[i] <= 0; + alm_empty_r[i] <= 1; end used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading))); end @@ -91,29 +90,29 @@ module VX_ibuffer #( end end - assign q_full[i] = full_r[i]; - assign q_empty[i] = empty_r[i]; - assign q_sizeMany[i] = sizeMany_r[i]; + assign q_full[i] = full_r[i]; + assign q_empty[i] = empty_r[i]; + assign q_alm_empty[i] = alm_empty_r[i]; end /////////////////////////////////////////////////////////////////////////// reg [`NUM_WARPS-1:0] valid_table, valid_table_n; reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n; - reg [NWARPSW-1:0] num_warps; reg [`NW_BITS-1:0] deq_wid, deq_wid_n; reg deq_valid, deq_valid_n; reg [DATAW-1:0] deq_instr, deq_instr_n; + reg [NWARPSW-1:0] num_warps; always @(*) begin valid_table_n = valid_table; if (deq_fire) begin - valid_table_n[deq_wid] = q_sizeMany[deq_wid]; + valid_table_n[deq_wid] = !q_alm_empty[deq_wid]; end if (enq_fire) begin valid_table_n[ibuf_enq_if.wid] = 1; end - end + end // schedule the next instruction to issue // do round-robin when multiple warps are active @@ -121,19 +120,20 @@ module VX_ibuffer #( deq_valid_n = 0; deq_wid_n = 'x; deq_instr_n = 'x; - - schedule_table_n = schedule_table; + schedule_table_n = 'x; - if (0 == num_warps) begin + if ((0 == num_warps) + || (1 == num_warps && deq_fire && q_alm_empty[deq_wid])) begin deq_valid_n = enq_fire; deq_wid_n = ibuf_enq_if.wid; deq_instr_n = q_data_in; end else if ((1 == num_warps) || freeze) begin - deq_valid_n = (!deq_fire || q_sizeMany[deq_wid]) || enq_fire; - deq_wid_n = (!deq_fire || q_sizeMany[deq_wid]) ? deq_wid : ibuf_enq_if.wid; - deq_instr_n = deq_fire ? (q_sizeMany[deq_wid] ? q_data_prev[deq_wid] : q_data_in) : q_data_out[deq_wid]; + deq_valid_n = 1; + deq_wid_n = deq_wid; + deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid]; end else begin deq_valid_n = (| schedule_table); + schedule_table_n = schedule_table; for (integer i = 0; i < `NUM_WARPS; i++) begin if (schedule_table[i]) begin deq_wid_n = `NW_BITS'(i); @@ -146,7 +146,7 @@ module VX_ibuffer #( end wire warp_added = enq_fire && q_empty[ibuf_enq_if.wid]; - wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == deq_wid) && ~q_sizeMany[deq_wid]; + wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == deq_wid) && q_alm_empty[deq_wid]; always @(posedge clk) begin if (reset) begin @@ -157,11 +157,12 @@ module VX_ibuffer #( end else begin valid_table <= valid_table_n; - if ((| schedule_table_n)) begin - schedule_table <= schedule_table_n; - end else begin + if (0 == (| schedule_table_n) + || (num_warps < 2)) begin schedule_table <= valid_table_n; - schedule_table[deq_wid_n] <= 0; + schedule_table[deq_wid_n] <= 0; + end else begin + schedule_table <= schedule_table_n; end deq_valid <= deq_valid_n; diff --git a/hw/rtl/libs/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v index d1c6c479..9809b58c 100644 --- a/hw/rtl/libs/VX_priority_encoder.v +++ b/hw/rtl/libs/VX_priority_encoder.v @@ -40,7 +40,8 @@ module VX_priority_encoder #( end VX_onehot_encoder #( - .N (N) + .N (N), + .REVERSE (REVERSE) ) b ( .data_in (onehot), .data_out (index), @@ -51,15 +52,25 @@ module VX_priority_encoder #( reg [N-1:0] onehot_r; reg [LN-1:0] index_r; - + always @(*) begin index_r = 'x; onehot_r = 0; - for (integer i = 0; i < N; i++) begin - if (data_in[i]) begin - index_r = LN'(i); - onehot_r[i] = 1'b1; - break; + if (REVERSE) begin + for (integer i = N-1; i >= 0; i--) begin + if (data_in[i]) begin + index_r = LN'(i); + onehot_r[i] = 1'b1; + break; + end + end + end else begin + for (integer i = 0; i < N; i++) begin + if (data_in[i]) begin + index_r = LN'(i); + onehot_r[i] = 1'b1; + break; + end end end end