From 83e151a189f19afe09c10822755d7104d632860c Mon Sep 17 00:00:00 2001
From: Hansung Kim
Date: Sat, 23 Mar 2024 00:01:15 -0700
Subject: [PATCH] Add valid / fire / cycles-issued perf counters to dispatch

---
Review notes (ignored by git am, not part of the commit message):
 * VX_issue.sv: the `UNUSED_PIN (perf_stalls) line is now deleted. Keeping it
   next to the new .perf_stalls connection would connect that port twice
   (assuming `UNUSED_PIN(x) expands to an empty .x() connection -- confirm
   against VX_platform.vh).
 * VX_dispatch.sv: perf_any_fire_cycles_r is updated in its own always block
   instead of inside the per-unit generate loop, so that exactly one always
   block drives it.
 * Leading whitespace, blank context lines and the post-image "index" hashes
   were reconstructed from a whitespace-collapsed copy of this patch;
   regenerate with git format-patch (or apply with --ignore-whitespace).

 hw/rtl/core/VX_dispatch.sv               | 28 ++++++++++++++++++++++++++++
 hw/rtl/core/VX_issue.sv                  |  5 ++++-
 hw/rtl/interfaces/VX_pipeline_perf_if.sv | 14 +++++++++++++-
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/hw/rtl/core/VX_dispatch.sv b/hw/rtl/core/VX_dispatch.sv
index 10d6018d..fa7c99de 100644
--- a/hw/rtl/core/VX_dispatch.sv
+++ b/hw/rtl/core/VX_dispatch.sv
@@ -22,6 +22,9 @@ module VX_dispatch import VX_gpu_pkg::*; #(
 
 `ifdef PERF_ENABLE
     output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
+    output wire [`PERF_CTR_BITS-1:0] perf_valids [`NUM_EX_UNITS],
+    output wire [`PERF_CTR_BITS-1:0] perf_fires [`NUM_EX_UNITS],
+    output wire [`PERF_CTR_BITS-1:0] perf_any_fire_cycles,
 `endif
     // inputs
     VX_operands_if.slave    operands_if [`ISSUE_WIDTH],
@@ -188,6 +191,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
     reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
     reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_valids_r;
     reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_fires_r;
+    reg [`PERF_CTR_BITS-1:0] perf_any_fire_cycles_r;
 
     for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
         always @(*) begin
@@ -232,23 +236,47 @@ module VX_dispatch import VX_gpu_pkg::*; #(
     `BUFFER(perf_unit_valids_per_cycle_r, perf_unit_valids_per_cycle);
     `BUFFER(perf_unit_fires_per_cycle_r, perf_unit_fires_per_cycle);
 
+    // set when any unit's entry in perf_unit_fires_per_cycle_r is non-zero
+    reg perf_any_fire;
+    always @(*) begin
+        perf_any_fire = 1'b0;
+        for (integer i = 0; i < `NUM_EX_UNITS; ++i) begin
+            if (perf_unit_fires_per_cycle_r[i] != '0) begin
+                perf_any_fire = 1'b1;
+            end
+        end
+    end
+
     for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
         always @(posedge clk) begin
             if (reset) begin
                 perf_stalls_r[i] <= '0;
                 perf_valids_r[i] <= '0;
                 perf_fires_r[i] <= '0;
             end else begin
                 perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
                 perf_valids_r[i] <= perf_valids_r[i] + `PERF_CTR_BITS'(perf_unit_valids_per_cycle_r[i]);
                 perf_fires_r[i] <= perf_fires_r[i] + `PERF_CTR_BITS'(perf_unit_fires_per_cycle_r[i]);
             end
         end
     end
 
+    // one counter shared by all units: kept outside the per-unit generate
+    // loop so that exactly one always block drives it
+    always @(posedge clk) begin
+        if (reset) begin
+            perf_any_fire_cycles_r <= '0;
+        end else begin
+            perf_any_fire_cycles_r <= perf_any_fire_cycles_r + `PERF_CTR_BITS'(perf_any_fire);
+        end
+    end
+
     for (genvar i=0; i < `NUM_EX_UNITS; ++i) begin
         assign perf_stalls[i] = perf_stalls_r[i];
+        assign perf_valids[i] = perf_valids_r[i];
+        assign perf_fires[i] = perf_fires_r[i];
     end
+    assign perf_any_fire_cycles = perf_any_fire_cycles_r;
 `endif
 
 `ifdef DBG_TRACE_CORE_PIPELINE_VCS
diff --git a/hw/rtl/core/VX_issue.sv b/hw/rtl/core/VX_issue.sv
index 1ba4ca28..f3f1424e 100644
--- a/hw/rtl/core/VX_issue.sv
+++ b/hw/rtl/core/VX_issue.sv
@@ -86,6 +86,9 @@ module VX_issue #(
         .reset          (dispatch_reset),
 `ifdef PERF_ENABLE
-        `UNUSED_PIN     (perf_stalls),
+        .perf_stalls    (perf_issue_if.dispatch_stalls),
+        .perf_valids    (perf_issue_if.dispatch_valids),
+        .perf_fires     (perf_issue_if.dispatch_fires),
+        .perf_any_fire_cycles (perf_issue_if.dispatch_any_fire_cycles),
 `endif
         .operands_if    (operands_if),
         .alu_dispatch_if(alu_dispatch_if),
diff --git a/hw/rtl/interfaces/VX_pipeline_perf_if.sv b/hw/rtl/interfaces/VX_pipeline_perf_if.sv
index 661ebcdf..29b2903a 100644
--- a/hw/rtl/interfaces/VX_pipeline_perf_if.sv
+++ b/hw/rtl/interfaces/VX_pipeline_perf_if.sv
@@ -21,6 +21,10 @@ interface VX_pipeline_perf_if ();
     wire [`PERF_CTR_BITS-1:0] scb_stalls;
     wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
     wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
+    wire [`PERF_CTR_BITS-1:0] dispatch_stalls [`NUM_EX_UNITS];
+    wire [`PERF_CTR_BITS-1:0] dispatch_valids [`NUM_EX_UNITS];
+    wire [`PERF_CTR_BITS-1:0] dispatch_fires [`NUM_EX_UNITS];
+    wire [`PERF_CTR_BITS-1:0] dispatch_any_fire_cycles;
 
     wire [`PERF_CTR_BITS-1:0] ifetches;
     wire [`PERF_CTR_BITS-1:0] loads;
@@ -38,7 +42,11 @@ interface VX_pipeline_perf_if ();
         output ibf_stalls,
         output scb_stalls,
         output units_uses,
-        output sfu_uses
+        output sfu_uses,
+        output dispatch_stalls,
+        output dispatch_valids,
+        output dispatch_fires,
+        output dispatch_any_fire_cycles
     );
 
     modport slave (
@@ -49,6 +57,10 @@ interface VX_pipeline_perf_if ();
         input scb_stalls,
         input units_uses,
         input sfu_uses,
+        input dispatch_stalls,
+        input dispatch_valids,
+        input dispatch_fires,
+        input dispatch_any_fire_cycles,
         input ifetches,
         input loads,
         input stores,