From f5f9e3dfdba742acb35aebbeb29c37d380821e52 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 5 Dec 2023 17:10:30 -0800 Subject: [PATCH] profiling timing optimization --- hw/rtl/core/VX_core.sv | 14 ++++++++------ hw/rtl/mem/VX_shared_mem.sv | 6 ++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/rtl/core/VX_core.sv b/hw/rtl/core/VX_core.sv index 684a9b84..1776024f 100644 --- a/hw/rtl/core/VX_core.sv +++ b/hw/rtl/core/VX_core.sv @@ -266,9 +266,8 @@ module VX_core import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE - wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle; - wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle; - + wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_per_cycle_r; + wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_per_cycle_r; wire [`CLOG2(DCACHE_NUM_REQS+1)-1:0] perf_dcache_rsp_per_cycle; wire [1:0] perf_icache_pending_read_cycle; @@ -295,9 +294,12 @@ module VX_core import VX_gpu_pkg::*; #( `POP_COUNT(perf_dcache_rd_req_per_cycle, perf_dcache_rd_req_fire); `POP_COUNT(perf_dcache_wr_req_per_cycle, perf_dcache_wr_req_fire); `POP_COUNT(perf_dcache_rsp_per_cycle, perf_dcache_rsp_fire); + + `BUFFER(perf_dcache_rd_req_per_cycle_r, perf_dcache_rd_req_per_cycle); + `BUFFER(perf_dcache_wr_req_per_cycle_r, perf_dcache_wr_req_per_cycle); assign perf_icache_pending_read_cycle = perf_icache_req_fire - perf_icache_rsp_fire; - assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle - perf_dcache_rsp_per_cycle; + assign perf_dcache_pending_read_cycle = perf_dcache_rd_req_per_cycle_r - perf_dcache_rsp_per_cycle; always @(posedge clk) begin if (reset) begin @@ -321,8 +323,8 @@ module VX_core import VX_gpu_pkg::*; #( perf_dcache_lat <= '0; end else begin perf_ifetches <= perf_ifetches + `PERF_CTR_BITS'(perf_icache_req_fire); - perf_loads <= perf_loads + `PERF_CTR_BITS'(perf_dcache_rd_req_per_cycle); - perf_stores <= perf_stores + `PERF_CTR_BITS'(perf_dcache_wr_req_per_cycle); + perf_loads <= perf_loads + `PERF_CTR_BITS'(perf_dcache_rd_req_per_cycle_r); + perf_stores <= perf_stores + `PERF_CTR_BITS'(perf_dcache_wr_req_per_cycle_r); perf_icache_lat <= perf_icache_lat + perf_icache_pending_reads; perf_dcache_lat <= perf_dcache_lat + perf_dcache_pending_reads; end diff --git a/hw/rtl/mem/VX_shared_mem.sv b/hw/rtl/mem/VX_shared_mem.sv index 1c25c7cf..0f1f4171 100644 --- a/hw/rtl/mem/VX_shared_mem.sv +++ b/hw/rtl/mem/VX_shared_mem.sv @@ -229,8 +229,8 @@ module VX_shared_mem import VX_gpu_pkg::*; #( `ifdef PERF_ENABLE // per cycle: reads, writes - wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle; + wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle, perf_reads_per_cycle_r; + wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle, perf_writes_per_cycle_r; wire [`CLOG2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle; wire [NUM_REQS-1:0] perf_reads_per_req = req_valid & req_ready & ~req_rw; @@ -245,8 +245,6 @@ module VX_shared_mem import VX_gpu_pkg::*; #( reg [`PERF_CTR_BITS-1:0] perf_writes; reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_reads_per_cycle_r; - wire [`CLOG2(NUM_REQS+1)-1:0] perf_writes_per_cycle_r; `BUFFER(perf_reads_per_cycle_r, perf_reads_per_cycle); `BUFFER(perf_writes_per_cycle_r, perf_writes_per_cycle);