using 44-bit perf counters - aligned with DSP counters width

This commit is contained in:
Blaise Tine
2021-02-28 02:05:47 -08:00
parent 9fda618815
commit e64996946d
9 changed files with 111 additions and 107 deletions

View File

@@ -7,7 +7,7 @@ module VX_csr_data #(
input wire reset,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
`endif
@@ -123,61 +123,61 @@ module VX_csr_data #(
`ifdef PERF_ENABLE
// PERF: pipeline
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
`CSR_MPM_IBUF_ST_H : read_data_r = perf_pipeline_if.ibf_stalls[63:32];
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[43:32]);
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
`CSR_MPM_SCRB_ST_H : read_data_r = perf_pipeline_if.scb_stalls[63:32];
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[43:32]);
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
`CSR_MPM_ALU_ST_H : read_data_r = perf_pipeline_if.alu_stalls[63:32];
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[43:32]);
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[43:32]);
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[43:32]);
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[43:32]);
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
`CSR_MPM_GPU_ST_H : read_data_r = perf_pipeline_if.gpu_stalls[63:32];
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[43:32]);
// PERF: icache
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
`CSR_MPM_ICACHE_READS_H : read_data_r = perf_memsys_if.icache_reads[63:32];
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[43:32]);
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = perf_memsys_if.icache_read_misses[63:32];
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[43:32]);
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = perf_memsys_if.icache_pipe_stalls[63:32];
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[43:32]);
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = perf_memsys_if.icache_crsp_stalls[63:32];
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[43:32]);
// PERF: dcache
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
`CSR_MPM_DCACHE_READS_H : read_data_r = perf_memsys_if.dcache_reads[63:32];
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[43:32]);
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
`CSR_MPM_DCACHE_WRITES_H : read_data_r = perf_memsys_if.dcache_writes[63:32];
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[43:32]);
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = perf_memsys_if.dcache_read_misses[63:32];
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[43:32]);
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = perf_memsys_if.dcache_write_misses[63:32];
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[43:32]);
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = perf_memsys_if.dcache_bank_stalls[63:32];
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[43:32]);
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = perf_memsys_if.dcache_mshr_stalls[63:32];
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[43:32]);
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = perf_memsys_if.dcache_pipe_stalls[63:32];
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[43:32]);
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = perf_memsys_if.dcache_crsp_stalls[63:32];
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[43:32]);
// PERF: smem
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
`CSR_MPM_SMEM_READS_H : read_data_r = perf_memsys_if.smem_reads[63:32];
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[43:32]);
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
`CSR_MPM_SMEM_WRITES_H : read_data_r = perf_memsys_if.smem_writes[63:32];
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[43:32]);
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = perf_memsys_if.smem_bank_stalls[63:32];
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[43:32]);
// PERF: DRAM
`CSR_MPM_DRAM_READS : read_data_r = perf_memsys_if.dram_reads[31:0];
`CSR_MPM_DRAM_READS_H : read_data_r = perf_memsys_if.dram_reads[63:32];
`CSR_MPM_DRAM_READS_H : read_data_r = 32'(perf_memsys_if.dram_reads[43:32]);
`CSR_MPM_DRAM_WRITES : read_data_r = perf_memsys_if.dram_writes[31:0];
`CSR_MPM_DRAM_WRITES_H : read_data_r = perf_memsys_if.dram_writes[63:32];
`CSR_MPM_DRAM_WRITES_H : read_data_r = 32'(perf_memsys_if.dram_writes[43:32]);
`CSR_MPM_DRAM_ST : read_data_r = perf_memsys_if.dram_stalls[31:0];
`CSR_MPM_DRAM_ST_H : read_data_r = perf_memsys_if.dram_stalls[63:32];
`CSR_MPM_DRAM_ST_H : read_data_r = 32'(perf_memsys_if.dram_stalls[43:32]);
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
`CSR_MPM_DRAM_LAT_H : read_data_r = perf_memsys_if.dram_latency[63:32];
`CSR_MPM_DRAM_LAT_H : read_data_r = 32'(perf_memsys_if.dram_latency[43:32]);
`endif
`CSR_SATP : read_data_r = 32'(csr_satp);
@@ -195,9 +195,9 @@ module VX_csr_data #(
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[43:32]);
`CSR_INSTRET : read_data_r = csr_instret[31:0];
`CSR_INSTRET_H : read_data_r = csr_instret[63:32];
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[43:32]);
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;

View File

@@ -121,14 +121,14 @@ module VX_issue #(
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
`ifdef PERF_ENABLE
reg [63:0] perf_ibf_stalls;
reg [63:0] perf_scb_stalls;
reg [63:0] perf_alu_stalls;
reg [63:0] perf_lsu_stalls;
reg [63:0] perf_csr_stalls;
reg [63:0] perf_gpu_stalls;
reg [43:0] perf_ibf_stalls;
reg [43:0] perf_scb_stalls;
reg [43:0] perf_alu_stalls;
reg [43:0] perf_lsu_stalls;
reg [43:0] perf_csr_stalls;
reg [43:0] perf_gpu_stalls;
`ifdef EXT_F_ENABLE
reg [63:0] perf_fpu_stalls;
reg [43:0] perf_fpu_stalls;
`endif
always @(posedge clk) begin
@@ -144,26 +144,26 @@ module VX_issue #(
`endif
end else begin
if (decode_if.valid & !decode_if.ready) begin
perf_ibf_stalls <= perf_ibf_stalls + 64'd1;
perf_ibf_stalls <= perf_ibf_stalls + 44'd1;
end
if (ibuf_deq_if.valid & scoreboard_delay) begin
perf_scb_stalls <= perf_scb_stalls + 64'd1;
perf_scb_stalls <= perf_scb_stalls + 44'd1;
end
if (alu_req_if.valid & !alu_req_if.ready) begin
perf_alu_stalls <= perf_alu_stalls + 64'd1;
perf_alu_stalls <= perf_alu_stalls + 44'd1;
end
if (lsu_req_if.valid & !lsu_req_if.ready) begin
perf_lsu_stalls <= perf_lsu_stalls + 64'd1;
perf_lsu_stalls <= perf_lsu_stalls + 44'd1;
end
if (csr_req_if.valid & !csr_req_if.ready) begin
perf_csr_stalls <= perf_csr_stalls + 64'd1;
perf_csr_stalls <= perf_csr_stalls + 44'd1;
end
if (gpu_req_if.valid & !gpu_req_if.ready) begin
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
perf_gpu_stalls <= perf_gpu_stalls + 44'd1;
end
`ifdef EXT_F_ENABLE
if (fpu_req_if.valid & !fpu_req_if.ready) begin
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
perf_fpu_stalls <= perf_fpu_stalls + 44'd1;
end
`endif
end

View File

@@ -323,19 +323,22 @@ end else begin
assign perf_memsys_if.smem_bank_stalls = 0;
end
reg [63:0] perf_dram_lat_per_cycle;
reg [43:0] perf_dram_lat_per_cycle;
always @(posedge clk) begin
if (reset) begin
perf_dram_lat_per_cycle <= 0;
end else begin
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle +
64'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
44'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
2'((dram_rsp_if.valid && dram_rsp_if.ready) && !(dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready))));
end
end
reg [63:0] perf_dram_reads, perf_dram_writes, perf_dram_lat, perf_dram_stalls;
reg [43:0] perf_dram_reads;
reg [43:0] perf_dram_writes;
reg [43:0] perf_dram_lat;
reg [43:0] perf_dram_stalls;
always @(posedge clk) begin
if (reset) begin
@@ -345,13 +348,13 @@ end
perf_dram_stalls <= 0;
end else begin
if (dram_req_if.valid && dram_req_if.ready && !dram_req_if.rw) begin
perf_dram_reads <= perf_dram_reads + 64'd1;
perf_dram_reads <= perf_dram_reads + 44'd1;
end
if (dram_req_if.valid && dram_req_if.ready && dram_req_if.rw) begin
perf_dram_writes <= perf_dram_writes + 64'd1;
perf_dram_writes <= perf_dram_writes + 44'd1;
end
if (dram_req_if.valid && !dram_req_if.ready) begin
perf_dram_stalls <= perf_dram_stalls + 64'd1;
perf_dram_stalls <= perf_dram_stalls + 44'd1;
end
perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
end

View File

@@ -399,7 +399,8 @@ module VX_cache #(
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle;
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
@@ -422,13 +423,13 @@ module VX_cache #(
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
reg [63:0] perf_core_reads;
reg [63:0] perf_core_writes;
reg [63:0] perf_read_misses;
reg [63:0] perf_write_misses;
reg [63:0] perf_mshr_stalls;
reg [63:0] perf_pipe_stalls;
reg [63:0] perf_crsp_stalls;
reg [43:0] perf_core_reads;
reg [43:0] perf_core_writes;
reg [43:0] perf_read_misses;
reg [43:0] perf_write_misses;
reg [43:0] perf_mshr_stalls;
reg [43:0] perf_pipe_stalls;
reg [43:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
@@ -440,13 +441,13 @@ module VX_cache #(
perf_pipe_stalls <= 0;
perf_crsp_stalls <= 0;
end else begin
perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle);
perf_read_misses <= perf_read_misses + 64'(perf_read_miss_per_cycle);
perf_write_misses <= perf_write_misses+ 64'(perf_write_miss_per_cycle);
perf_mshr_stalls <= perf_mshr_stalls + 64'(perf_mshr_stall_per_cycle);
perf_pipe_stalls <= perf_pipe_stalls + 64'(perf_pipe_stall_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle);
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
perf_read_misses <= perf_read_misses + 44'(perf_read_miss_per_cycle);
perf_write_misses <= perf_write_misses+ 44'(perf_write_miss_per_cycle);
perf_mshr_stalls <= perf_mshr_stalls + 44'(perf_mshr_stall_per_cycle);
perf_pipe_stalls <= perf_pipe_stalls + 44'(perf_pipe_stall_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
end
end

View File

@@ -22,7 +22,7 @@ module VX_cache_core_req_bank_sel #(
input wire reset,
`ifdef PERF_ENABLE
output wire [63:0] bank_stalls,
output wire [43:0] bank_stalls,
`endif
input wire [NUM_REQS-1:0] core_req_valid,
@@ -303,13 +303,13 @@ module VX_cache_core_req_bank_sel #(
end
end
reg [63:0] bank_stalls_r;
reg [43:0] bank_stalls_r;
always @(posedge clk) begin
if (reset) begin
bank_stalls_r <= 0;
end else begin
bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_sel_r & ~core_req_ready));
bank_stalls_r <= bank_stalls_r + 44'($countones(core_req_sel_r & ~core_req_ready));
end
end

View File

@@ -248,9 +248,9 @@ module VX_shared_mem #(
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
end
reg [63:0] perf_core_reads;
reg [63:0] perf_core_writes;
reg [63:0] perf_crsp_stalls;
reg [43:0] perf_core_reads;
reg [43:0] perf_core_writes;
reg [43:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
@@ -258,9 +258,9 @@ module VX_shared_mem #(
perf_core_writes <= 0;
perf_crsp_stalls <= 0;
end else begin
perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle);
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
end
end

View File

@@ -5,14 +5,14 @@
interface VX_perf_cache_if ();
wire [63:0] reads;
wire [63:0] writes;
wire [63:0] read_misses;
wire [63:0] write_misses;
wire [63:0] bank_stalls;
wire [63:0] mshr_stalls;
wire [63:0] pipe_stalls;
wire [63:0] crsp_stalls;
wire [43:0] reads;
wire [43:0] writes;
wire [43:0] read_misses;
wire [43:0] write_misses;
wire [43:0] bank_stalls;
wire [43:0] mshr_stalls;
wire [43:0] pipe_stalls;
wire [43:0] crsp_stalls;
endinterface

View File

@@ -5,28 +5,28 @@
interface VX_perf_memsys_if ();
wire [63:0] icache_reads;
wire [63:0] icache_read_misses;
wire [63:0] icache_pipe_stalls;
wire [63:0] icache_crsp_stalls;
wire [43:0] icache_reads;
wire [43:0] icache_read_misses;
wire [43:0] icache_pipe_stalls;
wire [43:0] icache_crsp_stalls;
wire [63:0] dcache_reads;
wire [63:0] dcache_writes;
wire [63:0] dcache_read_misses;
wire [63:0] dcache_write_misses;
wire [63:0] dcache_bank_stalls;
wire [63:0] dcache_mshr_stalls;
wire [63:0] dcache_pipe_stalls;
wire [63:0] dcache_crsp_stalls;
wire [43:0] dcache_reads;
wire [43:0] dcache_writes;
wire [43:0] dcache_read_misses;
wire [43:0] dcache_write_misses;
wire [43:0] dcache_bank_stalls;
wire [43:0] dcache_mshr_stalls;
wire [43:0] dcache_pipe_stalls;
wire [43:0] dcache_crsp_stalls;
wire [63:0] smem_reads;
wire [63:0] smem_writes;
wire [63:0] smem_bank_stalls;
wire [43:0] smem_reads;
wire [43:0] smem_writes;
wire [43:0] smem_bank_stalls;
wire [63:0] dram_reads;
wire [63:0] dram_writes;
wire [63:0] dram_stalls;
wire [63:0] dram_latency;
wire [43:0] dram_reads;
wire [43:0] dram_writes;
wire [43:0] dram_stalls;
wire [43:0] dram_latency;
endinterface

View File

@@ -4,14 +4,14 @@
`include "VX_define.vh"
interface VX_perf_pipeline_if ();
wire [63:0] ibf_stalls;
wire [63:0] scb_stalls;
wire [63:0] lsu_stalls;
wire [63:0] csr_stalls;
wire [63:0] alu_stalls;
wire [63:0] gpu_stalls;
wire [43:0] ibf_stalls;
wire [43:0] scb_stalls;
wire [43:0] lsu_stalls;
wire [43:0] csr_stalls;
wire [43:0] alu_stalls;
wire [43:0] gpu_stalls;
`ifdef EXT_F_ENABLE
wire [63:0] fpu_stalls;
wire [43:0] fpu_stalls;
`endif
endinterface