adding sockets support to simx and cache subsystem refactoring

minor update

minor update

minor updates
This commit is contained in:
Blaise Tine
2023-12-20 11:57:44 -08:00
parent 914b680aed
commit c7a81d1493
24 changed files with 541 additions and 388 deletions

View File

@@ -85,8 +85,8 @@ module VX_cluster import VX_gpu_pkg::*; #(
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH)
) per_socket_mem_bus_if[`NUM_SOCKETS]();
.TAG_WIDTH (L1_MEM_TAG_WIDTH)
) l1_mem_bus_if[2]();
`RESET_RELAY (l2_reset, reset);
@@ -102,7 +102,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
.MREQ_SIZE (`L2_MREQ_SIZE),
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH),
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
.WRITE_ENABLE (1),
.UUID_WIDTH (`UUID_WIDTH),
.CORE_OUT_REG (2),
@@ -115,10 +115,65 @@ module VX_cluster import VX_gpu_pkg::*; #(
`ifdef PERF_ENABLE
.cache_perf (perf_l2cache),
`endif
.core_bus_if (per_socket_mem_bus_if),
.core_bus_if (l1_mem_bus_if),
.mem_bus_if (mem_bus_if)
);
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (ICACHE_MEM_TAG_WIDTH)
) per_socket_icache_mem_bus_if[`NUM_SOCKETS]();
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (DCACHE_MEM_TAG_WIDTH)
) per_socket_dcache_mem_bus_if[`NUM_SOCKETS]();
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_LINE_SIZE),
.TAG_WIDTH (ICACHE_MEM_ARB_TAG_WIDTH)
) icache_mem_bus_if[1]();
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_LINE_SIZE),
.TAG_WIDTH (DCACHE_MEM_ARB_TAG_WIDTH)
) dcache_mem_bus_if[1]();
`RESET_RELAY (l1_mem_arb_reset, reset);
VX_mem_arb #(
.NUM_INPUTS (`NUM_SOCKETS),
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (ICACHE_MEM_TAG_WIDTH),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.ARBITER ("R"),
.OUT_REG_REQ (2),
.OUT_REG_RSP (2)
) icache_mem_arb (
.clk (clk),
.reset (l1_mem_arb_reset),
.bus_in_if (per_socket_icache_mem_bus_if),
.bus_out_if (icache_mem_bus_if)
);
VX_mem_arb #(
.NUM_INPUTS (`NUM_SOCKETS),
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (DCACHE_MEM_TAG_WIDTH),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.ARBITER ("R"),
.OUT_REG_REQ (2),
.OUT_REG_RSP (2)
) dcache_mem_arb (
.clk (clk),
.reset (l1_mem_arb_reset),
.bus_in_if (per_socket_dcache_mem_bus_if),
.bus_out_if (dcache_mem_bus_if)
);
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[0], icache_mem_bus_if[0], L1_MEM_TAG_WIDTH, ICACHE_MEM_ARB_TAG_WIDTH);
`ASSIGN_VX_MEM_BUS_IF_X (l1_mem_bus_if[1], dcache_mem_bus_if[0], L1_MEM_TAG_WIDTH, DCACHE_MEM_ARB_TAG_WIDTH);
///////////////////////////////////////////////////////////////////////////
wire [`NUM_SOCKETS-1:0] per_socket_sim_ebreak;
@@ -155,7 +210,8 @@ module VX_cluster import VX_gpu_pkg::*; #(
.dcr_bus_if (socket_dcr_bus_if),
.mem_bus_if (per_socket_mem_bus_if[i]),
.icache_mem_bus_if (per_socket_icache_mem_bus_if[i]),
.dcache_mem_bus_if (per_socket_dcache_mem_bus_if[i]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[i]),

View File

@@ -262,7 +262,10 @@
`endif
// LSU Duplicate Address Check
`ifdef LSU_DUP
`ifndef LSU_DUP_DISABLE
`define LSU_DUP_ENABLE
`endif
`ifdef LSU_DUP_ENABLE
`define LSU_DUP_ENABLED 1
`else
`define LSU_DUP_ENABLED 0
@@ -381,7 +384,7 @@
// Number of Cache Units
`ifndef NUM_ICACHES
`define NUM_ICACHES `UP(`NUM_CORES / 4)
`define NUM_ICACHES `UP(`SOCKET_SIZE / 4)
`endif
// Cache Size
@@ -430,7 +433,7 @@
// Number of Cache Units
`ifndef NUM_DCACHES
`define NUM_DCACHES `UP(`NUM_CORES / 4)
`define NUM_DCACHES `UP(`SOCKET_SIZE / 4)
`endif
// Cache Size

View File

@@ -410,8 +410,22 @@
assign dst = src; \
end
`define TO_DISPATCH_DATA(data, tid) \
{data.uuid, data.wis, data.tmask, data.op_type, data.op_mod, data.wb, data.use_PC, data.use_imm, data.PC, data.imm, data.rd, tid, data.rs1_data, data.rs2_data, data.rs3_data}
`define TO_DISPATCH_DATA(data, tid) { \
data.uuid, \
data.wis, \
data.tmask, \
data.op_type, \
data.op_mod, \
data.wb, \
data.use_PC, \
data.use_imm, \
data.PC, \
data.imm, \
data.rd, \
tid, \
data.rs1_data, \
data.rs2_data, \
data.rs3_data}
///////////////////////////////////////////////////////////////////////////////

View File

@@ -141,8 +141,9 @@ package VX_gpu_pkg;
/////////////////////////////// L1 Parameters /////////////////////////////
localparam L1_MEM_TAG_WIDTH = `MAX(ICACHE_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
localparam L1_MEM_ARB_TAG_WIDTH = (L1_MEM_TAG_WIDTH + `CLOG2(2));
localparam ICACHE_MEM_ARB_TAG_WIDTH = (ICACHE_MEM_TAG_WIDTH + `CLOG2(`NUM_SOCKETS));
localparam DCACHE_MEM_ARB_TAG_WIDTH = (DCACHE_MEM_TAG_WIDTH + `CLOG2(`NUM_SOCKETS));
localparam L1_MEM_TAG_WIDTH = `MAX(ICACHE_MEM_ARB_TAG_WIDTH, DCACHE_MEM_ARB_TAG_WIDTH);
/////////////////////////////// L2 Parameters /////////////////////////////
@@ -150,10 +151,10 @@ package VX_gpu_pkg;
localparam L2_WORD_SIZE = `L1_LINE_SIZE;
// Input request size
localparam L2_NUM_REQS = `NUM_SOCKETS;
localparam L2_NUM_REQS = 2;
// Core request tag bits
localparam L2_TAG_WIDTH = L1_MEM_ARB_TAG_WIDTH;
localparam L2_TAG_WIDTH = L1_MEM_TAG_WIDTH;
// Memory request data bits
localparam L2_MEM_DATA_WIDTH = (`L2_LINE_SIZE * 8);

View File

@@ -30,7 +30,8 @@ module VX_socket import VX_gpu_pkg::*; #(
VX_dcr_bus_if.slave dcr_bus_if,
// Memory
VX_mem_bus_if.master mem_bus_if,
VX_mem_bus_if.master icache_mem_bus_if,
VX_mem_bus_if.master dcache_mem_bus_if,
`ifdef GBAR_ENABLE
// Barrier
@@ -76,47 +77,7 @@ module VX_socket import VX_gpu_pkg::*; #(
assign mem_perf_tmp_if.mem = mem_perf_if.mem;
`endif
VX_mem_bus_if #(
.DATA_SIZE (ICACHE_LINE_SIZE),
.TAG_WIDTH (ICACHE_MEM_TAG_WIDTH)
) icache_mem_bus_if();
VX_mem_bus_if #(
.DATA_SIZE (DCACHE_LINE_SIZE),
.TAG_WIDTH (DCACHE_MEM_TAG_WIDTH)
) dcache_mem_bus_if();
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_TAG_WIDTH)
) cache_mem_bus_if[2]();
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH)
) mem_bus_tmp_if[1]();
`ASSIGN_VX_MEM_BUS_IF_X (cache_mem_bus_if[0], icache_mem_bus_if, L1_MEM_TAG_WIDTH, ICACHE_MEM_TAG_WIDTH);
`ASSIGN_VX_MEM_BUS_IF_X (cache_mem_bus_if[1], dcache_mem_bus_if, L1_MEM_TAG_WIDTH, DCACHE_MEM_TAG_WIDTH);
`RESET_RELAY (mem_arb_reset, reset);
VX_mem_arb #(
.NUM_INPUTS (2),
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_TAG_WIDTH),
.TAG_SEL_IDX (1), // Skip 0 for NC flag
.ARBITER ("R"),
.OUT_REG_REQ (2),
.OUT_REG_RSP (2)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
.bus_in_if (cache_mem_bus_if),
.bus_out_if (mem_bus_tmp_if)
);
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if[0]);
///////////////////////////////////////////////////////////////////////////

View File

@@ -78,33 +78,25 @@
`define VX_CSR_MPM_IBUF_ST_H 12'hB85
`define VX_CSR_MPM_SCRB_ST 12'hB06
`define VX_CSR_MPM_SCRB_ST_H 12'hB86
`define VX_CSR_MPM_ALU_ST 12'hB07
`define VX_CSR_MPM_ALU_ST_H 12'hB87
`define VX_CSR_MPM_LSU_ST 12'hB08
`define VX_CSR_MPM_LSU_ST_H 12'hB88
`define VX_CSR_MPM_FPU_ST 12'hB09
`define VX_CSR_MPM_FPU_ST_H 12'hB89
`define VX_CSR_MPM_SFU_ST 12'hB0A
`define VX_CSR_MPM_SFU_ST_H 12'hB8A
`define VX_CSR_MPM_SCRB_ALU 12'hB0B
`define VX_CSR_MPM_SCRB_ALU_H 12'hB8B
`define VX_CSR_MPM_SCRB_FPU 12'hB0C
`define VX_CSR_MPM_SCRB_FPU_H 12'hB8C
`define VX_CSR_MPM_SCRB_LSU 12'hB0D
`define VX_CSR_MPM_SCRB_LSU_H 12'hB8D
`define VX_CSR_MPM_SCRB_SFU 12'hB0E
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8E
`define VX_CSR_MPM_SCRB_ALU 12'hB07
`define VX_CSR_MPM_SCRB_ALU_H 12'hB87
`define VX_CSR_MPM_SCRB_FPU 12'hB08
`define VX_CSR_MPM_SCRB_FPU_H 12'hB88
`define VX_CSR_MPM_SCRB_LSU 12'hB09
`define VX_CSR_MPM_SCRB_LSU_H 12'hB89
`define VX_CSR_MPM_SCRB_SFU 12'hB0A
`define VX_CSR_MPM_SCRB_SFU_H 12'hB8A
// PERF: memory
`define VX_CSR_MPM_IFETCHES 12'hB0F
`define VX_CSR_MPM_IFETCHES_H 12'hB8F
`define VX_CSR_MPM_LOADS 12'hB10
`define VX_CSR_MPM_LOADS_H 12'hB90
`define VX_CSR_MPM_STORES 12'hB11
`define VX_CSR_MPM_STORES_H 12'hB91
`define VX_CSR_MPM_IFETCH_LT 12'hB12
`define VX_CSR_MPM_IFETCH_LT_H 12'hB92
`define VX_CSR_MPM_LOAD_LT 12'hB13
`define VX_CSR_MPM_LOAD_LT_H 12'hB93
`define VX_CSR_MPM_IFETCHES 12'hB0B
`define VX_CSR_MPM_IFETCHES_H 12'hB8B
`define VX_CSR_MPM_LOADS 12'hB0C
`define VX_CSR_MPM_LOADS_H 12'hB8C
`define VX_CSR_MPM_STORES 12'hB0D
`define VX_CSR_MPM_STORES_H 12'hB8D
`define VX_CSR_MPM_IFETCH_LT 12'hB0E
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E
`define VX_CSR_MPM_LOAD_LT 12'hB0F
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F
// Machine Performance-monitoring memory counters
// PERF: icache

View File

@@ -273,23 +273,23 @@ module VX_core import VX_gpu_pkg::*; #(
wire [1:0] perf_icache_pending_read_cycle;
wire [`CLOG2(DCACHE_NUM_REQS+1)+1-1:0] perf_dcache_pending_read_cycle;
reg [`PERF_CTR_BITS-1:0] perf_icache_pending_reads;
reg [`PERF_CTR_BITS-1:0] perf_dcache_pending_reads;
reg [`PERF_CTR_BITS-1:0] perf_icache_pending_reads;
reg [`PERF_CTR_BITS-1:0] perf_dcache_pending_reads;
reg [`PERF_CTR_BITS-1:0] perf_ifetches;
reg [`PERF_CTR_BITS-1:0] perf_loads;
reg [`PERF_CTR_BITS-1:0] perf_stores;
reg [`PERF_CTR_BITS-1:0] perf_ifetches;
reg [`PERF_CTR_BITS-1:0] perf_loads;
reg [`PERF_CTR_BITS-1:0] perf_stores;
wire perf_icache_req_fire = icache_bus_if.req_valid & icache_bus_if.req_ready;
wire perf_icache_rsp_fire = icache_bus_if.rsp_valid & icache_bus_if.rsp_ready;
wire perf_icache_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready;
wire perf_icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rd_req_fire, perf_dcache_rd_req_fire_r;
wire [DCACHE_NUM_REQS-1:0] perf_dcache_wr_req_fire, perf_dcache_wr_req_fire_r;
wire [DCACHE_NUM_REQS-1:0] perf_dcache_rsp_fire;
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
assign perf_dcache_rd_req_fire[i] = dcache_bus_if[i].req_valid && ~dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
assign perf_dcache_wr_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_data.rw && dcache_bus_if[i].req_ready;
assign perf_dcache_rd_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_ready && ~dcache_bus_if[i].req_data.rw;
assign perf_dcache_wr_req_fire[i] = dcache_bus_if[i].req_valid && dcache_bus_if[i].req_ready && dcache_bus_if[i].req_data.rw;
assign perf_dcache_rsp_fire[i] = dcache_bus_if[i].rsp_valid && dcache_bus_if[i].rsp_ready;
end

View File

@@ -195,19 +195,6 @@ import VX_fpu_pkg::*;
`VX_CSR_MPM_IBUF_ST_H : read_data_ro_r = 32'(pipeline_perf_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SCRB_ST : read_data_ro_r = pipeline_perf_if.scb_stalls[31:0];
`VX_CSR_MPM_SCRB_ST_H : read_data_ro_r = 32'(pipeline_perf_if.scb_stalls[`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_ALU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_ALU][31:0];
`VX_CSR_MPM_ALU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_ALU][`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_LSU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_LSU][31:0];
`VX_CSR_MPM_LSU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_LSU][`PERF_CTR_BITS-1:32]);
`ifdef EXT_F_ENABLE
`VX_CSR_MPM_FPU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_FPU][31:0];
`VX_CSR_MPM_FPU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_FPU][`PERF_CTR_BITS-1:32]);
`else
`VX_CSR_MPM_FPU_ST : read_data_ro_r = '0;
`VX_CSR_MPM_FPU_ST_H : read_data_ro_r = '0;
`endif
`VX_CSR_MPM_SFU_ST : read_data_ro_r = pipeline_perf_if.dsp_stalls[`EX_SFU][31:0];
`VX_CSR_MPM_SFU_ST_H : read_data_ro_r = 32'(pipeline_perf_if.dsp_stalls[`EX_SFU][`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SCRB_ALU : read_data_ro_r = 32'(pipeline_perf_if.scb_uses[`EX_ALU][`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SCRB_ALU_H : read_data_ro_r = pipeline_perf_if.scb_uses[`EX_ALU][31:0];
`ifdef EXT_F_ENABLE
@@ -220,7 +207,7 @@ import VX_fpu_pkg::*;
`VX_CSR_MPM_SCRB_LSU : read_data_ro_r = 32'(pipeline_perf_if.scb_uses[`EX_LSU][`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SCRB_LSU_H : read_data_ro_r = pipeline_perf_if.scb_uses[`EX_LSU][31:0];
`VX_CSR_MPM_SCRB_SFU : read_data_ro_r = 32'(pipeline_perf_if.scb_uses[`EX_SFU][`PERF_CTR_BITS-1:32]);
`VX_CSR_MPM_SCRB_SFU_H : read_data_ro_r = pipeline_perf_if.scb_uses[`EX_SFU][31:0];
`VX_CSR_MPM_SCRB_SFU_H : read_data_ro_r = pipeline_perf_if.scb_uses[`EX_SFU][31:0];
// PERF: memory
`VX_CSR_MPM_IFETCHES : read_data_ro_r = pipeline_perf_if.ifetches[31:0];
`VX_CSR_MPM_IFETCHES_H : read_data_ro_r = 32'(pipeline_perf_if.ifetches[`PERF_CTR_BITS-1:32]);

View File

@@ -84,7 +84,7 @@ module VX_issue #(
.clk (clk),
.reset (dispatch_reset),
`ifdef PERF_ENABLE
.perf_stalls (perf_issue_if.dsp_stalls),
`UNUSED_PIN (perf_stalls),
`endif
.operands_if (operands_if),
.alu_dispatch_if(alu_dispatch_if),

View File

@@ -96,7 +96,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
// detect duplicate addresses
wire lsu_is_dup;
`ifdef LSU_DUP
`ifdef LSU_DUP_ENABLE
if (NUM_LANES > 1) begin
wire [NUM_LANES-2:0] addr_matches;
for (genvar i = 0; i < (NUM_LANES-1); ++i) begin
@@ -304,7 +304,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
assign mem_req_tag = {
execute_if[0].data.uuid, lsu_addr_type, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.rd, execute_if[0].data.op_type, req_align, execute_if[0].data.pid, pkt_waddr
`ifdef LSU_DUP
`ifdef LSU_DUP_ENABLE
, lsu_is_dup
`endif
};
@@ -448,13 +448,13 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
wire [PID_WIDTH-1:0] rsp_pid;
wire rsp_is_dup;
`ifndef LSU_DUP
`ifndef LSU_DUP_ENABLE
assign rsp_is_dup = 0;
`endif
assign {
rsp_uuid, rsp_addr_type, rsp_wid, rsp_tmask_uq, rsp_pc, rsp_rd, rsp_op_type, rsp_align, rsp_pid, pkt_raddr
`ifdef LSU_DUP
`ifdef LSU_DUP_ENABLE
, rsp_is_dup
`endif
} = mem_rsp_tag;

View File

@@ -14,18 +14,17 @@
`include "VX_define.vh"
interface VX_pipeline_perf_if ();
wire [`PERF_CTR_BITS-1:0] sched_idles;
wire [`PERF_CTR_BITS-1:0] sched_stalls;
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] scb_uses [`NUM_EX_UNITS];
wire [`PERF_CTR_BITS-1:0] dsp_stalls [`NUM_EX_UNITS];
wire [`PERF_CTR_BITS-1:0] sched_idles;
wire [`PERF_CTR_BITS-1:0] sched_stalls;
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] scb_uses [`NUM_EX_UNITS];
wire [`PERF_CTR_BITS-1:0] ifetches;
wire [`PERF_CTR_BITS-1:0] loads;
wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] ifetch_latency;
wire [`PERF_CTR_BITS-1:0] load_latency;
wire [`PERF_CTR_BITS-1:0] ifetches;
wire [`PERF_CTR_BITS-1:0] loads;
wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] ifetch_latency;
wire [`PERF_CTR_BITS-1:0] load_latency;
modport schedule (
output sched_idles,
@@ -35,8 +34,7 @@ interface VX_pipeline_perf_if ();
modport issue (
output ibf_stalls,
output scb_stalls,
output scb_uses,
output dsp_stalls
output scb_uses
);
modport slave (
@@ -45,7 +43,6 @@ interface VX_pipeline_perf_if ();
input ibf_stalls,
input scb_stalls,
input scb_uses,
input dsp_stalls,
input ifetches,
input loads,
input stores,