cummulative fixes, RTL uuid trace, texture unit fixes, simx timing fixes
This commit is contained in:
@@ -96,6 +96,7 @@ module VX_alu_unit #(
|
||||
wire alu_ready_in;
|
||||
wire alu_valid_out;
|
||||
wire alu_ready_out;
|
||||
wire [63:0] alu_uuid;
|
||||
wire [`NW_BITS-1:0] alu_wid;
|
||||
wire [`NUM_THREADS-1:0] alu_tmask;
|
||||
wire [31:0] alu_PC;
|
||||
@@ -112,14 +113,14 @@ module VX_alu_unit #(
|
||||
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (alu_ready_in),
|
||||
.data_in ({alu_valid_in, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, is_less, is_equal, br_dest}),
|
||||
.data_out ({alu_valid_out, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data, is_br_op_r, br_op_r, is_less_r, is_equal_r, br_dest_r})
|
||||
.data_in ({alu_valid_in, alu_req_if.uuid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, is_less, is_equal, br_dest}),
|
||||
.data_out ({alu_valid_out, alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data, is_br_op_r, br_op_r, is_less_r, is_equal_r, br_dest_r})
|
||||
);
|
||||
|
||||
`UNUSED_VAR (br_op_r)
|
||||
@@ -138,6 +139,7 @@ module VX_alu_unit #(
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
wire [63:0] mul_uuid;
|
||||
wire [`NW_BITS-1:0] mul_wid;
|
||||
wire [`NUM_THREADS-1:0] mul_tmask;
|
||||
wire [31:0] mul_PC;
|
||||
@@ -153,6 +155,7 @@ module VX_alu_unit #(
|
||||
|
||||
// Inputs
|
||||
.alu_op (mul_op),
|
||||
.uuid_in (alu_req_if.uuid),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
@@ -163,6 +166,7 @@ module VX_alu_unit #(
|
||||
|
||||
// Outputs
|
||||
.wid_out (mul_wid),
|
||||
.uuid_out (mul_uuid),
|
||||
.tmask_out (mul_tmask),
|
||||
.PC_out (mul_PC),
|
||||
.rd_out (mul_rd),
|
||||
@@ -184,6 +188,7 @@ module VX_alu_unit #(
|
||||
assign mul_valid_in = alu_req_if.valid && is_mul_op;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out || mul_valid_out;
|
||||
assign alu_commit_if.uuid = alu_valid_out ? alu_uuid : mul_uuid;
|
||||
assign alu_commit_if.wid = alu_valid_out ? alu_wid : mul_wid;
|
||||
assign alu_commit_if.tmask = alu_valid_out ? alu_tmask : mul_tmask;
|
||||
assign alu_commit_if.PC = alu_valid_out ? alu_PC : mul_PC;
|
||||
@@ -201,6 +206,7 @@ module VX_alu_unit #(
|
||||
assign alu_valid_in = alu_req_if.valid;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out;
|
||||
assign alu_commit_if.uuid = alu_uuid;
|
||||
assign alu_commit_if.wid = alu_wid;
|
||||
assign alu_commit_if.tmask = alu_tmask;
|
||||
assign alu_commit_if.PC = alu_PC;
|
||||
@@ -220,8 +226,8 @@ module VX_alu_unit #(
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
dpi_trace("%d: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest);
|
||||
dpi_trace("%d: core%0d-branch: wid=%0d, PC=%0h, taken=%b, dest=%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, alu_commit_if.PC, branch_ctl_if.taken, branch_ctl_if.dest, alu_uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -40,27 +40,35 @@ module VX_commit #(
|
||||
`endif
|
||||
|| gpu_commit_fire;
|
||||
|
||||
wire [`NUM_THREADS-1:0] commit_tmask;
|
||||
assign commit_tmask = alu_commit_fire ? alu_commit_if.tmask:
|
||||
ld_commit_fire ? ld_commit_if.tmask:
|
||||
st_commit_fire ? st_commit_if.tmask:
|
||||
csr_commit_fire ? csr_commit_if.tmask:
|
||||
`ifdef EXT_F_ENABLE
|
||||
fpu_commit_fire ? fpu_commit_if.tmask:
|
||||
`endif
|
||||
/*gpu_commit_fire ?*/ gpu_commit_if.tmask;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [(6*`NUM_THREADS)-1:0] commit_tmask;
|
||||
`else
|
||||
wire [(5*`NUM_THREADS)-1:0] commit_tmask;
|
||||
`endif
|
||||
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] commit_cnt;
|
||||
`POP_COUNT(commit_cnt, commit_tmask);
|
||||
wire [$clog2($bits(commit_tmask)+1)-1:0] commit_size;
|
||||
|
||||
assign commit_tmask = {
|
||||
{`NUM_THREADS{alu_commit_fire}} & alu_commit_if.tmask,
|
||||
{`NUM_THREADS{ld_commit_fire}} & ld_commit_if.tmask,
|
||||
{`NUM_THREADS{st_commit_fire}} & st_commit_if.tmask,
|
||||
{`NUM_THREADS{csr_commit_fire}} & csr_commit_if.tmask,
|
||||
`ifdef EXT_F_ENABLE
|
||||
{`NUM_THREADS{fpu_commit_fire}} & fpu_commit_if.tmask,
|
||||
`endif
|
||||
{`NUM_THREADS{gpu_commit_fire}} & gpu_commit_if.tmask
|
||||
};
|
||||
|
||||
`POP_COUNT(commit_size, commit_tmask);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + $clog2(`NUM_THREADS+1)),
|
||||
.DATAW (1 + $bits(commit_size)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({commit_fire, commit_cnt}),
|
||||
.data_in ({commit_fire, commit_size}),
|
||||
.data_out ({cmt_to_csr_if.valid, cmt_to_csr_if.commit_size})
|
||||
);
|
||||
|
||||
@@ -90,32 +98,32 @@ module VX_commit #(
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.wb, alu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(alu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", alu_commit_if.uuid);
|
||||
end
|
||||
if (ld_commit_if.valid && ld_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, ld_commit_if.wid, ld_commit_if.PC, ld_commit_if.tmask, ld_commit_if.wb, ld_commit_if.rd);
|
||||
`TRACE_ARRAY1D(ld_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", ld_commit_if.uuid);
|
||||
end
|
||||
if (st_commit_if.valid && st_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d\n", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd);
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d (#%0d)\n", $time, CORE_ID, st_commit_if.wid, st_commit_if.PC, st_commit_if.tmask, st_commit_if.wb, st_commit_if.rd, st_commit_if.uuid);
|
||||
end
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd);
|
||||
`TRACE_ARRAY1D(csr_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", csr_commit_if.uuid);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(fpu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", fpu_commit_if.uuid);
|
||||
end
|
||||
`endif
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
dpi_trace("%d: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.wb, gpu_commit_if.rd);
|
||||
`TRACE_ARRAY1D(gpu_commit_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", gpu_commit_if.uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -171,48 +171,50 @@
|
||||
`define CSR_MPM_FPU_ST_H 12'hB88
|
||||
`define CSR_MPM_GPU_ST 12'hB09
|
||||
`define CSR_MPM_GPU_ST_H 12'hB89
|
||||
// PERF: decode
|
||||
`define CSR_MPM_LOADS 12'hB0A
|
||||
`define CSR_MPM_LOADS_H 12'hB8A
|
||||
`define CSR_MPM_STORES 12'hB0B
|
||||
`define CSR_MPM_STORES_H 12'hB8B
|
||||
`define CSR_MPM_BRANCHES 12'hB0C
|
||||
`define CSR_MPM_BRANCHES_H 12'hB8C
|
||||
// PERF: icache
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0A // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8A
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0B // total misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8B
|
||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0C // pipeline stalls
|
||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8C
|
||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0D // core response stalls
|
||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0D // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8D
|
||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0E // read misses
|
||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8E
|
||||
// PERF: dcache
|
||||
`define CSR_MPM_DCACHE_READS 12'hB0E // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB8E
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB0F // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB8F
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB10 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB90
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB11 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB91
|
||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB12 // bank conflicts stalls
|
||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB13 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB14 // pipeline stalls
|
||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB94
|
||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
|
||||
`define CSR_MPM_DCACHE_READS_H 12'hB8F
|
||||
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
|
||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
|
||||
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
|
||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
|
||||
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
|
||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
|
||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts
|
||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
|
||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
|
||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
|
||||
// PERF: smem
|
||||
`define CSR_MPM_SMEM_READS 12'hB16 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB96
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB17 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB97
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
|
||||
`define CSR_MPM_SMEM_READS 12'hB15 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB95
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB16 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB96
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB17 // bank conflicts
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB97
|
||||
// PERF: memory
|
||||
`define CSR_MPM_MEM_READS 12'hB19 // memory reads
|
||||
`define CSR_MPM_MEM_READS_H 12'hB99
|
||||
`define CSR_MPM_MEM_WRITES 12'hB1A // memory writes
|
||||
`define CSR_MPM_MEM_WRITES_H 12'hB9A
|
||||
`define CSR_MPM_MEM_ST 12'hB1B // memory request stalls
|
||||
`define CSR_MPM_MEM_ST_H 12'hB9B
|
||||
`define CSR_MPM_MEM_LAT 12'hB1C // memory latency (total)
|
||||
`define CSR_MPM_MEM_LAT_H 12'hB9C
|
||||
`define CSR_MPM_MEM_READS 12'hB18 // memory reads
|
||||
`define CSR_MPM_MEM_READS_H 12'hB98
|
||||
`define CSR_MPM_MEM_WRITES 12'hB19 // memory writes
|
||||
`define CSR_MPM_MEM_WRITES_H 12'hB99
|
||||
`define CSR_MPM_MEM_LAT 12'hB1A // memory latency
|
||||
`define CSR_MPM_MEM_LAT_H 12'hB9A
|
||||
// PERF: texunit
|
||||
`define CSR_MPM_TEX_READS 12'hB1B // texture accesses
|
||||
`define CSR_MPM_TEX_READS_H 12'hB9B
|
||||
`define CSR_MPM_TEX_LAT 12'hB1C // texture latency
|
||||
`define CSR_MPM_TEX_LAT_H 12'hB9C
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
@@ -254,12 +256,22 @@
|
||||
`define TEX_STATE_WRAPU 5
|
||||
`define TEX_STATE_WRAPV 6
|
||||
`define TEX_STATE_MIPOFF(lod) (7+(lod))
|
||||
`define NUM_TEX_STATES (`TEX_STATE_MIPOFF(`TEX_LOD_MAX)+1)
|
||||
|
||||
`define NUM_TEX_STATES (7+`TEX_LOD_MAX)
|
||||
`define CSR_TEX_UNIT 12'hFD0
|
||||
|
||||
`define CSR_TEX(unit,state) (12'hFD0 + ((unit) * `NUM_TEX_STATES) + (state))
|
||||
`define CSR_TEX_UNIT(csr) (((csr) - 12'hFD0) / `NUM_TEX_STATES)
|
||||
`define CSR_TEX_STATE(csr) (((csr) - 12'hFD0) % `NUM_TEX_STATES)
|
||||
`define CSR_TEX_STATE_BEGIN 12'hFD1
|
||||
`define CSR_TEX_ADDR (`CSR_TEX_STATE_BEGIN+`TEX_STATE_ADDR)
|
||||
`define CSR_TEX_WIDTH (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WIDTH)
|
||||
`define CSR_TEX_HEIGHT (`CSR_TEX_STATE_BEGIN+`TEX_STATE_HEIGHT)
|
||||
`define CSR_TEX_FORMAT (`CSR_TEX_STATE_BEGIN+`TEX_STATE_FORMAT)
|
||||
`define CSR_TEX_FILTER (`CSR_TEX_STATE_BEGIN+`TEX_STATE_FILTER)
|
||||
`define CSR_TEX_WRAPU (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WRAPU)
|
||||
`define CSR_TEX_WRAPV (`CSR_TEX_STATE_BEGIN+`TEX_STATE_WRAPV)
|
||||
`define CSR_TEX_MIPOFF(lod) (`CSR_TEX_STATE_BEGIN+`TEX_STATE_MIPOFF(lod))
|
||||
`define CSR_TEX_STATE_END (`CSR_TEX_STATE_BEGIN + `NUM_TEX_STATES)
|
||||
|
||||
`define CSR_TEX_STATE(addr) ((addr) - `CSR_TEX_STATE_BEGIN)
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
@@ -7,6 +7,9 @@ module VX_csr_data #(
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_perf_tex_if.slave perf_tex_if,
|
||||
`endif
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
@@ -22,11 +25,13 @@ module VX_csr_data #(
|
||||
`endif
|
||||
|
||||
input wire read_enable,
|
||||
input wire [63:0] read_uuid,
|
||||
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||
input wire[`NW_BITS-1:0] read_wid,
|
||||
output wire[31:0] read_data,
|
||||
|
||||
input wire write_enable,
|
||||
input wire [63:0] write_uuid,
|
||||
input wire[`CSR_ADDR_BITS-1:0] write_addr,
|
||||
input wire[`NW_BITS-1:0] write_wid,
|
||||
input wire[31:0] write_data,
|
||||
@@ -56,7 +61,7 @@ module VX_csr_data #(
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
`endif
|
||||
if (write_enable) begin
|
||||
@@ -75,11 +80,12 @@ module VX_csr_data #(
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
default: begin
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`ASSERT(write_addr >= `CSR_TEX(0,0)
|
||||
&& write_addr < `CSR_TEX(`NUM_TEX_UNITS, 0),
|
||||
("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
`ASSERT((write_addr == `CSR_TEX_UNIT)
|
||||
|| (write_addr >= `CSR_TEX_STATE_BEGIN
|
||||
&& write_addr < `CSR_TEX_STATE_END),
|
||||
("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
`else
|
||||
`ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
`ASSERT(~write_enable, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
@@ -152,20 +158,28 @@ module VX_csr_data #(
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`else
|
||||
`CSR_MPM_FPU_ST : read_data_r = '0;
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = '0;
|
||||
`endif
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: decode
|
||||
`CSR_MPM_LOADS : read_data_r = perf_pipeline_if.loads[31:0];
|
||||
`CSR_MPM_LOADS_H : read_data_r = 32'(perf_pipeline_if.loads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_STORES : read_data_r = perf_pipeline_if.stores[31:0];
|
||||
`CSR_MPM_STORES_H : read_data_r = 32'(perf_pipeline_if.stores[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_BRANCHES : read_data_r = perf_pipeline_if.branches[31:0];
|
||||
`CSR_MPM_BRANCHES_H : read_data_r = 32'(perf_pipeline_if.branches[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: dcache
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
|
||||
@@ -178,26 +192,27 @@ module VX_csr_data #(
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: smem
|
||||
// PERF: smem
|
||||
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: MEM
|
||||
// PERF: memory
|
||||
`CSR_MPM_MEM_READS : read_data_r = perf_memsys_if.mem_reads[31:0];
|
||||
`CSR_MPM_MEM_READS_H : read_data_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_WRITES : read_data_r = perf_memsys_if.mem_writes[31:0];
|
||||
`CSR_MPM_MEM_WRITES_H : read_data_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_ST : read_data_r = perf_memsys_if.mem_stalls[31:0];
|
||||
`CSR_MPM_MEM_ST_H : read_data_r = 32'(perf_memsys_if.mem_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_LAT : read_data_r = perf_memsys_if.mem_latency[31:0];
|
||||
`CSR_MPM_MEM_LAT_H : read_data_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
// PERF: texunit
|
||||
`CSR_MPM_TEX_READS : read_data_r = perf_tex_if.mem_reads[31:0];
|
||||
`CSR_MPM_TEX_READS_H : read_data_r = 32'(perf_tex_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_LAT : read_data_r = perf_tex_if.mem_latency[31:0];
|
||||
`CSR_MPM_TEX_LAT_H : read_data_r = 32'(perf_tex_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
// PERF: reserved
|
||||
`CSR_MPM_RESERVED : read_data_r = '0;
|
||||
`CSR_MPM_RESERVED_H : read_data_r = '0;
|
||||
@@ -227,7 +242,9 @@ module VX_csr_data #(
|
||||
read_addr_valid_r = 1;
|
||||
end else
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
if (read_addr >= `CSR_TEX(0,0) && read_addr < `CSR_TEX(`NUM_TEX_UNITS,0)) begin
|
||||
if ((read_addr == `CSR_TEX_UNIT)
|
||||
|| (read_addr >= `CSR_TEX_STATE_BEGIN
|
||||
&& read_addr < `CSR_TEX_STATE_END)) begin
|
||||
read_addr_valid_r = 1;
|
||||
end else
|
||||
`endif
|
||||
@@ -236,7 +253,7 @@ module VX_csr_data #(
|
||||
endcase
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("invalid CSR read address: %0h", read_addr))
|
||||
`RUNTIME_ASSERT(~read_enable || read_addr_valid_r, ("%t: *** invalid CSR read address: %0h (#%0d)", $time, read_addr, read_uuid))
|
||||
|
||||
assign read_data = read_data_r;
|
||||
|
||||
|
||||
@@ -7,6 +7,9 @@ module VX_csr_unit #(
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
VX_perf_tex_if.slave perf_tex_if,
|
||||
`endif
|
||||
VX_perf_memsys_if.slave perf_memsys_if,
|
||||
VX_perf_pipeline_if.slave perf_pipeline_if,
|
||||
`endif
|
||||
@@ -29,7 +32,8 @@ module VX_csr_unit #(
|
||||
);
|
||||
wire csr_we_s1;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||
wire [31:0] csr_read_data;
|
||||
wire [31:0] csr_read_data_s1;
|
||||
wire [31:0] csr_updated_data_s1;
|
||||
|
||||
wire write_enable = csr_commit_if.valid && csr_we_s1;
|
||||
@@ -42,8 +46,11 @@ module VX_csr_unit #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
`endif
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fetch_to_csr_if(fetch_to_csr_if),
|
||||
@@ -54,10 +61,12 @@ module VX_csr_unit #(
|
||||
.tex_csr_if (tex_csr_if),
|
||||
`endif
|
||||
.read_enable (csr_req_if.valid),
|
||||
.read_uuid (csr_req_if.uuid),
|
||||
.read_addr (csr_req_if.addr),
|
||||
.read_wid (csr_req_if.wid),
|
||||
.read_data (csr_read_data),
|
||||
.write_enable (write_enable),
|
||||
.write_uuid (csr_commit_if.uuid),
|
||||
.write_addr (csr_addr_s1),
|
||||
.write_wid (csr_commit_if.wid),
|
||||
.write_data (csr_updated_data_s1),
|
||||
@@ -101,14 +110,14 @@ module VX_csr_unit #(
|
||||
wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({csr_req_valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
|
||||
.data_out ({csr_commit_if.valid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1})
|
||||
.data_in ({csr_req_valid, csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
|
||||
.data_out ({csr_commit_if.valid, csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
@@ -20,6 +20,10 @@ module VX_decode #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if.decode perf_decode_if,
|
||||
`endif
|
||||
|
||||
// inputs
|
||||
VX_ifetch_rsp_if.slave ifetch_rsp_if,
|
||||
|
||||
@@ -57,7 +61,6 @@ module VX_decode #(
|
||||
wire [11:0] s_imm = {func7, rd};
|
||||
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
|
||||
`UNUSED_VAR (rs3)
|
||||
|
||||
@@ -169,7 +172,7 @@ module VX_decode #(
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
@@ -192,7 +195,7 @@ module VX_decode #(
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`INST_F: begin
|
||||
`INST_FENCE: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_mod = `INST_MOD_BITS'(1);
|
||||
end
|
||||
@@ -411,6 +414,7 @@ module VX_decode #(
|
||||
wire wb = use_rd && (| rd_r);
|
||||
|
||||
assign decode_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_if.uuid = ifetch_rsp_if.uuid;
|
||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||
assign decode_if.tmask = ifetch_rsp_if.tmask;
|
||||
assign decode_if.PC = ifetch_rsp_if.PC;
|
||||
@@ -439,6 +443,42 @@ module VX_decode #(
|
||||
|
||||
assign ifetch_rsp_if.ready = decode_if.ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] perf_loads_per_cycle;
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] perf_stores_per_cycle;
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] perf_branches_per_cycle;
|
||||
|
||||
wire [`NUM_THREADS-1:0] perf_loads_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_LSU && `INST_LSU_IS_MEM(decode_if.op_mod) && decode_if.wb}};
|
||||
wire [`NUM_THREADS-1:0] perf_stores_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_LSU && `INST_LSU_IS_MEM(decode_if.op_mod) && ~decode_if.wb}};
|
||||
wire [`NUM_THREADS-1:0] perf_branches_per_mask = decode_if.tmask & {`NUM_THREADS{decode_if.ex_type == `EX_ALU && `INST_ALU_IS_BR(decode_if.op_mod)}};
|
||||
|
||||
`POP_COUNT(perf_loads_per_cycle, perf_loads_per_mask);
|
||||
`POP_COUNT(perf_stores_per_cycle, perf_stores_per_mask);
|
||||
`POP_COUNT(perf_branches_per_cycle, perf_branches_per_mask);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_loads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_stores;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_branches;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_loads <= 0;
|
||||
perf_stores <= 0;
|
||||
perf_branches <= 0;
|
||||
end else begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
perf_loads <= perf_loads + `PERF_CTR_BITS'(perf_loads_per_cycle);
|
||||
perf_stores <= perf_stores + `PERF_CTR_BITS'(perf_stores_per_cycle);
|
||||
perf_branches <= perf_branches + `PERF_CTR_BITS'(perf_branches_per_cycle);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_decode_if.loads = perf_loads;
|
||||
assign perf_decode_if.stores = perf_stores;
|
||||
assign perf_decode_if.branches = perf_branches;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
@@ -446,7 +486,8 @@ module VX_decode #(
|
||||
trace_ex_type(decode_if.ex_type);
|
||||
dpi_trace(", op=");
|
||||
trace_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
|
||||
dpi_trace(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm);
|
||||
dpi_trace(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b (#%0d)\n",
|
||||
decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.use_PC, decode_if.use_imm, decode_if.uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
`define INST_S 7'b0100011 // store instructions
|
||||
`define INST_I 7'b0010011 // immediate instructions
|
||||
`define INST_R 7'b0110011 // register instructions
|
||||
`define INST_F 7'b0001111 // Fence instructions
|
||||
`define INST_FENCE 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
@@ -155,6 +155,7 @@
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(x) x[2:0]
|
||||
`define INST_LSU_WSIZE(x) x[1:0]
|
||||
`define INST_LSU_IS_MEM(x) (3'h0 == x)
|
||||
`define INST_LSU_IS_FENCE(x) (3'h1 == x)
|
||||
`define INST_LSU_IS_PREFETCH(x) (3'h2 == x)
|
||||
|
||||
|
||||
@@ -42,15 +42,15 @@ module VX_dispatch (
|
||||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, alu_op_type, ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, alu_op_type, ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.uuid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
@@ -63,15 +63,15 @@ module VX_dispatch (
|
||||
wire lsu_is_prefetch = `INST_LSU_IS_PREFETCH(ibuffer_if.op_mod);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1),
|
||||
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1),
|
||||
.OUT_REG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, lsu_op_type, lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, lsu_is_prefetch}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data, lsu_req_if.is_prefetch}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, lsu_op_type, lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, lsu_is_prefetch}),
|
||||
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data, lsu_req_if.is_prefetch}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
@@ -85,15 +85,15 @@ module VX_dispatch (
|
||||
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
|
||||
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
|
||||
.OUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, csr_op_type, csr_addr, ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, csr_imm, csr_rs1_data}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.rs1_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, csr_op_type, csr_addr, ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, csr_imm, csr_rs1_data}),
|
||||
.data_out ({csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
@@ -105,15 +105,15 @@ module VX_dispatch (
|
||||
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, fpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, fpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.uuid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.valid_out (fpu_req_if.valid),
|
||||
.ready_out (fpu_req_if.ready)
|
||||
);
|
||||
@@ -127,15 +127,15 @@ module VX_dispatch (
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)),
|
||||
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.data_in ({ibuffer_if.uuid, ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.uuid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
||||
@@ -75,6 +75,10 @@ module VX_execute #(
|
||||
|
||||
VX_tex_csr_if tex_csr_if();
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_tex_if perf_tex_if();
|
||||
`endif
|
||||
|
||||
VX_cache_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
@@ -165,6 +169,9 @@ module VX_execute #(
|
||||
.clk (clk),
|
||||
.reset (csr_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
`endif
|
||||
.perf_memsys_if (perf_memsys_if),
|
||||
.perf_pipeline_if(perf_pipeline_if),
|
||||
`endif
|
||||
@@ -209,6 +216,9 @@ module VX_execute #(
|
||||
.reset (gpu_reset),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
`endif
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.dcache_req_if (tex_dcache_req_if),
|
||||
.dcache_rsp_if (tex_dcache_rsp_if),
|
||||
|
||||
@@ -22,6 +22,7 @@ module VX_fpu_unit #(
|
||||
wire valid_out;
|
||||
wire ready_out;
|
||||
|
||||
wire [63:0] rsp_uuid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [31:0] rsp_PC;
|
||||
@@ -39,7 +40,7 @@ module VX_fpu_unit #(
|
||||
wire fpuq_pop = valid_out && ready_out;
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
@@ -48,8 +49,8 @@ module VX_fpu_unit #(
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.write_data ({fpu_req_if.uuid, fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (fpuq_pop),
|
||||
.full (fpuq_full),
|
||||
`UNUSED_PIN (empty)
|
||||
@@ -180,14 +181,14 @@ module VX_fpu_unit #(
|
||||
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
|
||||
.data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
|
||||
.data_in ({valid_out, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
|
||||
.data_out ({fpu_commit_if.valid, fpu_commit_if.uuid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
|
||||
);
|
||||
|
||||
assign fpu_commit_if.eop = 1'b1;
|
||||
|
||||
@@ -12,6 +12,10 @@ module VX_gpu_unit #(
|
||||
VX_gpu_req_if.slave gpu_req_if,
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_tex_if.master perf_tex_if,
|
||||
`endif
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
VX_tex_csr_if.slave tex_csr_if,
|
||||
@@ -28,12 +32,13 @@ module VX_gpu_unit #(
|
||||
localparam WCTL_DATAW = `GPU_TMC_BITS + `GPU_WSPAWN_BITS + `GPU_SPLIT_BITS + `GPU_BARRIER_BITS;
|
||||
localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW);
|
||||
|
||||
wire rsp_valid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire rsp_valid;
|
||||
wire [63:0] rsp_uuid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
|
||||
wire [RSP_DATAW-1:0] rsp_data, rsp_data_r;
|
||||
|
||||
@@ -112,6 +117,7 @@ module VX_gpu_unit #(
|
||||
wire is_tex = (gpu_req_if.op_type == `INST_GPU_TEX);
|
||||
|
||||
assign tex_req_if.valid = gpu_req_if.valid && is_tex;
|
||||
assign tex_req_if.uuid = gpu_req_if.uuid;
|
||||
assign tex_req_if.wid = gpu_req_if.wid;
|
||||
assign tex_req_if.tmask = gpu_req_if.tmask;
|
||||
assign tex_req_if.PC = gpu_req_if.PC;
|
||||
@@ -128,6 +134,9 @@ module VX_gpu_unit #(
|
||||
) tex_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_tex_if (perf_tex_if),
|
||||
`endif
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_csr_if (tex_csr_if),
|
||||
.tex_rsp_if (tex_rsp_if),
|
||||
@@ -143,6 +152,7 @@ module VX_gpu_unit #(
|
||||
assign is_warp_ctl = !(is_tex || tex_rsp_if.valid);
|
||||
|
||||
assign rsp_valid = tex_rsp_if.valid || (gpu_req_if.valid && ~is_tex);
|
||||
assign rsp_uuid = tex_rsp_if.valid ? tex_rsp_if.uuid : gpu_req_if.uuid;
|
||||
assign rsp_wid = tex_rsp_if.valid ? tex_rsp_if.wid : gpu_req_if.wid;
|
||||
assign rsp_tmask = tex_rsp_if.valid ? tex_rsp_if.tmask : gpu_req_if.tmask;
|
||||
assign rsp_PC = tex_rsp_if.valid ? tex_rsp_if.PC : gpu_req_if.PC;
|
||||
@@ -161,6 +171,7 @@ module VX_gpu_unit #(
|
||||
assign is_warp_ctl = 1;
|
||||
|
||||
assign rsp_valid = gpu_req_if.valid;
|
||||
assign rsp_uuid = gpu_req_if.uuid;
|
||||
assign rsp_wid = gpu_req_if.wid;
|
||||
assign rsp_tmask = gpu_req_if.tmask;
|
||||
assign rsp_PC = gpu_req_if.PC;
|
||||
@@ -176,14 +187,14 @@ module VX_gpu_unit #(
|
||||
assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, rsp_data_r, is_warp_ctl_r})
|
||||
.data_in ({rsp_valid, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data, is_warp_ctl}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.uuid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, rsp_data_r, is_warp_ctl_r})
|
||||
);
|
||||
|
||||
assign gpu_commit_if.data = rsp_data_r[(`NUM_THREADS * 32)-1:0];
|
||||
@@ -200,7 +211,7 @@ module VX_gpu_unit #(
|
||||
assign gpu_req_if.ready = ~stall_in;
|
||||
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_uuid, gpu_commit_if.uuid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split.valid);
|
||||
|
||||
@@ -15,7 +15,7 @@ module VX_ibuffer #(
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
|
||||
localparam DATAW = 64 + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
|
||||
localparam ADDRW = $clog2(`IBUF_SIZE+1);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
|
||||
@@ -168,7 +168,8 @@ module VX_ibuffer #(
|
||||
|
||||
assign decode_if.ready = ~q_full[decode_if.wid];
|
||||
|
||||
assign q_data_in = {decode_if.tmask,
|
||||
assign q_data_in = {decode_if.uuid,
|
||||
decode_if.tmask,
|
||||
decode_if.PC,
|
||||
decode_if.ex_type,
|
||||
decode_if.op_type,
|
||||
@@ -184,7 +185,8 @@ module VX_ibuffer #(
|
||||
|
||||
assign ibuffer_if.valid = deq_valid;
|
||||
assign ibuffer_if.wid = deq_wid;
|
||||
assign {ibuffer_if.tmask,
|
||||
assign {ibuffer_if.uuid,
|
||||
ibuffer_if.tmask,
|
||||
ibuffer_if.PC,
|
||||
ibuffer_if.ex_type,
|
||||
ibuffer_if.op_type,
|
||||
|
||||
@@ -25,35 +25,36 @@ module VX_icache_stage #(
|
||||
localparam OUT_REG = 0;
|
||||
|
||||
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_id;
|
||||
wire [`NW_BITS-1:0] req_tag, rsp_tag;
|
||||
|
||||
`UNUSED_VAR (rsp_req_id)
|
||||
`UNUSED_VAR (rsp_id)
|
||||
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
assign req_tag = ifetch_req_if.wid;
|
||||
assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];
|
||||
assign rsp_req_id = icache_rsp_if.tag[`NW_BITS +: `DBG_CACHE_REQ_IDW];
|
||||
assign req_tag = ifetch_req_if.wid;
|
||||
assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];
|
||||
assign rsp_id = icache_rsp_if.tag[`NW_BITS +: `DBG_CACHE_REQ_IDW];
|
||||
|
||||
wire [63:0] rsp_uuid;
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (32 + `NUM_THREADS),
|
||||
.DATAW (32 + `NUM_THREADS + 64),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
.wren (icache_req_fire),
|
||||
.waddr (req_tag),
|
||||
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask}),
|
||||
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask, ifetch_req_if.uuid}),
|
||||
.raddr (rsp_tag),
|
||||
.rdata ({rsp_PC, rsp_tmask})
|
||||
.rdata ({rsp_PC, rsp_tmask, rsp_uuid})
|
||||
);
|
||||
|
||||
`RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC >= `STARTUP_ADDR),
|
||||
("invalid PC=%0h, wid=%0d, tmask=%b", ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask))
|
||||
("%t: *** invalid PC=%0h, wid=%0d, tmask=%b (#%0d)", $time, ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.uuid))
|
||||
|
||||
// Icache Request
|
||||
assign icache_req_if.valid = ifetch_req_if.valid;
|
||||
@@ -78,35 +79,37 @@ module VX_icache_stage #(
|
||||
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + 64),
|
||||
.RESETW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({icache_rsp_if.valid, rsp_wid, rsp_tmask, rsp_PC, icache_rsp_if.data}),
|
||||
.data_out ({ifetch_rsp_if.valid, ifetch_rsp_if.wid, ifetch_rsp_if.tmask, ifetch_rsp_if.PC, ifetch_rsp_if.data})
|
||||
.data_in ({icache_rsp_if.valid, rsp_wid, rsp_tmask, rsp_PC, icache_rsp_if.data, rsp_uuid}),
|
||||
.data_out ({ifetch_rsp_if.valid, ifetch_rsp_if.wid, ifetch_rsp_if.tmask, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid})
|
||||
);
|
||||
|
||||
// Can accept new response?
|
||||
assign icache_rsp_if.ready = ~stall_out;
|
||||
|
||||
`SCOPE_ASSIGN (icache_req_fire, icache_req_fire);
|
||||
`SCOPE_ASSIGN (icache_req_wid, ifetch_req_if.wid);
|
||||
`SCOPE_ASSIGN (icache_req_uuid, ifetch_req_if.uuid);
|
||||
`SCOPE_ASSIGN (icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN (icache_req_tag, req_tag);
|
||||
|
||||
`SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (icache_rsp_uuid, rsp_uuid);
|
||||
`SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (icache_rsp_tag, rsp_tag);
|
||||
|
||||
`ifdef DBG_TRACE_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_fire) begin
|
||||
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h, req_id=%0h\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, req_id);
|
||||
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h, req_id=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, req_id, ifetch_req_if.uuid);
|
||||
end
|
||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, req_id=%0h, data=%0h\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, rsp_req_id, ifetch_rsp_if.data);
|
||||
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, req_id=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, rsp_id, ifetch_rsp_if.data, ifetch_rsp_if.uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -9,7 +9,7 @@ module VX_issue #(
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_pipeline_if.master perf_pipeline_if,
|
||||
VX_perf_pipeline_if.issue perf_issue_if,
|
||||
`endif
|
||||
|
||||
VX_decode_if.slave decode_if,
|
||||
@@ -38,6 +38,7 @@ module VX_issue #(
|
||||
|
||||
// scoreboard writeback interface
|
||||
assign sboard_wb_if.valid = writeback_if.valid;
|
||||
assign sboard_wb_if.uuid = writeback_if.uuid;
|
||||
assign sboard_wb_if.wid = writeback_if.wid;
|
||||
assign sboard_wb_if.PC = writeback_if.PC;
|
||||
assign sboard_wb_if.rd = writeback_if.rd;
|
||||
@@ -45,6 +46,7 @@ module VX_issue #(
|
||||
|
||||
// scoreboard interface
|
||||
assign scoreboard_if.valid = ibuffer_if.valid && dispatch_if.ready;
|
||||
assign scoreboard_if.uuid = ibuffer_if.uuid;
|
||||
assign scoreboard_if.wid = ibuffer_if.wid;
|
||||
assign scoreboard_if.PC = ibuffer_if.PC;
|
||||
assign scoreboard_if.wb = ibuffer_if.wb;
|
||||
@@ -57,6 +59,7 @@ module VX_issue #(
|
||||
|
||||
// dispatch interface
|
||||
assign dispatch_if.valid = ibuffer_if.valid && scoreboard_if.ready;
|
||||
assign dispatch_if.uuid = ibuffer_if.uuid;
|
||||
assign dispatch_if.wid = ibuffer_if.wid;
|
||||
assign dispatch_if.tmask = ibuffer_if.tmask;
|
||||
assign dispatch_if.PC = ibuffer_if.PC;
|
||||
@@ -121,9 +124,8 @@ module VX_issue #(
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
|
||||
`SCOPE_ASSIGN (issue_wid, ibuffer_if.wid);
|
||||
`SCOPE_ASSIGN (issue_uuid, ibuffer_if.uuid);
|
||||
`SCOPE_ASSIGN (issue_tmask, ibuffer_if.tmask);
|
||||
`SCOPE_ASSIGN (issue_pc, ibuffer_if.PC);
|
||||
`SCOPE_ASSIGN (issue_ex_type, ibuffer_if.ex_type);
|
||||
`SCOPE_ASSIGN (issue_op_type, ibuffer_if.op_type);
|
||||
`SCOPE_ASSIGN (issue_op_mod, ibuffer_if.op_mod);
|
||||
@@ -140,10 +142,9 @@ module VX_issue #(
|
||||
`SCOPE_ASSIGN (gpr_rs1, gpr_rsp_if.rs1_data);
|
||||
`SCOPE_ASSIGN (gpr_rs2, gpr_rsp_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpr_rs3, gpr_rsp_if.rs3_data);
|
||||
`SCOPE_ASSIGN (writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (writeback_uuid, writeback_if.uuid);
|
||||
`SCOPE_ASSIGN (writeback_tmask, writeback_if.tmask);
|
||||
`SCOPE_ASSIGN (writeback_wid, writeback_if.wid);
|
||||
`SCOPE_ASSIGN (writeback_pc, writeback_if.PC);
|
||||
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
|
||||
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
|
||||
@@ -171,40 +172,35 @@ module VX_issue #(
|
||||
perf_fpu_stalls <= 0;
|
||||
`endif
|
||||
end else begin
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
if (decode_if.valid & ~decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (scoreboard_if.valid & !scoreboard_if.ready) begin
|
||||
if (scoreboard_if.valid & ~scoreboard_if.ready) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'd1;
|
||||
if (dispatch_if.valid & ~dispatch_if.ready) begin
|
||||
case (dispatch_if.ex_type)
|
||||
`EX_ALU: perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'd1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
`EX_FPU: perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'd1;
|
||||
`endif
|
||||
`EX_LSU: perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'd1;
|
||||
`EX_CSR: perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'd1;
|
||||
//`EX_GPU:
|
||||
default: perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'd1;
|
||||
endcase
|
||||
end
|
||||
if (lsu_req_if.valid & !lsu_req_if.ready) begin
|
||||
perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (csr_req_if.valid & !csr_req_if.ready) begin
|
||||
perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_pipeline_if.ibf_stalls = perf_ibf_stalls;
|
||||
assign perf_pipeline_if.scb_stalls = perf_scb_stalls;
|
||||
assign perf_pipeline_if.alu_stalls = perf_alu_stalls;
|
||||
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
||||
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
||||
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
||||
assign perf_issue_if.ibf_stalls = perf_ibf_stalls;
|
||||
assign perf_issue_if.scb_stalls = perf_scb_stalls;
|
||||
assign perf_issue_if.alu_stalls = perf_alu_stalls;
|
||||
assign perf_issue_if.lsu_stalls = perf_lsu_stalls;
|
||||
assign perf_issue_if.csr_stalls = perf_csr_stalls;
|
||||
assign perf_issue_if.gpu_stalls = perf_gpu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
||||
assign perf_issue_if.fpu_stalls = perf_fpu_stalls;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
@@ -216,7 +212,7 @@ module VX_issue #(
|
||||
`TRACE_ARRAY1D(alu_req_if.rs1_data, `NUM_THREADS);
|
||||
dpi_trace(", rs2_data=");
|
||||
`TRACE_ARRAY1D(alu_req_if.rs2_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", alu_req_if.uuid);
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, offset=%0h, addr=",
|
||||
@@ -224,13 +220,13 @@ module VX_issue #(
|
||||
`TRACE_ARRAY1D(lsu_req_if.base_addr, `NUM_THREADS);
|
||||
dpi_trace(", data=");
|
||||
`TRACE_ARRAY1D(lsu_req_if.store_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", lsu_req_if.uuid);
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
dpi_trace("%d: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=",
|
||||
$time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.addr);
|
||||
`TRACE_ARRAY1D(csr_req_if.rs1_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", csr_req_if.uuid);
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
@@ -241,7 +237,7 @@ module VX_issue #(
|
||||
`TRACE_ARRAY1D(fpu_req_if.rs2_data, `NUM_THREADS);
|
||||
dpi_trace(", rs3_data=");
|
||||
`TRACE_ARRAY1D(fpu_req_if.rs3_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", fpu_req_if.uuid);
|
||||
end
|
||||
`endif
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
@@ -252,7 +248,7 @@ module VX_issue #(
|
||||
`TRACE_ARRAY1D(gpu_req_if.rs2_data, `NUM_THREADS);
|
||||
dpi_trace(", rs3_data=");
|
||||
`TRACE_ARRAY1D(gpu_req_if.rs3_data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(" (#%0d)\n", gpu_req_if.uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -21,7 +21,6 @@ module VX_lsu_unit #(
|
||||
);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
|
||||
|
||||
localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
@@ -29,6 +28,7 @@ module VX_lsu_unit #(
|
||||
`STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
wire req_valid;
|
||||
wire [63:0] req_uuid;
|
||||
wire [`NUM_THREADS-1:0] req_tmask;
|
||||
wire [`NUM_THREADS-1:0][31:0] req_addr;
|
||||
wire [`INST_LSU_BITS-1:0] req_type;
|
||||
@@ -54,16 +54,16 @@ module VX_lsu_unit #(
|
||||
for (genvar i = 0; i < (`NUM_THREADS-1); i++) begin
|
||||
assign addr_matches[i] = (lsu_req_if.base_addr[i+1] == lsu_req_if.base_addr[0]) || ~lsu_req_if.tmask[i+1];
|
||||
end
|
||||
|
||||
wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
// is non-cacheable address
|
||||
wire is_addr_nc = (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT));
|
||||
|
||||
if (`SM_ENABLE) begin
|
||||
// is shared memory address
|
||||
wire is_addr_sm = (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] >= MEM_ADDRW'((`SMEM_BASE_ADDR - `SMEM_SIZE) >> MEM_ASHIFT))
|
||||
& (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] < MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT));
|
||||
& (full_addr[i][MEM_ASHIFT +: MEM_ADDRW] < MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT));
|
||||
assign lsu_addr_type[i] = {is_addr_nc, is_addr_sm};
|
||||
end else begin
|
||||
assign lsu_addr_type[i] = is_addr_nc;
|
||||
@@ -81,19 +81,20 @@ module VX_lsu_unit #(
|
||||
wire lsu_wb = lsu_req_if.wb | lsu_req_if.is_prefetch;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 1 + 1 + 64 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_in),
|
||||
.data_in ({lsu_valid, lsu_is_dup, lsu_req_if.is_prefetch, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_is_prefetch, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
|
||||
.data_in ({lsu_valid, lsu_is_dup, lsu_req_if.is_prefetch, lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_is_prefetch, req_uuid, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
|
||||
);
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in && ~fence_wait;
|
||||
|
||||
wire [63:0] rsp_uuid;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [31:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
@@ -146,7 +147,7 @@ module VX_lsu_unit #(
|
||||
wire req_wb2 = req_wb && ~req_is_prefetch;
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1),
|
||||
.DATAW (64 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
@@ -154,8 +155,8 @@ module VX_lsu_unit #(
|
||||
.write_addr (mbuf_waddr),
|
||||
.acquire_slot (mbuf_push),
|
||||
.read_addr (mbuf_raddr),
|
||||
.write_data ({req_wid, req_pc, req_tmask, req_rd, req_wb2, req_type, req_offset, req_is_dup, req_is_prefetch}),
|
||||
.read_data ({rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup, rsp_is_prefetch}),
|
||||
.write_data ({req_uuid, req_wid, req_pc, req_tmask, req_rd, req_wb2, req_type, req_offset, req_is_dup, req_is_prefetch}),
|
||||
.read_data ({rsp_uuid, rsp_wid, rsp_pc, rsp_tmask, rsp_rd, rsp_wb, rsp_type, rsp_offset, rsp_is_dup, rsp_is_prefetch}),
|
||||
.release_addr (mbuf_raddr),
|
||||
.release_slot (mbuf_pop),
|
||||
.full (mbuf_full),
|
||||
@@ -259,6 +260,7 @@ module VX_lsu_unit #(
|
||||
wire is_store_rsp = req_valid && ~req_wb && dcache_req_ready;
|
||||
|
||||
assign st_commit_if.valid = is_store_rsp;
|
||||
assign st_commit_if.uuid = req_uuid;
|
||||
assign st_commit_if.wid = req_wid;
|
||||
assign st_commit_if.tmask = req_tmask;
|
||||
assign st_commit_if.PC = req_pc;
|
||||
@@ -295,14 +297,14 @@ module VX_lsu_unit #(
|
||||
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
|
||||
.RESETW (1)
|
||||
) rsp_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!load_rsp_stall),
|
||||
.data_in ({dcache_rsp_if.valid, rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
.data_in ({dcache_rsp_if.valid, rsp_uuid, rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.uuid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
@@ -310,19 +312,19 @@ module VX_lsu_unit #(
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_fire);
|
||||
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
|
||||
`SCOPE_ASSIGN (dcache_req_uuid, req_uuid);
|
||||
`SCOPE_ASSIGN (dcache_req_addr, req_addr);
|
||||
`SCOPE_ASSIGN (dcache_req_rw, ~req_wb);
|
||||
`SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (dcache_req_tag, req_tag);
|
||||
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_fire}});
|
||||
`SCOPE_ASSIGN (dcache_rsp_uuid, rsp_uuid);
|
||||
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + 64 + 1)-1:0] pending_reqs;
|
||||
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + 64 + 64 + 1)-1:0] pending_reqs;
|
||||
wire [63:0] delay_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -330,7 +332,7 @@ module VX_lsu_unit #(
|
||||
pending_reqs <= '0;
|
||||
end begin
|
||||
if (mbuf_push) begin
|
||||
pending_reqs[mbuf_waddr] <= {req_wid, req_pc, req_rd, $time, 1'b1};
|
||||
pending_reqs[mbuf_waddr] <= {req_wid, req_pc, req_rd, req_uuid, $time, 1'b1};
|
||||
end
|
||||
if (mbuf_pop) begin
|
||||
pending_reqs[mbuf_raddr] <= '0;
|
||||
@@ -340,8 +342,11 @@ module VX_lsu_unit #(
|
||||
for (integer i = 0; i < `LSUQ_SIZE; ++i) begin
|
||||
if (pending_reqs[i][0]) begin
|
||||
`ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
|
||||
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+32+`NR_BITS +: `NW_BITS], pending_reqs[i][1+64+`NR_BITS +: 32], pending_reqs[i][1+64 +: `NR_BITS]));
|
||||
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+64+32+`NR_BITS +: `NW_BITS],
|
||||
pending_reqs[i][1+64+64+`NR_BITS +: 32],
|
||||
pending_reqs[i][1+64+64 +: `NR_BITS],
|
||||
pending_reqs[i][1+64 +: 64]));
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -360,20 +365,20 @@ module VX_lsu_unit #(
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", data=");
|
||||
`TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
|
||||
dpi_trace(", req_id=%0h\n", req_id);
|
||||
dpi_trace(", (#%0d)\n", req_uuid);
|
||||
end else begin
|
||||
dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire, req_id);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", rd=%0d, is_dup=%b\n", req_rd, req_is_dup);
|
||||
dpi_trace(", rd=%0d, is_dup=%b (#%0d)\n", req_rd, req_is_dup, req_uuid);
|
||||
end
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, rd=%0d, data=",
|
||||
$time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, rsp_req_id, mbuf_raddr, rsp_rd);
|
||||
`TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
|
||||
dpi_trace(", is_dup=%b\n", rsp_is_dup);
|
||||
dpi_trace(", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -358,19 +358,17 @@ module VX_mem_unit # (
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
||||
`UNUSED_VAR (perf_dcache_if.mem_stalls)
|
||||
`UNUSED_VAR (perf_dcache_if.crsp_stalls)
|
||||
|
||||
assign perf_memsys_if.icache_reads = perf_icache_if.reads;
|
||||
assign perf_memsys_if.icache_read_misses = perf_icache_if.read_misses;
|
||||
assign perf_memsys_if.icache_pipe_stalls = perf_icache_if.pipe_stalls;
|
||||
assign perf_memsys_if.icache_crsp_stalls = perf_icache_if.crsp_stalls;
|
||||
|
||||
assign perf_memsys_if.dcache_reads = perf_dcache_if.reads;
|
||||
assign perf_memsys_if.dcache_writes = perf_dcache_if.writes;
|
||||
assign perf_memsys_if.dcache_read_misses = perf_dcache_if.read_misses;
|
||||
assign perf_memsys_if.dcache_write_misses= perf_dcache_if.write_misses;
|
||||
assign perf_memsys_if.dcache_bank_stalls = perf_dcache_if.bank_stalls;
|
||||
assign perf_memsys_if.dcache_mshr_stalls = perf_dcache_if.mshr_stalls;
|
||||
assign perf_memsys_if.dcache_pipe_stalls = perf_dcache_if.pipe_stalls;
|
||||
assign perf_memsys_if.dcache_crsp_stalls = perf_dcache_if.crsp_stalls;
|
||||
|
||||
if (`SM_ENABLE) begin
|
||||
assign perf_memsys_if.smem_reads = perf_smem_if.reads;
|
||||
@@ -382,47 +380,41 @@ end else begin
|
||||
assign perf_memsys_if.smem_bank_stalls = 0;
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_lat_per_cycle;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_lat_per_cycle <= 0;
|
||||
perf_mem_pending_reads <= 0;
|
||||
end else begin
|
||||
perf_mem_lat_per_cycle <= perf_mem_lat_per_cycle +
|
||||
`PERF_CTR_BITS'($signed(2'((mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready) && !(mem_rsp_if.valid && mem_rsp_if.ready)) -
|
||||
2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready))));
|
||||
perf_mem_pending_reads <= perf_mem_pending_reads +
|
||||
`PERF_CTR_BITS'($signed(2'((mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) && !(mem_rsp_if.valid && mem_rsp_if.ready)) -
|
||||
2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw))));
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_lat;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_reads <= 0;
|
||||
perf_mem_writes <= 0;
|
||||
perf_mem_lat <= 0;
|
||||
perf_mem_stalls <= 0;
|
||||
end else begin
|
||||
if (mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) begin
|
||||
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (mem_req_if.valid && mem_req_if.ready && mem_req_if.rw) begin
|
||||
perf_mem_writes <= perf_mem_writes + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
if (mem_req_if.valid && !mem_req_if.ready) begin
|
||||
perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'd1;
|
||||
end
|
||||
perf_mem_lat <= perf_mem_lat + perf_mem_lat_per_cycle;
|
||||
end
|
||||
perf_mem_lat <= perf_mem_lat + perf_mem_pending_reads;
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_memsys_if.mem_reads = perf_mem_reads;
|
||||
assign perf_memsys_if.mem_writes = perf_mem_writes;
|
||||
assign perf_memsys_if.mem_latency = perf_mem_lat;
|
||||
assign perf_memsys_if.mem_stalls = perf_mem_stalls;
|
||||
assign perf_memsys_if.mem_latency = perf_mem_lat;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -6,6 +6,7 @@ module VX_muldiv (
|
||||
|
||||
// Inputs
|
||||
input wire [`INST_MUL_BITS-1:0] alu_op,
|
||||
input wire [63:0] uuid_in,
|
||||
input wire [`NW_BITS-1:0] wid_in,
|
||||
input wire [`NUM_THREADS-1:0] tmask_in,
|
||||
input wire [31:0] PC_in,
|
||||
@@ -15,6 +16,7 @@ module VX_muldiv (
|
||||
input wire [`NUM_THREADS-1:0][31:0] alu_in2,
|
||||
|
||||
// Outputs
|
||||
output wire [63:0] uuid_out,
|
||||
output wire [`NW_BITS-1:0] wid_out,
|
||||
output wire [`NUM_THREADS-1:0] tmask_out,
|
||||
output wire [31:0] PC_out,
|
||||
@@ -32,6 +34,7 @@ module VX_muldiv (
|
||||
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire [63:0] mul_uuid_out;
|
||||
wire [`NW_BITS-1:0] mul_wid_out;
|
||||
wire [`NUM_THREADS-1:0] mul_tmask_out;
|
||||
wire [31:0] mul_PC_out;
|
||||
@@ -63,15 +66,15 @@ module VX_muldiv (
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, mul_result_tmp}),
|
||||
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_result})
|
||||
.data_in ({mul_valid_in, uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, mul_result_tmp}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_result})
|
||||
);
|
||||
|
||||
`else
|
||||
@@ -103,15 +106,15 @@ module VX_muldiv (
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (mul_ready_in),
|
||||
.data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in}),
|
||||
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
||||
.data_in ({mul_valid_in, uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in}),
|
||||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
||||
);
|
||||
|
||||
`endif
|
||||
@@ -119,6 +122,7 @@ module VX_muldiv (
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||
wire [63:0] div_uuid_out;
|
||||
wire [`NW_BITS-1:0] div_wid_out;
|
||||
wire [`NUM_THREADS-1:0] div_tmask_out;
|
||||
wire [31:0] div_PC_out;
|
||||
@@ -147,15 +151,15 @@ module VX_muldiv (
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) div_shift_reg (
|
||||
.clk(clk),
|
||||
.reset (reset),
|
||||
.enable (div_ready_in),
|
||||
.data_in ({div_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, div_result_tmp}),
|
||||
.data_out ({div_valid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_result})
|
||||
.data_in ({div_valid_in, uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, div_result_tmp}),
|
||||
.data_out ({div_valid_out, div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_result})
|
||||
);
|
||||
|
||||
assign div_ready_in = div_ready_out || ~div_valid_out;
|
||||
@@ -171,21 +175,21 @@ module VX_muldiv (
|
||||
.WIDTHQ (32),
|
||||
.WIDTHR (32),
|
||||
.LANES (`NUM_THREADS),
|
||||
.TAGW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1)
|
||||
.TAGW (64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1)
|
||||
) divide (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (div_valid_in),
|
||||
.ready_in (div_ready_in),
|
||||
.signed_mode(is_signed_div),
|
||||
.tag_in ({wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
|
||||
.tag_in ({uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
|
||||
.numer (alu_in1),
|
||||
.denom (alu_in2),
|
||||
.quotient (div_result_tmp),
|
||||
.remainder (rem_result_tmp),
|
||||
.ready_out (div_ready_out),
|
||||
.valid_out (div_valid_out),
|
||||
.tag_out ({div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out})
|
||||
.tag_out ({div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out})
|
||||
);
|
||||
|
||||
assign div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp;
|
||||
@@ -195,6 +199,7 @@ module VX_muldiv (
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire rsp_valid = mul_valid_out || div_valid_out;
|
||||
wire [63:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out;
|
||||
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
|
||||
wire [31:0] rsp_PC = mul_valid_out ? mul_PC_out : div_PC_out;
|
||||
@@ -205,14 +210,14 @@ module VX_muldiv (
|
||||
assign stall_out = ~ready_out && valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({valid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
|
||||
.data_in ({rsp_valid, rsp_uuid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({valid_out, uuid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
|
||||
@@ -165,6 +165,9 @@ module VX_pipeline #(
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_decode_if (perf_pipeline_if.decode),
|
||||
`endif
|
||||
.ifetch_rsp_if (ifetch_rsp_if),
|
||||
.decode_if (decode_if),
|
||||
.wstall_if (wstall_if),
|
||||
@@ -180,7 +183,7 @@ module VX_pipeline #(
|
||||
.reset (issue_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_pipeline_if (perf_pipeline_if),
|
||||
.perf_issue_if (perf_pipeline_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
|
||||
@@ -60,22 +60,22 @@ module VX_scoreboard #(
|
||||
end else begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
dpi_trace("%d: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b\n",
|
||||
dpi_trace("%d: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b (#%0d)\n",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3);
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3, ibuffer_if.uuid);
|
||||
end
|
||||
`endif
|
||||
if (release_reg) begin
|
||||
`ASSERT(inuse_regs[writeback_if.wid][writeback_if.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd));
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd,writeback_if.uuid));
|
||||
end
|
||||
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
|
||||
deadlock_ctr <= deadlock_ctr + 1;
|
||||
`ASSERT(deadlock_ctr < deadlock_timeout,
|
||||
("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b (#%0d)",
|
||||
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3));
|
||||
deq_inuse_rd, deq_inuse_rs1, deq_inuse_rs2, deq_inuse_rs3, ibuffer_if.uuid));
|
||||
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
|
||||
deadlock_ctr <= 0;
|
||||
end
|
||||
|
||||
@@ -35,9 +35,9 @@ task trace_ex_op (
|
||||
`INST_BR_JALR: dpi_trace("JALR");
|
||||
`INST_BR_ECALL: dpi_trace("ECALL");
|
||||
`INST_BR_EBREAK:dpi_trace("EBREAK");
|
||||
`INST_BR_MRET: dpi_trace("MRET");
|
||||
`INST_BR_URET: dpi_trace("URET");
|
||||
`INST_BR_SRET: dpi_trace("SRET");
|
||||
`INST_BR_DRET: dpi_trace("DRET");
|
||||
`INST_BR_MRET: dpi_trace("MRET");
|
||||
default: dpi_trace("?");
|
||||
endcase
|
||||
end else if (`INST_ALU_IS_MUL(op_mod)) begin
|
||||
|
||||
@@ -46,6 +46,8 @@ module VX_warp_sched #(
|
||||
wire schedule_valid;
|
||||
wire warp_scheduled;
|
||||
|
||||
reg [63:0] issued_instrs;
|
||||
|
||||
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
|
||||
|
||||
wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
|
||||
@@ -62,12 +64,13 @@ module VX_warp_sched #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
barrier_masks <= 0;
|
||||
use_wspawn <= 0;
|
||||
stalled_warps <= 0;
|
||||
barrier_masks <= '0;
|
||||
use_wspawn <= '0;
|
||||
stalled_warps <= '0;
|
||||
warp_pcs <= '0;
|
||||
active_warps <= '0;
|
||||
thread_masks <= '0;
|
||||
issued_instrs <= '0;
|
||||
|
||||
// activate first warp
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
@@ -117,6 +120,8 @@ module VX_warp_sched #(
|
||||
if (use_wspawn[schedule_wid]) begin
|
||||
thread_masks[schedule_wid] <= 1;
|
||||
end
|
||||
|
||||
issued_instrs <= issued_instrs + 1;
|
||||
end
|
||||
|
||||
if (ifetch_req_fire) begin
|
||||
@@ -223,20 +228,23 @@ module VX_warp_sched #(
|
||||
|
||||
assign warp_scheduled = schedule_valid && ~stall_out;
|
||||
|
||||
wire [63:0] instr_uuid = (issued_instrs * `NUM_CORES * `NUM_CLUSTERS) + 64'(CORE_ID);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NUM_THREADS + 32 + `NW_BITS),
|
||||
.DATAW (1 + 64 + `NUM_THREADS + 32 + `NW_BITS),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({schedule_valid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
.data_in ({schedule_valid, instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
|
||||
.data_out ({ifetch_req_if.valid, ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
assign busy = (active_warps != 0);
|
||||
|
||||
`SCOPE_ASSIGN (wsched_scheduled, warp_scheduled);
|
||||
`SCOPE_ASSIGN (wsched_schedule_uuid, instr_uuid);
|
||||
`SCOPE_ASSIGN (wsched_active_warps, active_warps);
|
||||
`SCOPE_ASSIGN (wsched_stalled_warps, stalled_warps);
|
||||
`SCOPE_ASSIGN (wsched_schedule_wid, schedule_wid);
|
||||
|
||||
@@ -23,17 +23,9 @@ module VX_writeback #(
|
||||
|
||||
localparam DATAW = `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
localparam NUM_RSPS = 5;
|
||||
`else
|
||||
localparam NUM_RSPS = 4;
|
||||
`endif
|
||||
`else
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
localparam NUM_RSPS = 4;
|
||||
`else
|
||||
localparam NUM_RSPS = 3;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
wire wb_valid;
|
||||
@@ -50,9 +42,7 @@ module VX_writeback #(
|
||||
wire stall;
|
||||
|
||||
assign rsp_valid = {
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
gpu_commit_if.valid && gpu_commit_if.wb,
|
||||
`endif
|
||||
csr_commit_if.valid && csr_commit_if.wb,
|
||||
alu_commit_if.valid && alu_commit_if.wb,
|
||||
`ifdef EXT_F_ENABLE
|
||||
@@ -62,9 +52,7 @@ module VX_writeback #(
|
||||
};
|
||||
|
||||
assign rsp_data = {
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
{gpu_commit_if.wid, gpu_commit_if.PC, gpu_commit_if.tmask, gpu_commit_if.rd, gpu_commit_if.data, gpu_commit_if.eop},
|
||||
`endif
|
||||
{csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.rd, csr_commit_if.data, csr_commit_if.eop},
|
||||
{alu_commit_if.wid, alu_commit_if.PC, alu_commit_if.tmask, alu_commit_if.rd, alu_commit_if.data, alu_commit_if.eop},
|
||||
`ifdef EXT_F_ENABLE
|
||||
@@ -88,28 +76,17 @@ module VX_writeback #(
|
||||
.ready_out (~stall)
|
||||
);
|
||||
|
||||
assign ld_commit_if.ready = rsp_ready[0] || ~ld_commit_if.wb;
|
||||
assign ld_commit_if.ready = rsp_ready[0] || ~ld_commit_if.wb;
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_commit_if.ready = rsp_ready[1] || ~fpu_commit_if.wb;
|
||||
assign alu_commit_if.ready = rsp_ready[2] || ~alu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[3] || ~csr_commit_if.wb;
|
||||
assign gpu_commit_if.ready = rsp_ready[4] || ~gpu_commit_if.wb;
|
||||
`else
|
||||
assign alu_commit_if.ready = rsp_ready[1] || ~alu_commit_if.wb;
|
||||
assign csr_commit_if.ready = rsp_ready[2] || ~csr_commit_if.wb;
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
assign gpu_commit_if.ready = rsp_ready[3] || ~gpu_commit_if.wb;
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign gpu_commit_if.ready = rsp_ready[4] || ~gpu_commit_if.wb;
|
||||
`else
|
||||
assign gpu_commit_if.ready = rsp_ready[3] || ~gpu_commit_if.wb;
|
||||
`endif
|
||||
`else
|
||||
assign gpu_commit_if.ready = 1;
|
||||
`endif
|
||||
|
||||
assign stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
|
||||
@@ -124,7 +124,8 @@ module VX_to_mem #(
|
||||
end
|
||||
end
|
||||
assign mem_rsp_tag_in_w = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_in;
|
||||
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_w == mem_rsp_tag_in), ("out-of-order memory reponse! cur=%d, expected=%d", mem_rsp_tag_in_w, mem_rsp_tag_in))
|
||||
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_w == mem_rsp_tag_in),
|
||||
("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_w, mem_rsp_tag_in))
|
||||
|
||||
wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};
|
||||
|
||||
|
||||
2
hw/rtl/cache/VX_bank.sv
vendored
2
hw/rtl/cache/VX_bank.sv
vendored
@@ -48,7 +48,6 @@ module VX_bank #(
|
||||
output wire perf_read_misses,
|
||||
output wire perf_write_misses,
|
||||
output wire perf_mshr_stalls,
|
||||
output wire perf_pipe_stalls,
|
||||
`endif
|
||||
|
||||
// Core Request
|
||||
@@ -470,7 +469,6 @@ module VX_bank #(
|
||||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = do_read_st1 && miss_st1;
|
||||
assign perf_write_misses = do_write_st1 && miss_st1;
|
||||
assign perf_pipe_stalls = crsq_stall || mreq_alm_full || mshr_alm_full;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
|
||||
244
hw/rtl/cache/VX_cache.sv
vendored
244
hw/rtl/cache/VX_cache.sv
vendored
@@ -102,7 +102,6 @@ module VX_cache #(
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -219,37 +218,37 @@ module VX_cache #(
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core request
|
||||
wire [NUM_REQS-1:0] core_req_valid_nc;
|
||||
wire [NUM_REQS-1:0] core_req_rw_nc;
|
||||
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_nc;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_nc;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_nc;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_nc;
|
||||
wire [NUM_REQS-1:0] core_req_ready_nc;
|
||||
wire [NUM_REQS-1:0] core_req_valid_c;
|
||||
wire [NUM_REQS-1:0] core_req_rw_c;
|
||||
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_c;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_c;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_c;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_c;
|
||||
wire [NUM_REQS-1:0] core_req_ready_c;
|
||||
|
||||
// Core response
|
||||
wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_nc;
|
||||
wire [NUM_REQS-1:0] core_rsp_tmask_nc;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_nc;
|
||||
wire [`CORE_RSP_TAGS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_nc;
|
||||
wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_nc;
|
||||
wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_c;
|
||||
wire [NUM_REQS-1:0] core_rsp_tmask_c;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_c;
|
||||
wire [`CORE_RSP_TAGS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_c;
|
||||
wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_c;
|
||||
|
||||
// Memory request
|
||||
wire mem_req_valid_nc;
|
||||
wire mem_req_rw_nc;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
|
||||
wire [NUM_PORTS-1:0] mem_req_pmask_nc;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc;
|
||||
wire mem_req_ready_nc;
|
||||
wire mem_req_valid_c;
|
||||
wire mem_req_rw_c;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_c;
|
||||
wire [NUM_PORTS-1:0] mem_req_pmask_c;
|
||||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_c;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_c;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_c;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_c;
|
||||
wire mem_req_ready_c;
|
||||
|
||||
// Memory response
|
||||
wire mem_rsp_valid_nc;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_nc;
|
||||
wire mem_rsp_ready_nc;
|
||||
wire mem_rsp_valid_c;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_c;
|
||||
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_c;
|
||||
wire mem_rsp_ready_c;
|
||||
|
||||
if (NC_ENABLE) begin
|
||||
VX_nc_bypass #(
|
||||
@@ -280,20 +279,20 @@ module VX_cache #(
|
||||
.core_req_ready_in (core_req_ready),
|
||||
|
||||
// Core request out
|
||||
.core_req_valid_out (core_req_valid_nc),
|
||||
.core_req_rw_out (core_req_rw_nc),
|
||||
.core_req_byteen_out(core_req_byteen_nc),
|
||||
.core_req_addr_out (core_req_addr_nc),
|
||||
.core_req_data_out (core_req_data_nc),
|
||||
.core_req_tag_out (core_req_tag_nc),
|
||||
.core_req_ready_out (core_req_ready_nc),
|
||||
.core_req_valid_out (core_req_valid_c),
|
||||
.core_req_rw_out (core_req_rw_c),
|
||||
.core_req_byteen_out(core_req_byteen_c),
|
||||
.core_req_addr_out (core_req_addr_c),
|
||||
.core_req_data_out (core_req_data_c),
|
||||
.core_req_tag_out (core_req_tag_c),
|
||||
.core_req_ready_out (core_req_ready_c),
|
||||
|
||||
// Core response in
|
||||
.core_rsp_valid_in (core_rsp_valid_nc),
|
||||
.core_rsp_tmask_in (core_rsp_tmask_nc),
|
||||
.core_rsp_data_in (core_rsp_data_nc),
|
||||
.core_rsp_tag_in (core_rsp_tag_nc),
|
||||
.core_rsp_ready_in (core_rsp_ready_nc),
|
||||
.core_rsp_valid_in (core_rsp_valid_c),
|
||||
.core_rsp_tmask_in (core_rsp_tmask_c),
|
||||
.core_rsp_data_in (core_rsp_data_c),
|
||||
.core_rsp_tag_in (core_rsp_tag_c),
|
||||
.core_rsp_ready_in (core_rsp_ready_c),
|
||||
|
||||
// Core response out
|
||||
.core_rsp_valid_out (core_rsp_valid_sb),
|
||||
@@ -303,15 +302,15 @@ module VX_cache #(
|
||||
.core_rsp_ready_out (core_rsp_ready_sb),
|
||||
|
||||
// Memory request in
|
||||
.mem_req_valid_in (mem_req_valid_nc),
|
||||
.mem_req_rw_in (mem_req_rw_nc),
|
||||
.mem_req_addr_in (mem_req_addr_nc),
|
||||
.mem_req_pmask_in (mem_req_pmask_nc),
|
||||
.mem_req_byteen_in (mem_req_byteen_nc),
|
||||
.mem_req_wsel_in (mem_req_wsel_nc),
|
||||
.mem_req_data_in (mem_req_data_nc),
|
||||
.mem_req_tag_in (mem_req_tag_nc),
|
||||
.mem_req_ready_in (mem_req_ready_nc),
|
||||
.mem_req_valid_in (mem_req_valid_c),
|
||||
.mem_req_rw_in (mem_req_rw_c),
|
||||
.mem_req_addr_in (mem_req_addr_c),
|
||||
.mem_req_pmask_in (mem_req_pmask_c),
|
||||
.mem_req_byteen_in (mem_req_byteen_c),
|
||||
.mem_req_wsel_in (mem_req_wsel_c),
|
||||
.mem_req_data_in (mem_req_data_c),
|
||||
.mem_req_tag_in (mem_req_tag_c),
|
||||
.mem_req_ready_in (mem_req_ready_c),
|
||||
|
||||
// Memory request out
|
||||
.mem_req_valid_out (mem_req_valid_sb),
|
||||
@@ -331,40 +330,40 @@ module VX_cache #(
|
||||
.mem_rsp_ready_in (mem_rsp_ready),
|
||||
|
||||
// Memory response out
|
||||
.mem_rsp_valid_out (mem_rsp_valid_nc),
|
||||
.mem_rsp_data_out (mem_rsp_data_nc),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_nc),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_nc)
|
||||
.mem_rsp_valid_out (mem_rsp_valid_c),
|
||||
.mem_rsp_data_out (mem_rsp_data_c),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_c),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_c)
|
||||
);
|
||||
end else begin
|
||||
assign core_req_valid_nc = core_req_valid;
|
||||
assign core_req_rw_nc = core_req_rw;
|
||||
assign core_req_addr_nc = core_req_addr;
|
||||
assign core_req_byteen_nc = core_req_byteen;
|
||||
assign core_req_data_nc = core_req_data;
|
||||
assign core_req_tag_nc = core_req_tag;
|
||||
assign core_req_ready = core_req_ready_nc;
|
||||
assign core_req_valid_c = core_req_valid;
|
||||
assign core_req_rw_c = core_req_rw;
|
||||
assign core_req_addr_c = core_req_addr;
|
||||
assign core_req_byteen_c = core_req_byteen;
|
||||
assign core_req_data_c = core_req_data;
|
||||
assign core_req_tag_c = core_req_tag;
|
||||
assign core_req_ready = core_req_ready_c;
|
||||
|
||||
assign core_rsp_valid_sb = core_rsp_valid_nc;
|
||||
assign core_rsp_tmask_sb = core_rsp_tmask_nc;
|
||||
assign core_rsp_data_sb = core_rsp_data_nc;
|
||||
assign core_rsp_tag_sb = core_rsp_tag_nc;
|
||||
assign core_rsp_ready_nc = core_rsp_ready_sb;
|
||||
assign core_rsp_valid_sb = core_rsp_valid_c;
|
||||
assign core_rsp_tmask_sb = core_rsp_tmask_c;
|
||||
assign core_rsp_data_sb = core_rsp_data_c;
|
||||
assign core_rsp_tag_sb = core_rsp_tag_c;
|
||||
assign core_rsp_ready_c = core_rsp_ready_sb;
|
||||
|
||||
assign mem_req_valid_sb = mem_req_valid_nc;
|
||||
assign mem_req_addr_sb = mem_req_addr_nc;
|
||||
assign mem_req_rw_p = mem_req_rw_nc;
|
||||
assign mem_req_pmask_p = mem_req_pmask_nc;
|
||||
assign mem_req_byteen_p = mem_req_byteen_nc;
|
||||
assign mem_req_wsel_p = mem_req_wsel_nc;
|
||||
assign mem_req_data_p = mem_req_data_nc;
|
||||
assign mem_req_tag_sb = mem_req_tag_nc;
|
||||
assign mem_req_ready_nc = mem_req_ready_sb;
|
||||
assign mem_req_valid_sb = mem_req_valid_c;
|
||||
assign mem_req_addr_sb = mem_req_addr_c;
|
||||
assign mem_req_rw_p = mem_req_rw_c;
|
||||
assign mem_req_pmask_p = mem_req_pmask_c;
|
||||
assign mem_req_byteen_p = mem_req_byteen_c;
|
||||
assign mem_req_wsel_p = mem_req_wsel_c;
|
||||
assign mem_req_data_p = mem_req_data_c;
|
||||
assign mem_req_tag_sb = mem_req_tag_c;
|
||||
assign mem_req_ready_c = mem_req_ready_sb;
|
||||
|
||||
assign mem_rsp_valid_nc = mem_rsp_valid;
|
||||
assign mem_rsp_data_nc = mem_rsp_data;
|
||||
assign mem_rsp_tag_nc = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_rsp_ready_nc;
|
||||
assign mem_rsp_valid_c = mem_rsp_valid;
|
||||
assign mem_rsp_data_c = mem_rsp_data;
|
||||
assign mem_rsp_tag_c = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_rsp_ready_c;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -383,15 +382,15 @@ module VX_cache #(
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (mrsq_reset),
|
||||
.ready_in (mem_rsp_ready_nc),
|
||||
.valid_in (mem_rsp_valid_nc),
|
||||
.data_in ({mem_rsp_tag_nc, mem_rsp_data_nc}),
|
||||
.ready_in (mem_rsp_ready_c),
|
||||
.valid_in (mem_rsp_valid_c),
|
||||
.data_in ({mem_rsp_tag_c, mem_rsp_data_c}),
|
||||
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
|
||||
.ready_out (mrsq_out_ready),
|
||||
.valid_out (mrsq_out_valid)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (mem_rsp_tag_nc)
|
||||
`UNUSED_VAR (mem_rsp_tag_c)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -464,13 +463,13 @@ module VX_cache #(
|
||||
`ifdef PERF_ENABLE
|
||||
.bank_stalls(perf_cache_if.bank_stalls),
|
||||
`endif
|
||||
.core_req_valid (core_req_valid_nc),
|
||||
.core_req_rw (core_req_rw_nc),
|
||||
.core_req_addr (core_req_addr_nc),
|
||||
.core_req_byteen (core_req_byteen_nc),
|
||||
.core_req_data (core_req_data_nc),
|
||||
.core_req_tag (core_req_tag_nc),
|
||||
.core_req_ready (core_req_ready_nc),
|
||||
.core_req_valid (core_req_valid_c),
|
||||
.core_req_rw (core_req_rw_c),
|
||||
.core_req_addr (core_req_addr_c),
|
||||
.core_req_byteen (core_req_byteen_c),
|
||||
.core_req_data (core_req_data_c),
|
||||
.core_req_tag (core_req_tag_c),
|
||||
.core_req_ready (core_req_ready_c),
|
||||
.per_bank_core_req_valid (per_bank_core_req_valid),
|
||||
.per_bank_core_req_pmask (per_bank_core_req_pmask),
|
||||
.per_bank_core_req_rw (per_bank_core_req_rw),
|
||||
@@ -592,7 +591,6 @@ module VX_cache #(
|
||||
.perf_read_misses (perf_read_miss_per_bank[i]),
|
||||
.perf_write_misses (perf_write_miss_per_bank[i]),
|
||||
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
|
||||
.perf_pipe_stalls (perf_pipe_stall_per_bank[i]),
|
||||
`endif
|
||||
|
||||
// Core request
|
||||
@@ -655,11 +653,11 @@ module VX_cache #(
|
||||
.per_bank_core_rsp_tag (per_bank_core_rsp_tag),
|
||||
.per_bank_core_rsp_tid (per_bank_core_rsp_tid),
|
||||
.per_bank_core_rsp_ready (per_bank_core_rsp_ready),
|
||||
.core_rsp_valid (core_rsp_valid_nc),
|
||||
.core_rsp_tmask (core_rsp_tmask_nc),
|
||||
.core_rsp_tag (core_rsp_tag_nc),
|
||||
.core_rsp_data (core_rsp_data_nc),
|
||||
.core_rsp_ready (core_rsp_ready_nc)
|
||||
.core_rsp_valid (core_rsp_valid_c),
|
||||
.core_rsp_tmask (core_rsp_tmask_c),
|
||||
.core_rsp_tag (core_rsp_tag_c),
|
||||
.core_rsp_data (core_rsp_data_c),
|
||||
.core_rsp_ready (core_rsp_ready_c)
|
||||
);
|
||||
|
||||
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
|
||||
@@ -681,15 +679,15 @@ module VX_cache #(
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.data_in (data_in),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.valid_out (mem_req_valid_nc),
|
||||
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
|
||||
.ready_out (mem_req_ready_nc)
|
||||
.valid_out (mem_req_valid_c),
|
||||
.data_out ({mem_req_addr_c, mem_req_id, mem_req_rw_c, mem_req_pmask_c, mem_req_byteen_c, mem_req_wsel_c, mem_req_data_c}),
|
||||
.ready_out (mem_req_ready_c)
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'(mem_req_id);
|
||||
assign mem_req_tag_c = MEM_TAG_IN_WIDTH'(mem_req_id);
|
||||
end else begin
|
||||
assign mem_req_tag_nc = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_nc), mem_req_id});
|
||||
assign mem_req_tag_c = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_c), mem_req_id});
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
@@ -697,12 +695,21 @@ module VX_cache #(
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid_c & core_req_ready_c & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid_c & core_req_ready_c & core_req_rw;
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
|
||||
@@ -712,23 +719,14 @@ module VX_cache #(
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end
|
||||
|
||||
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;
|
||||
wire [$clog2(NUM_BANKS+1)-1:0] perf_pipe_stall_per_cycle;
|
||||
|
||||
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
|
||||
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
|
||||
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
|
||||
`POP_COUNT(perf_pipe_stall_per_cycle, perf_pipe_stall_per_bank);
|
||||
wire perf_mem_stall_per_cycle = mem_req_valid & ~mem_req_ready;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_read_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_pipe_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -738,16 +736,16 @@ module VX_cache #(
|
||||
perf_read_misses <= 0;
|
||||
perf_write_misses <= 0;
|
||||
perf_mshr_stalls <= 0;
|
||||
perf_pipe_stalls <= 0;
|
||||
perf_mem_stalls <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses+ `PERF_CTR_BITS'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
|
||||
perf_pipe_stalls <= perf_pipe_stalls + `PERF_CTR_BITS'(perf_pipe_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
|
||||
perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'(perf_mem_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -756,7 +754,7 @@ module VX_cache #(
|
||||
assign perf_cache_if.read_misses = perf_read_misses;
|
||||
assign perf_cache_if.write_misses = perf_write_misses;
|
||||
assign perf_cache_if.mshr_stalls = perf_mshr_stalls;
|
||||
assign perf_cache_if.pipe_stalls = perf_pipe_stalls;
|
||||
assign perf_cache_if.mem_stalls = perf_mem_stalls;
|
||||
assign perf_cache_if.crsp_stalls = perf_crsp_stalls;
|
||||
`endif
|
||||
|
||||
|
||||
25
hw/rtl/cache/VX_shared_mem.sv
vendored
25
hw/rtl/cache/VX_shared_mem.sv
vendored
@@ -335,21 +335,13 @@ module VX_shared_mem #(
|
||||
// per cycle: core_reads, core_writes
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
|
||||
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;
|
||||
|
||||
wire [NUM_REQS-1:0] perf_core_reads_per_mask = core_req_valid & core_req_ready & ~core_req_rw;
|
||||
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid & core_req_ready & core_req_rw;
|
||||
|
||||
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_mask);
|
||||
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
|
||||
|
||||
if (CORE_TAG_ID_BITS != 0) begin
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end else begin
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
|
||||
end
|
||||
wire perf_crsp_stall_per_cycle = core_rsp_valid & ~core_rsp_ready;
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
|
||||
@@ -357,13 +349,13 @@ module VX_shared_mem #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_core_reads <= 0;
|
||||
perf_core_writes <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
perf_core_reads <= 0;
|
||||
perf_core_writes <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -371,7 +363,8 @@ module VX_shared_mem #(
|
||||
assign perf_cache_if.writes = perf_core_writes;
|
||||
assign perf_cache_if.read_misses = '0;
|
||||
assign perf_cache_if.write_misses = '0;
|
||||
assign perf_cache_if.pipe_stalls = '0;
|
||||
assign perf_cache_if.mshr_stalls = '0;
|
||||
assign perf_cache_if.mem_stalls = '0;
|
||||
assign perf_cache_if.crsp_stalls = perf_crsp_stalls;
|
||||
`endif
|
||||
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
|
||||
interface VX_alu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -24,6 +25,7 @@ interface VX_alu_req_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -43,6 +45,7 @@ interface VX_alu_req_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -5,9 +5,12 @@
|
||||
|
||||
interface VX_cmt_to_csr_if ();
|
||||
|
||||
wire valid;
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
|
||||
|
||||
wire valid;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [$clog2(6*`NUM_THREADS+1)-1:0] commit_size;
|
||||
`else
|
||||
wire [$clog2(5*`NUM_THREADS+1)-1:0] commit_size;
|
||||
`endif
|
||||
modport master (
|
||||
output valid,
|
||||
output commit_size
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_commit_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -17,6 +18,7 @@ interface VX_commit_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -29,6 +31,7 @@ interface VX_commit_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_csr_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -20,6 +21,7 @@ interface VX_csr_req_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -35,6 +37,7 @@ interface VX_csr_req_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_decode_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -23,7 +24,8 @@ interface VX_decode_if ();
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -42,7 +44,8 @@ interface VX_decode_if ();
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_fpu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -20,6 +21,7 @@ interface VX_fpu_req_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -35,6 +37,7 @@ interface VX_fpu_req_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
interface VX_gpu_req_if();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -19,11 +19,11 @@ interface VX_gpu_req_if();
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -41,6 +41,7 @@ interface VX_gpu_req_if();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_ibuffer_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -31,6 +32,7 @@ interface VX_ibuffer_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -55,6 +57,7 @@ interface VX_ibuffer_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -5,14 +5,16 @@
|
||||
|
||||
interface VX_ifetch_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output valid,
|
||||
output uuid,
|
||||
output tmask,
|
||||
output wid,
|
||||
output PC,
|
||||
@@ -20,7 +22,8 @@ interface VX_ifetch_req_if ();
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input valid,
|
||||
input uuid,
|
||||
input tmask,
|
||||
input wid,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_ifetch_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
@@ -13,7 +14,8 @@ interface VX_ifetch_rsp_if ();
|
||||
wire ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output valid,
|
||||
output uuid,
|
||||
output tmask,
|
||||
output wid,
|
||||
output PC,
|
||||
@@ -22,7 +24,8 @@ interface VX_ifetch_rsp_if ();
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input valid,
|
||||
input uuid,
|
||||
input tmask,
|
||||
input wid,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_lsu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -21,6 +22,7 @@ interface VX_lsu_req_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -37,6 +39,7 @@ interface VX_lsu_req_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -11,7 +11,7 @@ interface VX_perf_cache_if ();
|
||||
wire [`PERF_CTR_BITS-1:0] write_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] bank_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] mshr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] crsp_stalls;
|
||||
|
||||
modport master (
|
||||
@@ -21,7 +21,7 @@ interface VX_perf_cache_if ();
|
||||
output write_misses,
|
||||
output bank_stalls,
|
||||
output mshr_stalls,
|
||||
output pipe_stalls,
|
||||
output mem_stalls,
|
||||
output crsp_stalls
|
||||
);
|
||||
|
||||
@@ -32,7 +32,7 @@ interface VX_perf_cache_if ();
|
||||
input write_misses,
|
||||
input bank_stalls,
|
||||
input mshr_stalls,
|
||||
input pipe_stalls,
|
||||
input mem_stalls,
|
||||
input crsp_stalls
|
||||
);
|
||||
|
||||
|
||||
@@ -7,68 +7,50 @@ interface VX_perf_memsys_if ();
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] icache_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_read_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] icache_crsp_stalls;
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_read_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_write_misses;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_bank_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_mshr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_pipe_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] dcache_crsp_stalls;
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] smem_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] smem_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] smem_bank_stalls;
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] mem_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_writes;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_latency;
|
||||
|
||||
modport master (
|
||||
output icache_reads,
|
||||
output icache_read_misses,
|
||||
output icache_pipe_stalls,
|
||||
output icache_crsp_stalls,
|
||||
output dcache_reads,
|
||||
output dcache_writes,
|
||||
output dcache_writes,
|
||||
output dcache_read_misses,
|
||||
output dcache_write_misses,
|
||||
output dcache_bank_stalls,
|
||||
output dcache_mshr_stalls,
|
||||
output dcache_pipe_stalls,
|
||||
output dcache_crsp_stalls,
|
||||
output smem_reads,
|
||||
output smem_writes,
|
||||
output smem_bank_stalls,
|
||||
output mem_reads,
|
||||
output mem_writes,
|
||||
output mem_stalls,
|
||||
output mem_latency
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input icache_reads,
|
||||
input icache_read_misses,
|
||||
input icache_pipe_stalls,
|
||||
input icache_crsp_stalls,
|
||||
input dcache_reads,
|
||||
input dcache_writes,
|
||||
input dcache_writes,
|
||||
input dcache_read_misses,
|
||||
input dcache_write_misses,
|
||||
input dcache_bank_stalls,
|
||||
input dcache_mshr_stalls,
|
||||
input dcache_pipe_stalls,
|
||||
input dcache_crsp_stalls,
|
||||
input smem_reads,
|
||||
input smem_writes,
|
||||
input smem_bank_stalls,
|
||||
input mem_reads,
|
||||
input mem_writes,
|
||||
input mem_stalls,
|
||||
input mem_latency
|
||||
);
|
||||
|
||||
|
||||
@@ -4,18 +4,27 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_pipeline_if ();
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] lsu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] csr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] alu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] loads;
|
||||
wire [`PERF_CTR_BITS-1:0] stores;
|
||||
wire [`PERF_CTR_BITS-1:0] branches;
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] lsu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] csr_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] alu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [`PERF_CTR_BITS-1:0] fpu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] fpu_stalls;
|
||||
`endif
|
||||
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
|
||||
|
||||
modport master (
|
||||
modport decode (
|
||||
output loads,
|
||||
output stores,
|
||||
output branches
|
||||
);
|
||||
|
||||
modport issue (
|
||||
output ibf_stalls,
|
||||
output scb_stalls,
|
||||
output lsu_stalls,
|
||||
@@ -25,9 +34,12 @@ interface VX_perf_pipeline_if ();
|
||||
output fpu_stalls,
|
||||
`endif
|
||||
output gpu_stalls
|
||||
);
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input loads,
|
||||
input stores,
|
||||
input branches,
|
||||
input ibf_stalls,
|
||||
input scb_stalls,
|
||||
input lsu_stalls,
|
||||
|
||||
23
hw/rtl/interfaces/VX_perf_tex_if.sv
Normal file
23
hw/rtl/interfaces/VX_perf_tex_if.sv
Normal file
@@ -0,0 +1,23 @@
|
||||
`ifndef VX_PERF_TEX_IF
|
||||
`define VX_PERF_TEX_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_tex_if ();
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] mem_reads;
|
||||
wire [`PERF_CTR_BITS-1:0] mem_latency;
|
||||
|
||||
modport master (
|
||||
output mem_reads,
|
||||
output mem_latency
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input mem_reads,
|
||||
input mem_latency
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_tex_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -20,6 +21,7 @@ interface VX_tex_req_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -33,6 +35,7 @@ interface VX_tex_req_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_tex_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
@@ -16,6 +17,7 @@ interface VX_tex_rsp_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output wid,
|
||||
output tmask,
|
||||
output PC,
|
||||
@@ -27,6 +29,7 @@ interface VX_tex_rsp_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input wid,
|
||||
input tmask,
|
||||
input PC,
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
interface VX_writeback_if ();
|
||||
|
||||
wire valid;
|
||||
wire [63:0] uuid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
@@ -16,6 +17,7 @@ interface VX_writeback_if ();
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output uuid,
|
||||
output tmask,
|
||||
output wid,
|
||||
output PC,
|
||||
@@ -27,6 +29,7 @@ interface VX_writeback_if ();
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input uuid,
|
||||
input tmask,
|
||||
input wid,
|
||||
input PC,
|
||||
|
||||
@@ -125,7 +125,7 @@ module VX_axi_adapter #(
|
||||
|
||||
// AXI write response channel
|
||||
`UNUSED_VAR (m_axi_bid);
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid || m_axi_bresp == 0, ("AXI response error"));
|
||||
`RUNTIME_ASSERT(~m_axi_bvalid || m_axi_bresp == 0, ("%t: *** AXI response error", $time));
|
||||
assign m_axi_bready = 1'b1;
|
||||
|
||||
// AXI read request channel
|
||||
@@ -144,7 +144,7 @@ module VX_axi_adapter #(
|
||||
assign mem_rsp_valid = m_axi_rvalid;
|
||||
assign mem_rsp_tag = m_axi_rid;
|
||||
assign mem_rsp_data = m_axi_rdata;
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rresp == 0, ("AXI response error"));
|
||||
`RUNTIME_ASSERT(~m_axi_rvalid || m_axi_rresp == 0, ("%t: *** AXI response error", $time));
|
||||
`UNUSED_VAR (m_axi_rlast);
|
||||
assign m_axi_rready = mem_rsp_ready;
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ module VX_index_queue #(
|
||||
assign enqueue = push;
|
||||
assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid
|
||||
|
||||
`RUNTIME_ASSERT(!push || !full, ("invalid inputs"));
|
||||
`RUNTIME_ASSERT(!push || !full, ("%t: *** invalid inputs", $time));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
||||
@@ -4,12 +4,17 @@
|
||||
module VX_popcount #(
|
||||
parameter MODEL = 1,
|
||||
parameter N = 1,
|
||||
parameter LOGN = $clog2(N),
|
||||
parameter M = LOGN+1
|
||||
parameter M = $clog2(N+1)
|
||||
) (
|
||||
input wire [N-1:0] in_i,
|
||||
output wire [M-1:0] cnt_o
|
||||
);
|
||||
`ifndef SYNTHESIS
|
||||
assign cnt_o = $countones(in_i);
|
||||
`else
|
||||
`ifdef QUARTUS
|
||||
assign cnt_o = $countones(in_i);
|
||||
`else
|
||||
if (N == 1) begin
|
||||
|
||||
assign cnt_o = in_i;
|
||||
@@ -53,6 +58,8 @@ module VX_popcount #(
|
||||
assign cnt_o = cnt_r;
|
||||
|
||||
end
|
||||
`endif
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
@@ -30,7 +30,7 @@ module VX_skid_buffer #(
|
||||
|
||||
end else if (NOBACKPRESSURE) begin
|
||||
|
||||
`RUNTIME_ASSERT(ready_out, ("ready_out should always be asserted"))
|
||||
`RUNTIME_ASSERT(ready_out, ("%t: *** ready_out should always be asserted", $time))
|
||||
|
||||
wire stall = valid_out && ~ready_out;
|
||||
|
||||
|
||||
@@ -6,6 +6,11 @@ module VX_tex_unit #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_tex_if.master perf_tex_if,
|
||||
`endif
|
||||
|
||||
// Texture unit <-> Memory Unit
|
||||
VX_dcache_req_if.master dcache_req_if,
|
||||
VX_dcache_rsp_if.slave dcache_rsp_if,
|
||||
@@ -18,10 +23,11 @@ module VX_tex_unit #(
|
||||
VX_tex_rsp_if.master tex_rsp_if
|
||||
);
|
||||
|
||||
localparam REQ_INFOW_S = `NR_BITS + 1 + `NW_BITS + 32;
|
||||
localparam REQ_INFOW_S = 64 + `NR_BITS + 1 + `NW_BITS + 32;
|
||||
localparam REQ_INFOW_A = `TEX_FORMAT_BITS + REQ_INFOW_S;
|
||||
localparam REQ_INFOW_M = (2 * `NUM_THREADS * `TEX_BLEND_FRAC) + REQ_INFOW_A;
|
||||
|
||||
reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit;
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0];
|
||||
reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0];
|
||||
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
|
||||
@@ -29,57 +35,60 @@ module VX_tex_unit #(
|
||||
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];
|
||||
|
||||
// CSRs programming
|
||||
// CSRs programming
|
||||
|
||||
reg [`NUM_TEX_UNITS-1:0] csrs_dirty;
|
||||
reg csrs_dirty [`NUM_TEX_UNITS-1:0];
|
||||
`UNUSED_VAR (csrs_dirty)
|
||||
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX(i, `TEX_STATE_ADDR) : begin
|
||||
tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_FORMAT) : begin
|
||||
tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_WRAPU) : begin
|
||||
tex_wraps[i][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_WRAPV) : begin
|
||||
tex_wraps[i][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_FILTER) : begin
|
||||
tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_WIDTH) : begin
|
||||
tex_logdims[i][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_HEIGHT) : begin
|
||||
tex_logdims[i][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
default: begin
|
||||
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
if (tex_csr_if.write_addr == `CSR_ADDR_BITS'(`CSR_TEX(i, `TEX_STATE_MIPOFF(j)))) begin
|
||||
`IGNORE_WARNINGS_END
|
||||
tex_mipoff[i][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX_UNIT: begin
|
||||
csr_tex_unit <= tex_csr_if.write_data[$clog2(`NUM_TEX_UNITS)-1:0];
|
||||
end
|
||||
`CSR_TEX_ADDR: begin
|
||||
tex_baddr[csr_tex_unit] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
`CSR_TEX_FORMAT: begin
|
||||
tex_format[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
`CSR_TEX_WRAPU: begin
|
||||
tex_wraps[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
`CSR_TEX_WRAPV: begin
|
||||
tex_wraps[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
`CSR_TEX_FILTER: begin
|
||||
tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
`CSR_TEX_WIDTH: begin
|
||||
tex_logdims[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
`CSR_TEX_HEIGHT: begin
|
||||
tex_logdims[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
default: begin
|
||||
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
if (tex_csr_if.write_addr == `CSR_TEX_MIPOFF(j)) begin
|
||||
`IGNORE_WARNINGS_END
|
||||
tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
csrs_dirty[csr_tex_unit] <= 1;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
if (reset || (tex_req_if.valid && tex_req_if.ready)) begin
|
||||
csrs_dirty[i] <= '0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
if (reset || (tex_req_if.valid && tex_req_if.ready)) begin
|
||||
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
csrs_dirty[i] <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -125,7 +134,7 @@ module VX_tex_unit #(
|
||||
.req_baseaddr(tex_baddr[tex_req_if.unit]),
|
||||
.req_mipoff (sel_mipoff),
|
||||
.req_logdims(sel_logdims),
|
||||
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
|
||||
.req_info ({tex_format[tex_req_if.unit], tex_req_if.uuid, tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
|
||||
.req_ready (tex_req_if.ready),
|
||||
|
||||
.rsp_valid (mem_req_valid),
|
||||
@@ -204,9 +213,47 @@ module VX_tex_unit #(
|
||||
.rsp_valid (tex_rsp_if.valid),
|
||||
.rsp_tmask (tex_rsp_if.tmask),
|
||||
.rsp_data (tex_rsp_if.data),
|
||||
.rsp_info ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
|
||||
.rsp_info ({tex_rsp_if.uuid, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
|
||||
.rsp_ready (tex_rsp_if.ready)
|
||||
);
|
||||
);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_req_per_cycle;
|
||||
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_rsp_per_cycle;
|
||||
|
||||
wire [`NUM_THREADS-1:0] perf_mem_req_per_mask = dcache_req_if.valid & dcache_req_if.ready;
|
||||
wire [`NUM_THREADS-1:0] perf_mem_rsp_per_mask = dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_if.valid & dcache_rsp_if.ready}};
|
||||
|
||||
`POP_COUNT(perf_mem_req_per_cycle, perf_mem_req_per_mask);
|
||||
`POP_COUNT(perf_mem_rsp_per_cycle, perf_mem_rsp_per_mask);
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_pending_reads;
|
||||
wire [$clog2(`NUM_THREADS+1)+1-1:0] perf_pending_reads_cycle = perf_mem_req_per_cycle - perf_mem_rsp_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_pending_reads <= 0;
|
||||
end else begin
|
||||
perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'($signed(perf_pending_reads_cycle));
|
||||
end
|
||||
end
|
||||
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
|
||||
reg [`PERF_CTR_BITS-1:0] perf_mem_latency;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_mem_reads <= 0;
|
||||
perf_mem_latency <= 0;
|
||||
end else begin
|
||||
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'(perf_mem_req_per_cycle);
|
||||
perf_mem_latency <= perf_mem_latency + `PERF_CTR_BITS'(perf_pending_reads);
|
||||
end
|
||||
end
|
||||
|
||||
assign perf_tex_if.mem_reads = perf_mem_reads;
|
||||
assign perf_tex_if.mem_latency = perf_mem_latency;
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_TEX
|
||||
always @(posedge clk) begin
|
||||
|
||||
@@ -123,9 +123,9 @@
|
||||
"!cci_pending_writes_full": 1,
|
||||
"?afu_mem_req_fire": 1,
|
||||
"afu_mem_req_addr": 26,
|
||||
"afu_mem_req_tag": 27,
|
||||
"afu_mem_req_tag": "`VX_MEM_TAG_WIDTH+1",
|
||||
"?afu_mem_rsp_fire": 1,
|
||||
"afu_mem_rsp_tag": 27
|
||||
"afu_mem_rsp_tag": "`VX_MEM_TAG_WIDTH+1"
|
||||
},
|
||||
"afu/vortex": {
|
||||
"!reset": 1,
|
||||
@@ -140,49 +140,29 @@
|
||||
"mem_rsp_tag":"`VX_MEM_TAG_WIDTH",
|
||||
"busy": 1
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": {
|
||||
"?icache_req_fire": 1,
|
||||
"icache_req_wid":"`NW_BITS",
|
||||
"icache_req_addr": 32,
|
||||
"icache_req_tag":"`ICACHE_CORE_TAG_ID_BITS",
|
||||
"?icache_rsp_fire": 1,
|
||||
"icache_rsp_data": 32,
|
||||
"icache_rsp_tag":"`ICACHE_CORE_TAG_ID_BITS"
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/fetch/warp_sched": {
|
||||
"?wsched_scheduled": 1,
|
||||
"wsched_schedule_uuid": 64,
|
||||
"wsched_active_warps": "`NUM_WARPS",
|
||||
"wsched_stalled_warps": "`NUM_WARPS",
|
||||
"wsched_schedule_tmask": "`NUM_THREADS",
|
||||
"wsched_schedule_wid": "`NW_BITS",
|
||||
"wsched_schedule_pc": "32"
|
||||
"wsched_schedule_pc": 32
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
|
||||
"?gpu_rsp_valid": 1,
|
||||
"gpu_rsp_wid": "`NW_BITS",
|
||||
"gpu_rsp_tmc": 1,
|
||||
"gpu_rsp_wspawn": 1,
|
||||
"gpu_rsp_split": 1,
|
||||
"gpu_rsp_barrier": 1
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
|
||||
"?dcache_req_fire":"`NUM_THREADS",
|
||||
"dcache_req_wid":"`NW_BITS",
|
||||
"dcache_req_pc": 32,
|
||||
"dcache_req_addr":"`NUM_THREADS * 32",
|
||||
"dcache_req_rw": 1,
|
||||
"dcache_req_byteen":"`NUM_THREADS * 4",
|
||||
"dcache_req_data": "`NUM_THREADS * 32",
|
||||
"dcache_req_tag":"`LSUQ_ADDR_BITS",
|
||||
"?dcache_rsp_fire":"`NUM_THREADS",
|
||||
"dcache_rsp_data":"`NUM_THREADS * 32",
|
||||
"dcache_rsp_tag":"`LSUQ_ADDR_BITS"
|
||||
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": {
|
||||
"?icache_req_fire": 1,
|
||||
"icache_req_uuid": 64,
|
||||
"icache_req_addr": 32,
|
||||
"icache_req_tag":"`ICACHE_CORE_TAG_ID_BITS",
|
||||
"?icache_rsp_fire": 1,
|
||||
"icache_rsp_uuid": 64,
|
||||
"icache_rsp_data": 32,
|
||||
"icache_rsp_tag":"`ICACHE_CORE_TAG_ID_BITS"
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/issue": {
|
||||
"?issue_fire": 1,
|
||||
"issue_wid":"`NW_BITS",
|
||||
"issue_tmask":"`NUM_THREADS",
|
||||
"issue_pc": 32,
|
||||
"issue_uuid": 64,
|
||||
"issue_tmask":"`NUM_THREADS",
|
||||
"issue_ex_type":"`EX_BITS",
|
||||
"issue_op_type":"`INST_OP_BITS",
|
||||
"issue_op_mod":"`INST_MOD_BITS",
|
||||
@@ -198,15 +178,35 @@
|
||||
"gpr_rs2":"`NUM_THREADS * 32",
|
||||
"gpr_rs3":"`NUM_THREADS * 32",
|
||||
"?writeback_valid": 1,
|
||||
"writeback_wid":"`NW_BITS",
|
||||
"writeback_pc": 32,
|
||||
"writeback_uuid": 64,
|
||||
"writeback_tmask":"`NUM_THREADS",
|
||||
"writeback_rd":"`NR_BITS",
|
||||
"writeback_data":"`NUM_THREADS * 32",
|
||||
"writeback_eop": 1,
|
||||
"!scoreboard_delay": 1,
|
||||
"!dispatch_delay": 1
|
||||
},
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
|
||||
"?dcache_req_fire":"`NUM_THREADS",
|
||||
"dcache_req_uuid": 64,
|
||||
"dcache_req_addr":"`NUM_THREADS * 32",
|
||||
"dcache_req_rw": 1,
|
||||
"dcache_req_byteen":"`NUM_THREADS * 4",
|
||||
"dcache_req_data":"`NUM_THREADS * 32",
|
||||
"dcache_req_tag":"`LSUQ_ADDR_BITS",
|
||||
"?dcache_rsp_fire":"`NUM_THREADS",
|
||||
"dcache_rsp_uuid": 64,
|
||||
"dcache_rsp_data":"`NUM_THREADS * 32",
|
||||
"dcache_rsp_tag":"`LSUQ_ADDR_BITS"
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
|
||||
"?gpu_rsp_valid": 1,
|
||||
"gpu_rsp_uuid": 64,
|
||||
"gpu_rsp_tmc": 1,
|
||||
"gpu_rsp_wspawn": 1,
|
||||
"gpu_rsp_split": 1,
|
||||
"gpu_rsp_barrier": 1
|
||||
},
|
||||
"afu/vortex/l3cache/bank, afu/vortex/cluster/l2cache/bank, afu/vortex/cluster/core/mem_unit/dcache/bank, afu/vortex/cluster/core/mem_unit/icache/bank": {
|
||||
"?valid_st0": 1,
|
||||
"?valid_st1": 1,
|
||||
|
||||
Reference in New Issue
Block a user