Fixes: texture unit memory accesses sometimes going to smem; bilinear texture filtering. New: cache req_id, …
This commit is contained in:
@@ -236,18 +236,30 @@
|
||||
|
||||
////////// Texture Units //////////////////////////////////////////////////////
|
||||
|
||||
`define NUM_TEX_UNITS 2
|
||||
`define NUM_TEX_UNITS 2
|
||||
`define TEX_SUBPIXEL_BITS 8
|
||||
|
||||
`define CSR_TEX_STATES 7
|
||||
`define CSR_TEX_BEGIN(x) (12'hFD0 + (x) * `CSR_TEX_STATES)
|
||||
`define TEX_DIM_BITS 15
|
||||
`define TEX_LOD_MAX `TEX_DIM_BITS
|
||||
`define TEX_LOD_BITS 4
|
||||
|
||||
`define CSR_TEX_ADDR(x) (`CSR_TEX_BEGIN(x) + 12'h00)
|
||||
`define CSR_TEX_FORMAT(x) (`CSR_TEX_BEGIN(x) + 12'h01)
|
||||
`define CSR_TEX_WRAP(x) (`CSR_TEX_BEGIN(x) + 12'h02)
|
||||
`define CSR_TEX_FILTER(x) (`CSR_TEX_BEGIN(x) + 12'h03)
|
||||
`define CSR_TEX_MIPOFF(x) (`CSR_TEX_BEGIN(x) + 12'h04)
|
||||
`define CSR_TEX_WIDTH(x) (`CSR_TEX_BEGIN(x) + 12'h05)
|
||||
`define CSR_TEX_HEIGHT(x) (`CSR_TEX_BEGIN(x) + 12'h06)
|
||||
`define TEX_FXD_BITS 32
|
||||
`define TEX_FXD_FRAC (`TEX_DIM_BITS+`TEX_SUBPIXEL_BITS)
|
||||
|
||||
`define TEX_STATE_ADDR 0
|
||||
`define TEX_STATE_WIDTH 1
|
||||
`define TEX_STATE_HEIGHT 2
|
||||
`define TEX_STATE_FORMAT 3
|
||||
`define TEX_STATE_FILTER 4
|
||||
`define TEX_STATE_WRAPU 5
|
||||
`define TEX_STATE_WRAPV 6
|
||||
`define TEX_STATE_MIPOFF(lod) (7+(lod))
|
||||
|
||||
`define NUM_TEX_STATES (7+`TEX_LOD_MAX)
|
||||
|
||||
// Texture CSR address mapping.
// CSR_TEX(unit,state) : address of texture state number 'state' of texture
//                       unit 'unit'; each unit occupies NUM_TEX_STATES
//                       consecutive addresses starting at 12'hFD0.
// CSR_TEX_UNIT(csr)   : inverse mapping, recovers the unit index from a CSR
//                       address (offset from 12'hFD0 divided by NUM_TEX_STATES).
// CSR_TEX_STATE(csr)  : inverse mapping, recovers the state index within the
//                       unit (offset from 12'hFD0 modulo NUM_TEX_STATES).
// The inverse macros do not range-check 'csr'; callers are expected to pass an
// address inside [CSR_TEX(0,0), CSR_TEX(NUM_TEX_UNITS,0)).
`define CSR_TEX(unit,state) (12'hFD0 + ((unit) * `NUM_TEX_STATES) + (state))
|
||||
`define CSR_TEX_UNIT(csr) (((csr) - 12'hFD0) / `NUM_TEX_STATES)
|
||||
`define CSR_TEX_STATE(csr) (((csr) - 12'hFD0) % `NUM_TEX_STATES)
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -266,6 +278,11 @@
|
||||
`define FPUQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Texture Unit Request Queue
|
||||
`ifndef TEXQ_SIZE
|
||||
`define TEXQ_SIZE (`NUM_WARPS * 2)
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
|
||||
@@ -50,35 +50,40 @@ module VX_csr_data #(
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FFLAGS_BITS-1:0] fcsr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (reset) begin
|
||||
fcsr <= '0;
|
||||
end
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
`endif
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
|
||||
`CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
default: begin
|
||||
`ASSERT(write_addr >= `CSR_TEX_BEGIN(0)
|
||||
&& write_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES),
|
||||
("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
end
|
||||
endcase
|
||||
end else begin
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_to_csr_if.write_enable) begin
|
||||
fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFLAGS_BITS-1:0]
|
||||
| fpu_to_csr_if.write_fflags;
|
||||
end
|
||||
`endif
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: fcsr[write_wid][`FFLAGS_BITS-1:0] <= write_data[`FFLAGS_BITS-1:0];
|
||||
`CSR_FRM: fcsr[write_wid][`INST_FRM_BITS+`FFLAGS_BITS-1:`FFLAGS_BITS] <= write_data[`INST_FRM_BITS-1:0];
|
||||
`CSR_FCSR: fcsr[write_wid] <= write_data[`FFLAGS_BITS+`INST_FRM_BITS-1:0];
|
||||
`CSR_SATP: csr_satp <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MSTATUS: csr_mstatus <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEDELEG: csr_medeleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIDELEG: csr_mideleg <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MIE: csr_mie <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MTVEC: csr_mtvec <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_MEPC: csr_mepc <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data[`CSR_WIDTH-1:0];
|
||||
default: begin
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`ASSERT(write_addr >= `CSR_TEX(0,0)
|
||||
&& write_addr < `CSR_TEX(`NUM_TEX_UNITS, 0),
|
||||
("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
`else
|
||||
`ASSERT(~write_enable, ("%t: invalid CSR write address: %0h", $time, write_addr));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -217,11 +222,16 @@ module VX_csr_data #(
|
||||
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
|
||||
|
||||
default: begin
|
||||
if (!((read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
|
||||
|| (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32)
|
||||
|| (read_addr >= `CSR_TEX_BEGIN(0) && read_addr < `CSR_TEX_BEGIN(`CSR_TEX_STATES))))) begin
|
||||
if ((read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
|
||||
|| (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32))) begin
|
||||
read_addr_valid_r = 1;
|
||||
end else
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
if (read_addr >= `CSR_TEX(0,0) && read_addr < `CSR_TEX(`NUM_TEX_UNITS,0)) begin
|
||||
read_addr_valid_r = 1;
|
||||
end else
|
||||
`endif
|
||||
read_addr_valid_r = 0;
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
@@ -214,9 +214,9 @@ module VX_decode #(
|
||||
case (u_12)
|
||||
12'h000: op_type = `INST_OP_BITS'(`INST_BR_ECALL);
|
||||
12'h001: op_type = `INST_OP_BITS'(`INST_BR_EBREAK);
|
||||
12'h002: op_type = `INST_OP_BITS'(`INST_BR_URET);
|
||||
12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h302: op_type = `INST_OP_BITS'(`INST_BR_MRET);
|
||||
12'h102: op_type = `INST_OP_BITS'(`INST_BR_SRET);
|
||||
12'h7B2: op_type = `INST_OP_BITS'(`INST_BR_DRET);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
@@ -347,7 +347,7 @@ module VX_decode #(
|
||||
endcase
|
||||
end
|
||||
`endif
|
||||
`INST_GPU: begin
|
||||
`INST_GPGPU: begin
|
||||
ex_type = `EX_GPU;
|
||||
case (func3)
|
||||
3'h0: begin
|
||||
@@ -374,9 +374,21 @@ module VX_decode #(
|
||||
is_wstall = 1;
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
end
|
||||
3'h5: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'(`INST_LSU_LW);
|
||||
op_mod = `INST_MOD_BITS'(2);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_GPU: begin
|
||||
case (func3)
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
3'h0: begin
|
||||
ex_type = `EX_GPU;
|
||||
op_type = `INST_OP_BITS'(`INST_GPU_TEX);
|
||||
op_mod = `INST_MOD_BITS'(func2);
|
||||
use_rd = 1;
|
||||
@@ -386,12 +398,6 @@ module VX_decode #(
|
||||
`USED_IREG (rs3);
|
||||
end
|
||||
`endif
|
||||
3'h6: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'(`INST_LSU_LW);
|
||||
op_mod = `INST_MOD_BITS'(2);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
@@ -66,7 +66,8 @@
|
||||
`define INST_FNMADD 7'b1001111
|
||||
`define INST_FCI 7'b1010011 // float common instructions
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
`define INST_GPGPU 7'b1101011
|
||||
`define INST_GPU 7'b1011011
|
||||
|
||||
`define INST_TEX 7'b0101011
|
||||
|
||||
@@ -117,9 +118,9 @@
|
||||
`define INST_BR_JALR 4'b1001
|
||||
`define INST_BR_ECALL 4'b1010
|
||||
`define INST_BR_EBREAK 4'b1011
|
||||
`define INST_BR_MRET 4'b1100
|
||||
`define INST_BR_URET 4'b1100
|
||||
`define INST_BR_SRET 4'b1101
|
||||
`define INST_BR_DRET 4'b1110
|
||||
`define INST_BR_MRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_NEG(x) x[1]
|
||||
@@ -185,14 +186,14 @@
|
||||
`define INST_FPU_NMADD 4'hF
|
||||
`define INST_FPU_BITS 4
|
||||
|
||||
`define INST_GPU_TMC 3'h0
|
||||
`define INST_GPU_WSPAWN 3'h1
|
||||
`define INST_GPU_SPLIT 3'h2
|
||||
`define INST_GPU_JOIN 3'h3
|
||||
`define INST_GPU_BAR 3'h4
|
||||
`define INST_GPU_PRED 3'h5
|
||||
`define INST_GPU_TEX 3'h6
|
||||
`define INST_GPU_BITS 3
|
||||
`define INST_GPU_TMC 4'h0
|
||||
`define INST_GPU_WSPAWN 4'h1
|
||||
`define INST_GPU_SPLIT 4'h2
|
||||
`define INST_GPU_JOIN 4'h3
|
||||
`define INST_GPU_BAR 4'h4
|
||||
`define INST_GPU_PRED 4'h5
|
||||
`define INST_GPU_TEX 4'h6
|
||||
`define INST_GPU_BITS 4
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -237,11 +238,9 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO // wid PC
|
||||
`define DBG_CACHE_REQ_MDATAW (`NW_BITS + 32)
|
||||
`else
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
// cache request identifier
|
||||
`define DBG_CACHE_REQ_IDW 48
|
||||
// Builds a DBG_CACHE_REQ_IDW-bit cache request identifier value:
//   - the 4-bit 'type' argument is placed in the top nibble,
//   - the remaining (DBG_CACHE_REQ_IDW-4) low bits start at zero,
//   - 'ctr' is then added to that base value.
// The expansion, the 'ctr' argument and the replication count are fully
// parenthesized so the macro can be used inside a larger expression (or be
// given a compound 'ctr' expression) without operator-precedence surprises.
`define DBG_CACHE_REQ_ID(type, ctr) ({4'(type), {(`DBG_CACHE_REQ_IDW-4){1'b0}}} + (ctr))
|
||||
|
||||
// non-cacheable tag bits
|
||||
`define NC_TAG_BIT 1
|
||||
@@ -249,6 +248,9 @@
|
||||
// texture tag bits
|
||||
`define TEX_TAG_BIT 1
|
||||
|
||||
// cache address type bits
|
||||
`define CACHE_ADDR_TYPE_BITS (`NC_TAG_BIT + `SM_ENABLE)
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
@@ -264,7 +266,7 @@
|
||||
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICACHE_CORE_TAG_ID_BITS)
|
||||
`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_IDW + `ICACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
|
||||
@@ -289,17 +291,14 @@
|
||||
// Core request tag bits
|
||||
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_TAG_BIT + `SM_ENABLE)
|
||||
`define TEX_TAG_ID_BITS (2)
|
||||
`define LSU_TEX_TAG_ID_BITS `MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS)
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + `TEX_TAG_BIT)
|
||||
`define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TAG_ID_BITS)
|
||||
`define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `TEX_TAG_ID_BITS)
|
||||
`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TEX_TAG_ID_BITS)
|
||||
`define LSU_TAG_ID_BITS `MAX(`LSUQ_ADDR_BITS, 2)
|
||||
`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_IDW + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS + `TEX_TAG_BIT)
|
||||
`else
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_TAG_BIT + `SM_ENABLE)
|
||||
`define LSU_TAG_ID_BITS `LSUQ_ADDR_BITS
|
||||
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
|
||||
`endif
|
||||
`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCACHE_CORE_TAG_ID_BITS)
|
||||
`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_IDW + `DCACHE_CORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
|
||||
@@ -52,51 +52,29 @@ module VX_execute #(
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`LSU_DCACHE_TAG_BITS)
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) lsu_dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`LSU_DCACHE_TAG_BITS)
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) lsu_dcache_rsp_if();
|
||||
|
||||
VX_dcache_req_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`TEX_DCACHE_TAG_BITS)
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) tex_dcache_req_if();
|
||||
|
||||
VX_dcache_rsp_if #(
|
||||
.NUM_REQS (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_WIDTH (`TEX_DCACHE_TAG_BITS)
|
||||
.TAG_WIDTH (`LSU_TEX_DCACHE_TAG_BITS)
|
||||
) tex_dcache_rsp_if();
|
||||
|
||||
VX_tex_csr_if tex_csr_if();
|
||||
|
||||
wire [`NUM_THREADS-1:0][`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_in, lsu_tag_in;
|
||||
wire [`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_out, lsu_tag_out;
|
||||
|
||||
`UNUSED_VAR (tex_tag_out)
|
||||
`UNUSED_VAR (lsu_tag_out)
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign tex_tag_in[i][`LSU_TEX_TAG_ID_BITS-1:0] = `LSU_TEX_TAG_ID_BITS'(tex_dcache_req_if.tag[i][`TEX_TAG_ID_BITS-1:0]);
|
||||
assign lsu_tag_in[i][`LSU_TEX_TAG_ID_BITS-1:0] = `LSU_TEX_TAG_ID_BITS'(lsu_dcache_req_if.tag[i][`LSU_TAG_ID_BITS-1:0]);
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign tex_tag_in[i][`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS] = tex_dcache_req_if.tag[i][`TEX_DCACHE_TAG_BITS-1:`TEX_TAG_ID_BITS];
|
||||
assign lsu_tag_in[i][`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS] = lsu_dcache_req_if.tag[i][`LSU_DCACHE_TAG_BITS-1:`LSU_TAG_ID_BITS];
|
||||
`endif
|
||||
end
|
||||
|
||||
assign tex_dcache_rsp_if.tag[`TEX_TAG_ID_BITS-1:0] = tex_tag_out[`TEX_TAG_ID_BITS-1:0];
|
||||
assign lsu_dcache_rsp_if.tag[`LSU_TAG_ID_BITS-1:0] = lsu_tag_out[`LSU_TAG_ID_BITS-1:0];
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign tex_dcache_rsp_if.tag[`TEX_DCACHE_TAG_BITS-1:`TEX_TAG_ID_BITS] = tex_tag_out[`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS];
|
||||
assign lsu_dcache_rsp_if.tag[`LSU_DCACHE_TAG_BITS-1:`LSU_TAG_ID_BITS] = lsu_tag_out[`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS];
|
||||
`endif
|
||||
|
||||
VX_cache_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
@@ -113,7 +91,7 @@ module VX_execute #(
|
||||
.req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
|
||||
.req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}),
|
||||
.req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}),
|
||||
.req_tag_in ({tex_tag_in, lsu_tag_in}),
|
||||
.req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}),
|
||||
.req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}),
|
||||
|
||||
// Dcache request
|
||||
@@ -136,7 +114,7 @@ module VX_execute #(
|
||||
.rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
|
||||
.rsp_tmask_out ({tex_dcache_rsp_if.tmask, lsu_dcache_rsp_if.tmask}),
|
||||
.rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}),
|
||||
.rsp_tag_out ({tex_tag_out, lsu_tag_out}),
|
||||
.rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}),
|
||||
.rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready})
|
||||
);
|
||||
|
||||
|
||||
@@ -24,10 +24,17 @@ module VX_icache_stage #(
|
||||
|
||||
localparam OUT_REG = 0;
|
||||
|
||||
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
|
||||
wire [`NW_BITS-1:0] req_tag, rsp_tag;
|
||||
|
||||
`UNUSED_VAR (rsp_req_id)
|
||||
|
||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||
|
||||
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid;
|
||||
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];
|
||||
assign req_tag = ifetch_req_if.wid;
|
||||
assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];
|
||||
assign rsp_req_id = icache_rsp_if.tag[`NW_BITS +: `DBG_CACHE_REQ_IDW];
|
||||
|
||||
wire [31:0] rsp_PC;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
@@ -51,16 +58,21 @@ module VX_icache_stage #(
|
||||
// Icache Request
|
||||
assign icache_req_if.valid = ifetch_req_if.valid;
|
||||
assign icache_req_if.addr = ifetch_req_if.PC[31:2];
|
||||
assign icache_req_if.tag = {req_id, req_tag};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
req_id <= `DBG_CACHE_REQ_ID(0, 0);
|
||||
end else begin
|
||||
if (icache_req_fire) begin
|
||||
req_id <= req_id + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Can accept new request?
|
||||
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign icache_req_if.tag = {ifetch_req_if.wid, ifetch_req_if.PC, req_tag};
|
||||
`else
|
||||
assign icache_req_if.tag = req_tag;
|
||||
`endif
|
||||
|
||||
wire [`NW_BITS-1:0] rsp_wid = rsp_tag;
|
||||
|
||||
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);
|
||||
@@ -90,11 +102,11 @@ module VX_icache_stage #(
|
||||
|
||||
`ifdef DBG_TRACE_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC);
|
||||
if (icache_req_fire) begin
|
||||
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h, req_id=%0h\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, req_id);
|
||||
end
|
||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, data=%0h\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data);
|
||||
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, req_id=%0h, data=%0h\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, rsp_req_id, ifetch_rsp_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -24,8 +24,6 @@ module VX_lsu_unit #(
|
||||
|
||||
localparam REQ_ASHIFT = `CLOG2(`DCACHE_WORD_SIZE);
|
||||
|
||||
localparam ADDR_TYPEW = `NC_TAG_BIT + `SM_ENABLE;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
|
||||
`STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
@@ -44,7 +42,7 @@ module VX_lsu_unit #(
|
||||
|
||||
wire mbuf_empty;
|
||||
|
||||
wire [`NUM_THREADS-1:0][ADDR_TYPEW-1:0] lsu_addr_type, req_addr_type;
|
||||
wire [`NUM_THREADS-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type, req_addr_type;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] full_addr;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
@@ -83,7 +81,7 @@ module VX_lsu_unit #(
|
||||
wire lsu_wb = lsu_req_if.wb | lsu_req_if.is_prefetch;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
@@ -104,19 +102,22 @@ module VX_lsu_unit #(
|
||||
wire rsp_is_dup;
|
||||
wire rsp_is_prefetch;
|
||||
|
||||
`UNUSED_VAR (rsp_type)
|
||||
`UNUSED_VAR (rsp_is_prefetch)
|
||||
|
||||
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
|
||||
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
|
||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||
reg is_req_start;
|
||||
|
||||
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire mbuf_full;
|
||||
|
||||
`UNUSED_VAR (rsp_type)
|
||||
`UNUSED_VAR (rsp_is_prefetch)
|
||||
`UNUSED_VAR (rsp_req_id)
|
||||
|
||||
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign req_offset[i] = req_addr[i][1:0];
|
||||
@@ -124,6 +125,8 @@ module VX_lsu_unit #(
|
||||
|
||||
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
|
||||
|
||||
wire dcache_req_fire_any = (| dcache_req_fire);
|
||||
|
||||
wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
|
||||
@@ -135,7 +138,8 @@ module VX_lsu_unit #(
|
||||
|
||||
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[ADDR_TYPEW +: `LSUQ_ADDR_BITS];
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: `LSUQ_ADDR_BITS];
|
||||
assign rsp_req_id = dcache_rsp_if.tag[(`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS) +: `DBG_CACHE_REQ_IDW];
|
||||
`UNUSED_VAR (dcache_rsp_if.tag)
|
||||
|
||||
// do not writeback from software prefetch
|
||||
@@ -214,7 +218,7 @@ module VX_lsu_unit #(
|
||||
0: mem_req_byteen[req_offset[i]] = 1;
|
||||
1: begin
|
||||
mem_req_byteen[req_offset[i]] = 1;
|
||||
mem_req_byteen[{req_addr[i][1], 1'b1}] = 1;
|
||||
mem_req_byteen[{req_offset[i][1], 1'b1}] = 1;
|
||||
end
|
||||
default : mem_req_byteen = {4{1'b1}};
|
||||
endcase
|
||||
@@ -235,12 +239,17 @@ module VX_lsu_unit #(
|
||||
assign dcache_req_if.addr[i] = req_addr[i][31:2];
|
||||
assign dcache_req_if.byteen[i] = mem_req_byteen;
|
||||
assign dcache_req_if.data[i] = mem_req_data;
|
||||
assign dcache_req_if.tag[i] = {req_id, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]};
|
||||
end
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag[i] = {req_wid, req_pc, req_tag, req_addr_type[i]};
|
||||
`else
|
||||
assign dcache_req_if.tag[i] = {req_tag, req_addr_type[i]};
|
||||
`endif
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
req_id <= `DBG_CACHE_REQ_ID(1, 0);
|
||||
end else begin
|
||||
if (dcache_req_fire_any) begin
|
||||
req_id <= req_id + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = req_dep_ready && dcache_req_ready;
|
||||
@@ -339,22 +348,21 @@ module VX_lsu_unit #(
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CORE_DCACHE
|
||||
wire dcache_req_fire_any = (| dcache_req_fire);
|
||||
always @(posedge clk) begin
|
||||
if (lsu_req_if.valid && fence_wait) begin
|
||||
dpi_trace("%d: *** D$%0d fence wait\n", $time, CORE_ID);
|
||||
end
|
||||
if (dcache_req_fire_any) begin
|
||||
if (dcache_req_if.rw[0]) begin
|
||||
dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_id);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
dpi_trace(", data=");
|
||||
`TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
|
||||
dpi_trace("\n");
|
||||
dpi_trace(", req_id=%0h\n", req_id);
|
||||
end else begin
|
||||
dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire);
|
||||
dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire, req_id);
|
||||
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
@@ -362,8 +370,8 @@ module VX_lsu_unit #(
|
||||
end
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=",
|
||||
$time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd);
|
||||
dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, rd=%0d, data=",
|
||||
$time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, rsp_req_id, mbuf_raddr, rsp_rd);
|
||||
`TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
|
||||
dpi_trace(", is_dup=%b\n", rsp_is_dup);
|
||||
end
|
||||
|
||||
71
hw/rtl/cache/VX_bank.sv
vendored
71
hw/rtl/cache/VX_bank.sv
vendored
@@ -33,9 +33,6 @@ module VX_bank #(
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
// size of tag id in core request tag
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
@@ -96,14 +93,9 @@ module VX_bank #(
|
||||
input wire [`LINE_SELECT_BITS-1:0] flush_addr
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_TAG_ID_BITS)
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [31:0] debug_pc_sel, debug_pc_st0, debug_pc_st1;
|
||||
wire [`NW_BITS-1:0] debug_wid_sel, debug_wid_st0, debug_wid_st1;
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] req_id_sel, req_id_st0, req_id_st1;
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
wire [NUM_PORTS-1:0] creq_pmask;
|
||||
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] creq_wsel;
|
||||
@@ -197,13 +189,7 @@ module VX_bank #(
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire creq_fire = creq_valid && creq_ready;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[0][`CACHE_REQ_INFO_RNG] : creq_tag[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_sel, debug_pc_sel} = 0;
|
||||
end
|
||||
`endif
|
||||
assign req_id_sel = mshr_enable ? mshr_tag[0][`CACHE_REQ_ID_RNG] : creq_tag[0][`CACHE_REQ_ID_RNG];
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
|
||||
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
|
||||
@@ -237,13 +223,7 @@ module VX_bank #(
|
||||
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_id_st0})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = tag_st0[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st0, debug_pc_st0} = 0;
|
||||
end
|
||||
`endif
|
||||
assign req_id_st0 = tag_st0[0][`CACHE_REQ_ID_RNG];
|
||||
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
@@ -263,11 +243,9 @@ module VX_bank #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st0),
|
||||
.debug_wid (debug_wid_st0),
|
||||
`endif
|
||||
.stall (crsq_stall),
|
||||
.req_id (req_id_st0),
|
||||
|
||||
.stall (crsq_stall),
|
||||
|
||||
// read/Fill
|
||||
.lookup (do_lookup_st0),
|
||||
@@ -293,13 +271,7 @@ module VX_bank #(
|
||||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, miss_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = tag_st1[0][`CACHE_REQ_INFO_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st1, debug_pc_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
assign req_id_st1 = tag_st1[0][`CACHE_REQ_ID_RNG];
|
||||
|
||||
wire do_read_st0 = valid_st0 && is_read_st0;
|
||||
wire do_read_st1 = valid_st1 && is_read_st1;
|
||||
@@ -323,10 +295,8 @@ module VX_bank #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st1),
|
||||
.debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
.req_id (req_id_st1),
|
||||
|
||||
.stall (crsq_stall),
|
||||
|
||||
.read (do_read_st1 || do_mshr_st1),
|
||||
@@ -372,14 +342,9 @@ module VX_bank #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.deq_debug_pc (debug_pc_sel),
|
||||
.deq_debug_wid (debug_wid_sel),
|
||||
.lkp_debug_pc (debug_pc_st0),
|
||||
.lkp_debug_wid (debug_wid_st0),
|
||||
.rel_debug_pc (debug_pc_st1),
|
||||
.rel_debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
.deq_req_id (req_id_sel),
|
||||
.lkp_req_id (req_id_st0),
|
||||
.rel_req_id (req_id_st1),
|
||||
|
||||
// allocate
|
||||
.allocate_valid (mshr_allocate),
|
||||
@@ -525,22 +490,22 @@ module VX_bank #(
|
||||
dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data);
|
||||
end
|
||||
if (mshr_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, debug_wid_sel, debug_pc_sel);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel);
|
||||
end
|
||||
if (creq_fire) begin
|
||||
if (creq_rw)
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel);
|
||||
end
|
||||
if (crsq_fire) begin
|
||||
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
|
||||
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1);
|
||||
end
|
||||
if (mreq_push) begin
|
||||
if (is_write_st1)
|
||||
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
|
||||
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1);
|
||||
else
|
||||
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, debug_wid_st1, debug_pc_st1);
|
||||
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
3
hw/rtl/cache/VX_cache.sv
vendored
3
hw/rtl/cache/VX_cache.sv
vendored
@@ -580,8 +580,7 @@ module VX_cache #(
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_X_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_X_BITS),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_X_WIDTH),
|
||||
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
|
||||
) bank (
|
||||
`SCOPE_BIND_VX_cache_bank(i)
|
||||
|
||||
7
hw/rtl/cache/VX_cache_define.vh
vendored
7
hw/rtl/cache/VX_cache_define.vh
vendored
@@ -3,9 +3,8 @@
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`include "VX_define.vh"
|
||||
`endif
|
||||
// cache request identifier
|
||||
`define DBG_CACHE_REQ_IDW 48
|
||||
|
||||
`define REQS_BITS `LOG2UP(NUM_REQS)
|
||||
|
||||
@@ -52,7 +51,7 @@
|
||||
|
||||
`define LINE_TAG_ADDR(x) x[`LINE_ADDR_WIDTH-1 : `LINE_SELECT_BITS]
|
||||
|
||||
`define CACHE_REQ_INFO_RNG CORE_TAG_WIDTH-1 : (CORE_TAG_WIDTH-`DBG_CACHE_REQ_MDATAW)
|
||||
`define CACHE_REQ_ID_RNG CORE_TAG_WIDTH-1 : (CORE_TAG_WIDTH-`DBG_CACHE_REQ_IDW)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
9
hw/rtl/cache/VX_data_access.sv
vendored
9
hw/rtl/cache/VX_data_access.sv
vendored
@@ -21,12 +21,9 @@ module VX_data_access #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
input wire[`DBG_CACHE_REQ_IDW-1:0] req_id,
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
|
||||
@@ -125,10 +122,10 @@ module VX_data_access #(
|
||||
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, read_data);
|
||||
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, req_id=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, read_data);
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byteen, line_addr, write_data);
|
||||
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, req_id=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, byteen, line_addr, write_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
26
hw/rtl/cache/VX_miss_resrv.sv
vendored
26
hw/rtl/cache/VX_miss_resrv.sv
vendored
@@ -25,16 +25,11 @@ module VX_miss_resrv #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[31:0] deq_debug_pc,
|
||||
input wire[`NW_BITS-1:0] deq_debug_wid,
|
||||
input wire[31:0] lkp_debug_pc,
|
||||
input wire[`NW_BITS-1:0] lkp_debug_wid,
|
||||
input wire[31:0] rel_debug_pc,
|
||||
input wire[`NW_BITS-1:0] rel_debug_wid,
|
||||
input wire[`DBG_CACHE_REQ_IDW-1:0] deq_req_id,
|
||||
input wire[`DBG_CACHE_REQ_IDW-1:0] lkp_req_id,
|
||||
input wire[`DBG_CACHE_REQ_IDW-1:0] rel_req_id,
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
// allocate
|
||||
input wire allocate_valid,
|
||||
@@ -206,23 +201,22 @@ module VX_miss_resrv #(
|
||||
always @(posedge clk) begin
|
||||
if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin
|
||||
if (allocate_fire)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_debug_wid, deq_debug_pc);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_req_id);
|
||||
if (fill_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
|
||||
if (dequeue_fire)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_debug_wid, deq_debug_pc);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_id);
|
||||
if (lookup_replay)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id);
|
||||
if (lookup_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_debug_wid, lkp_debug_pc);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, req_id=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_req_id);
|
||||
if (release_valid)
|
||||
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d, wid=%0d, PC=%0h\n", $time, CACHE_ID, BANK_ID,
|
||||
release_id, rel_debug_wid, rel_debug_pc);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id);
|
||||
dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
|
||||
for (integer i = 0; i < MSHR_SIZE; ++i) begin
|
||||
if (valid_table[i]) begin
|
||||
|
||||
29
hw/rtl/cache/VX_shared_mem.sv
vendored
29
hw/rtl/cache/VX_shared_mem.sv
vendored
@@ -254,22 +254,19 @@ module VX_shared_mem #(
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1;
|
||||
wire [NUM_BANKS-1:0][`NW_BITS-1:0] debug_wid_st0, debug_wid_st1;
|
||||
wire [NUM_BANKS-1:0][`DBG_CACHE_REQ_IDW-1:0] req_id_st0, req_id_st1;
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_wid_st0[i], debug_pc_st0[i]} = per_bank_core_req_tag_unqual[i][`CACHE_REQ_INFO_RNG];
|
||||
assign {debug_wid_st1[i], debug_pc_st1[i]} = per_bank_core_req_tag[i][`CACHE_REQ_INFO_RNG];
|
||||
assign req_id_st0[i] = per_bank_core_req_tag_unqual[i][`CACHE_REQ_ID_RNG];
|
||||
assign req_id_st1[i] = per_bank_core_req_tag[i][`CACHE_REQ_ID_RNG];
|
||||
end else begin
|
||||
assign {debug_wid_st0[i], debug_pc_st0[i]} = 0;
|
||||
assign {debug_wid_st1[i], debug_pc_st1[i]} = 0;
|
||||
assign req_id_st0[i] = 0;
|
||||
assign req_id_st1[i] = 0;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_BANK
|
||||
|
||||
@@ -309,11 +306,11 @@ module VX_shared_mem #(
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid_unqual[i]) begin
|
||||
if (per_bank_core_req_rw_unqual[i]) begin
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], debug_wid_st0[i], debug_pc_st0[i]);
|
||||
dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, req_id=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], req_id_st0[i]);
|
||||
end else begin
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], debug_wid_st0[i], debug_pc_st0[i]);
|
||||
dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h, req_id=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], req_id_st0[i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -322,11 +319,11 @@ module VX_shared_mem #(
|
||||
for (integer i = 0; i < NUM_BANKS; ++i) begin
|
||||
if (per_bank_core_req_valid[i]) begin
|
||||
if (per_bank_core_req_rw[i]) begin
|
||||
dpi_trace("%d: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i], debug_wid_st1[i], debug_pc_st1[i]);
|
||||
dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h, req_id=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_data[i], req_id_st1[i]);
|
||||
end else begin
|
||||
dpi_trace("%d: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_rsp_data[i], debug_wid_st1[i], debug_pc_st1[i]);
|
||||
dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h, req_id=%0h\n",
|
||||
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_rsp_data[i], req_id_st1[i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
9
hw/rtl/cache/VX_tag_access.sv
vendored
9
hw/rtl/cache/VX_tag_access.sv
vendored
@@ -17,12 +17,9 @@ module VX_tag_access #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
input wire[`DBG_CACHE_REQ_IDW-1:0] req_id,
|
||||
`IGNORE_UNUSED_END
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
|
||||
@@ -71,9 +68,9 @@ module VX_tag_access #(
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_match) begin
|
||||
dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag);
|
||||
dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, req_id=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, line_tag);
|
||||
end else begin
|
||||
dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, line_tag, read_tag);
|
||||
dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, req_id=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, line_tag, read_tag);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -12,13 +12,13 @@ module VX_tex_addr #(
|
||||
|
||||
input wire req_valid,
|
||||
input wire [NUM_REQS-1:0] req_tmask,
|
||||
input wire [1:0][NUM_REQS-1:0][31:0] req_coords,
|
||||
input wire [1:0][NUM_REQS-1:0][`TEX_FXD_BITS-1:0] req_coords,
|
||||
input wire [`TEX_FORMAT_BITS-1:0] req_format,
|
||||
input wire [`TEX_FILTER_BITS-1:0] req_filter,
|
||||
input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps,
|
||||
input wire [`TEX_ADDR_BITS-1:0] req_baseaddr,
|
||||
input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff,
|
||||
input wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] req_logdims,
|
||||
input wire [NUM_REQS-1:0][1:0][`TEX_LOD_BITS-1:0] req_logdims,
|
||||
input wire [REQ_INFOW-1:0] req_info,
|
||||
output wire req_ready,
|
||||
|
||||
@@ -27,31 +27,33 @@ module VX_tex_addr #(
|
||||
output wire rsp_valid,
|
||||
output wire [NUM_REQS-1:0] rsp_tmask,
|
||||
output wire [`TEX_FILTER_BITS-1:0] rsp_filter,
|
||||
output wire [`TEX_STRIDE_BITS-1:0] rsp_stride,
|
||||
output wire [`TEX_LGSTRIDE_BITS-1:0] rsp_lgstride,
|
||||
output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr,
|
||||
output wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends,
|
||||
output wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends,
|
||||
output wire [REQ_INFOW-1:0] rsp_info,
|
||||
input wire rsp_ready
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam PITCH_BITS = `MAX(`TEX_DIM_BITS, `TEX_STRIDE_BITS) + 1;
|
||||
localparam SCALED_U_W = `FIXED_INT + `TEX_STRIDE_BITS;
|
||||
localparam SCALED_X_W = (2 * `FIXED_INT);
|
||||
localparam SCALED_V_W = SCALED_X_W + `TEX_STRIDE_BITS;
|
||||
localparam SHIFT_BITS = $clog2(`TEX_FXD_FRAC+1);
|
||||
localparam PITCH_BITS = `MAX(`TEX_LOD_BITS, `TEX_LGSTRIDE_BITS) + 1;
|
||||
localparam SCALED_X_W = `TEX_DIM_BITS + `TEX_BLEND_FRAC;
|
||||
localparam OFFSET_U_W = `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX;
|
||||
localparam OFFSET_V_W = `TEX_DIM_BITS + `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX;
|
||||
|
||||
wire valid_s0;
|
||||
wire [NUM_REQS-1:0] tmask_s0;
|
||||
wire [`TEX_FILTER_BITS-1:0] filter_s0;
|
||||
wire [REQ_INFOW-1:0] req_info_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_lo, clamped_lo_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0;
|
||||
wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_lo, clamped_lo_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_FXD_FRAC-1:0] clamped_hi, clamped_hi_s0;
|
||||
wire [NUM_REQS-1:0][1:0][SHIFT_BITS-1:0] dim_shift, dim_shift_s0;
|
||||
wire [`TEX_LGSTRIDE_BITS-1:0] log_stride, log_stride_s0;
|
||||
wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0;
|
||||
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
|
||||
|
||||
wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
|
||||
|
||||
wire stall_out;
|
||||
|
||||
// stride
|
||||
@@ -67,9 +69,9 @@ module VX_tex_addr #(
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
wire [`FIXED_FRAC-1:0] delta = (`FIXED_HALF >> req_logdims[i][j]);
|
||||
wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - 32'(delta)) : req_coords[j][i];
|
||||
wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + 32'(delta)) : req_coords[j][i];
|
||||
wire [`TEX_FXD_FRAC-1:0] delta = (`TEX_FXD_HALF >> req_logdims[i][j]);
|
||||
wire [`TEX_FXD_BITS-1:0] coord_lo = req_filter ? (req_coords[j][i] - `TEX_FXD_BITS'(delta)) : req_coords[j][i];
|
||||
wire [`TEX_FXD_BITS-1:0] coord_hi = req_filter ? (req_coords[j][i] + `TEX_FXD_BITS'(delta)) : req_coords[j][i];
|
||||
|
||||
VX_tex_wrap #(
|
||||
.CORE_ID (CORE_ID)
|
||||
@@ -86,66 +88,72 @@ module VX_tex_addr #(
|
||||
.coord_i (coord_hi),
|
||||
.coord_o (clamped_hi[i][j])
|
||||
);
|
||||
|
||||
assign dim_shift[i][j] = (`TEX_FXD_FRAC - `TEX_BLEND_FRAC - req_logdims[i][j]);
|
||||
end
|
||||
assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride);
|
||||
assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * SHIFT_BITS + 32 + 2 * 2 * `TEX_FXD_FRAC)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, req_logdims, mip_addr, clamped_lo, clamped_hi}),
|
||||
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
|
||||
.data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, dim_shift, mip_addr, clamped_lo, clamped_hi}),
|
||||
.data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, dim_shift_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
|
||||
);
|
||||
|
||||
// addresses generation
|
||||
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo;
|
||||
wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi;
|
||||
wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends;
|
||||
wire [NUM_REQS-1:0][1:0][SCALED_X_W-1:0] scaled_lo;
|
||||
wire [NUM_REQS-1:0][1:0][SCALED_X_W-1:0] scaled_hi;
|
||||
wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_lo;
|
||||
wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_hi;
|
||||
wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_lo;
|
||||
wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_hi;
|
||||
wire [NUM_REQS-1:0][31:0] base_addr_lo;
|
||||
wire [NUM_REQS-1:0][31:0] base_addr_hi;
|
||||
wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] blends;
|
||||
wire [NUM_REQS-1:0][3:0][31:0] addr;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
for (genvar j = 0; j < 2; ++j) begin
|
||||
assign scaled_lo[i][j] = scale_to_dim(clamped_lo_s0[i][j], log_dims_s0[i][j]);
|
||||
assign scaled_hi[i][j] = scale_to_dim(clamped_hi_s0[i][j], log_dims_s0[i][j]);
|
||||
assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
|
||||
assign scaled_lo[i][j] = SCALED_X_W'(clamped_lo_s0[i][j] >> dim_shift_s0[i][j]);
|
||||
assign scaled_hi[i][j] = SCALED_X_W'(clamped_hi_s0[i][j] >> dim_shift_s0[i][j]);
|
||||
assign blends[i][j] = filter_s0 ? scaled_lo[i][j][`TEX_BLEND_FRAC-1:0] : `TEX_BLEND_FRAC'(0);
|
||||
end
|
||||
end
|
||||
|
||||
`UNUSED_VAR (log_pitch_s0)
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire [SCALED_U_W-1:0] offset_u_lo = SCALED_U_W'(scaled_lo[i][0]) << log_stride_s0;
|
||||
wire [SCALED_U_W-1:0] offset_u_hi = SCALED_U_W'(scaled_hi[i][0]) << log_stride_s0;
|
||||
assign offset_u_lo[i] = OFFSET_U_W'(scaled_lo[i][0][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_stride_s0;
|
||||
assign offset_u_hi[i] = OFFSET_U_W'(scaled_hi[i][0][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_stride_s0;
|
||||
|
||||
wire [SCALED_V_W-1:0] offset_v_lo = SCALED_V_W'(scaled_lo[i][1]) << log_pitch_s0[i];
|
||||
wire [SCALED_V_W-1:0] offset_v_hi = SCALED_V_W'(scaled_hi[i][1]) << log_pitch_s0[i];
|
||||
assign offset_v_lo[i] = OFFSET_V_W'(scaled_lo[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i];
|
||||
assign offset_v_hi[i] = OFFSET_V_W'(scaled_hi[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i];
|
||||
|
||||
wire [31:0] base_addr_lo = mip_addr_s0[i] + 32'(offset_v_lo);
|
||||
wire [31:0] base_addr_hi = mip_addr_s0[i] + 32'(offset_v_hi);
|
||||
assign base_addr_lo[i] = mip_addr_s0[i] + 32'(offset_v_lo[i]);
|
||||
assign base_addr_hi[i] = mip_addr_s0[i] + 32'(offset_v_hi[i]);
|
||||
|
||||
assign addr[i][0] = base_addr_lo + 32'(offset_u_lo);
|
||||
assign addr[i][1] = base_addr_lo + 32'(offset_u_hi);
|
||||
assign addr[i][2] = base_addr_hi + 32'(offset_u_lo);
|
||||
assign addr[i][3] = base_addr_hi + 32'(offset_u_hi);
|
||||
assign addr[i][0] = base_addr_lo[i] + 32'(offset_u_lo[i]);
|
||||
assign addr[i][1] = base_addr_lo[i] + 32'(offset_u_hi[i]);
|
||||
assign addr[i][2] = base_addr_hi[i] + 32'(offset_u_lo[i]);
|
||||
assign addr[i][3] = base_addr_hi[i] + 32'(offset_u_hi[i]);
|
||||
end
|
||||
|
||||
assign stall_out = rsp_valid && ~rsp_ready;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `BLEND_FRAC) + REQ_INFOW),
|
||||
.DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `TEX_BLEND_FRAC) + REQ_INFOW),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~stall_out),
|
||||
.data_in ({valid_s0, tmask_s0, filter_s0, log_stride_s0, addr, blends, req_info_s0}),
|
||||
.data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_stride, rsp_addr, rsp_blends, rsp_info})
|
||||
.data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_lgstride, rsp_addr, rsp_blends, rsp_info})
|
||||
);
|
||||
|
||||
assign req_ready = ~stall_out;
|
||||
@@ -157,22 +165,47 @@ module VX_tex_addr #(
|
||||
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (req_valid && ~stall_out) begin
|
||||
dpi_trace("%d: *** log_pitch=", $time);
|
||||
`TRACE_ARRAY1D(log_pitch, NUM_REQS);
|
||||
dpi_trace(", mip_addr=");
|
||||
`TRACE_ARRAY1D(mip_addr, NUM_REQS);
|
||||
dpi_trace(", req_logdims=");
|
||||
`TRACE_ARRAY2D(req_logdims, 2, NUM_REQS);
|
||||
dpi_trace(", clamped_lo=");
|
||||
`TRACE_ARRAY2D(clamped_lo, 2, NUM_REQS);
|
||||
dpi_trace(", clamped_hi=");
|
||||
`TRACE_ARRAY2D(clamped_hi, 2, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
|
||||
if (valid_s0 && ~stall_out) begin
|
||||
dpi_trace("%d: *** scaled_lo=", $time);
|
||||
`TRACE_ARRAY2D(scaled_lo, 2, NUM_REQS);
|
||||
dpi_trace(", scaled_hi=");
|
||||
`TRACE_ARRAY2D(scaled_hi, 2, NUM_REQS);
|
||||
dpi_trace(", offset_u_lo=");
|
||||
`TRACE_ARRAY1D(offset_u_lo, NUM_REQS);
|
||||
dpi_trace(", offset_u_hi=");
|
||||
`TRACE_ARRAY1D(offset_u_hi, NUM_REQS);
|
||||
dpi_trace(", offset_v_lo=");
|
||||
`TRACE_ARRAY1D(offset_v_lo, NUM_REQS);
|
||||
dpi_trace(", offset_v_hi=");
|
||||
`TRACE_ARRAY1D(offset_v_hi, NUM_REQS);
|
||||
dpi_trace(", base_addr_lo=");
|
||||
`TRACE_ARRAY1D(base_addr_lo, NUM_REQS);
|
||||
dpi_trace(", base_addr_hi=");
|
||||
`TRACE_ARRAY1D(base_addr_hi, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
|
||||
if (rsp_valid && rsp_ready) begin
|
||||
dpi_trace("%d: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, tride=%0d, addr=",
|
||||
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_stride);
|
||||
dpi_trace("%d: core%0d-tex-addr: wid=%0d, PC=%0h, tmask=%b, req_filter=%0d, lgstride=%0d, addr=",
|
||||
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask, rsp_filter, rsp_lgstride);
|
||||
`TRACE_ARRAY2D(rsp_addr, 4, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
function logic [`FIXED_INT-1:0] scale_to_dim (input logic [`FIXED_FRAC-1:0] src,
|
||||
input logic [`TEX_DIM_BITS-1:0] dim);
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
logic [`FIXED_BITS-1:0] out;
|
||||
`IGNORE_WARNINGS_END
|
||||
out = `FIXED_BITS'(src) << dim;
|
||||
return out[`FIXED_FRAC +: `FIXED_INT];
|
||||
endfunction
|
||||
|
||||
endmodule
|
||||
@@ -3,31 +3,26 @@
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define FIXED_BITS 32
|
||||
`define FIXED_FRAC 20
|
||||
`define FIXED_INT (`FIXED_BITS - `FIXED_FRAC)
|
||||
`define FIXED_ONE (2 ** `FIXED_FRAC)
|
||||
`define FIXED_HALF (`FIXED_ONE >> 1)
|
||||
`define FIXED_MASK (`FIXED_ONE - 1)
|
||||
`define TEX_FXD_INT (`TEX_FXD_BITS - `TEX_FXD_FRAC)
|
||||
`define TEX_FXD_ONE (2 ** `TEX_FXD_FRAC)
|
||||
`define TEX_FXD_HALF (`TEX_FXD_ONE >> 1)
|
||||
`define TEX_FXD_MASK (`TEX_FXD_ONE - 1)
|
||||
|
||||
`define TEX_ADDR_BITS 32
|
||||
`define TEX_FORMAT_BITS 3
|
||||
`define TEX_WRAP_BITS 2
|
||||
`define TEX_DIM_BITS 4
|
||||
`define TEX_FILTER_BITS 1
|
||||
`define TEX_MIPOFF_BITS (2*`TEX_DIM_BITS+1)
|
||||
|
||||
`define TEX_MIPOFF_BITS (2*12+1)
|
||||
`define TEX_STRIDE_BITS 2
|
||||
|
||||
`define TEX_LOD_BITS 4
|
||||
`define TEX_MIP_BITS (`NTEX_BITS + `TEX_LOD_BITS)
|
||||
`define TEX_LGSTRIDE_MAX 2
|
||||
`define TEX_LGSTRIDE_BITS 2
|
||||
|
||||
`define TEX_WRAP_CLAMP 0
|
||||
`define TEX_WRAP_REPEAT 1
|
||||
`define TEX_WRAP_MIRROR 2
|
||||
|
||||
`define BLEND_FRAC 8
|
||||
`define BLEND_ONE (2 ** `BLEND_FRAC)
|
||||
`define TEX_BLEND_FRAC 8
|
||||
`define TEX_BLEND_ONE (2 ** `TEX_BLEND_FRAC)
|
||||
|
||||
`define TEX_FORMAT_R8G8B8A8 `TEX_FORMAT_BITS'(0)
|
||||
`define TEX_FORMAT_R5G6B5 `TEX_FORMAT_BITS'(1)
|
||||
|
||||
@@ -15,7 +15,7 @@ module VX_tex_mem #(
|
||||
input wire req_valid,
|
||||
input wire [NUM_REQS-1:0] req_tmask,
|
||||
input wire [`TEX_FILTER_BITS-1:0] req_filter,
|
||||
input wire [`TEX_STRIDE_BITS-1:0] req_stride,
|
||||
input wire [`TEX_LGSTRIDE_BITS-1:0] req_lgstride,
|
||||
input wire [NUM_REQS-1:0][3:0][31:0] req_addr,
|
||||
input wire [REQ_INFOW-1:0] req_info,
|
||||
output wire req_ready,
|
||||
@@ -63,23 +63,23 @@ module VX_tex_mem #(
|
||||
wire [NUM_REQS-1:0] q_req_tmask;
|
||||
wire [`TEX_FILTER_BITS-1:0] q_req_filter;
|
||||
wire [REQ_INFOW-1:0] q_req_info;
|
||||
wire [`TEX_STRIDE_BITS-1:0] q_req_stride;
|
||||
wire [`TEX_LGSTRIDE_BITS-1:0] q_req_lgstride;
|
||||
wire [3:0][NUM_REQS-1:0][1:0] q_align_offs;
|
||||
wire [3:0] q_dup_reqs;
|
||||
|
||||
assign reqq_push = req_valid && req_ready;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + (4 * NUM_REQS * 2) + 4),
|
||||
.SIZE (`LSUQ_SIZE),
|
||||
.DATAW ((NUM_REQS * 4 * 30) + NUM_REQS + REQ_INFOW + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (4 * NUM_REQS * 2) + 4),
|
||||
.SIZE (`TEXQ_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (reqq_push),
|
||||
.pop (reqq_pop),
|
||||
.data_in ({req_addr_w, req_tmask, req_info, req_filter, req_stride, align_offs, dup_reqs}),
|
||||
.data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_stride, q_align_offs, q_dup_reqs}),
|
||||
.data_in ({req_addr_w, req_tmask, req_info, req_filter, req_lgstride, align_offs, dup_reqs}),
|
||||
.data_out ({q_req_addr, q_req_tmask, q_req_info, q_req_filter, q_req_lgstride, q_align_offs, q_dup_reqs}),
|
||||
.empty (reqq_empty),
|
||||
.full (reqq_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
@@ -96,8 +96,12 @@ module VX_tex_mem #(
|
||||
wire sent_all_ready, last_texel_sent;
|
||||
wire req_texel_dup;
|
||||
wire [NUM_REQS-1:0][29:0] req_texel_addr;
|
||||
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
|
||||
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
|
||||
reg [1:0] req_texel_idx;
|
||||
reg req_texels_done;
|
||||
|
||||
`UNUSED_VAR (rsp_req_id)
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || last_texel_sent) begin
|
||||
@@ -146,14 +150,19 @@ module VX_tex_mem #(
|
||||
assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask;
|
||||
assign dcache_req_if.rw = {NUM_REQS{1'b0}};
|
||||
assign dcache_req_if.addr = req_texel_addr;
|
||||
assign dcache_req_if.byteen = {NUM_REQS{4'b1111}};
|
||||
assign dcache_req_if.byteen = {NUM_REQS{4'b0}};
|
||||
assign dcache_req_if.data = 'x;
|
||||
assign dcache_req_if.tag = {NUM_REQS{req_id, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}};
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag = {NUM_REQS{q_req_info[`DBG_CACHE_REQ_MDATAW-1:0], req_texel_idx}};
|
||||
`else
|
||||
assign dcache_req_if.tag = {NUM_REQS{req_texel_idx}};
|
||||
`endif
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
req_id <= `DBG_CACHE_REQ_ID(2, 0);
|
||||
end else begin
|
||||
if (dcache_req_fire_any) begin
|
||||
req_id <= req_id + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Dcache Response
|
||||
|
||||
@@ -162,14 +171,17 @@ module VX_tex_mem #(
|
||||
reg [NUM_REQS-1:0][31:0] rsp_data_qual;
|
||||
reg [RSP_CTR_W-1:0] rsp_rem_ctr, rsp_rem_ctr_init;
|
||||
wire [RSP_CTR_W-1:0] rsp_rem_ctr_n;
|
||||
wire [NUM_REQS-1:0][1:0] rsp_align_offs;
|
||||
wire dcache_rsp_fire;
|
||||
wire [1:0] rsp_texel_idx;
|
||||
wire rsp_texel_dup;
|
||||
|
||||
assign rsp_texel_idx = dcache_rsp_if.tag[1:0];
|
||||
|
||||
assign rsp_texel_idx = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: 2];
|
||||
assign rsp_req_id = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS +: `DBG_CACHE_REQ_IDW];
|
||||
`UNUSED_VAR (dcache_rsp_if.tag)
|
||||
|
||||
assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx];
|
||||
assign rsp_align_offs = q_align_offs[rsp_texel_idx];
|
||||
|
||||
assign dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
|
||||
|
||||
@@ -180,12 +192,12 @@ module VX_tex_mem #(
|
||||
reg [31:0] rsp_data_shifted;
|
||||
always @(*) begin
|
||||
rsp_data_shifted[31:16] = src_data[31:16];
|
||||
rsp_data_shifted[15:0] = q_align_offs[rsp_texel_idx][i][1] ? src_data[31:16] : src_data[15:0];
|
||||
rsp_data_shifted[7:0] = q_align_offs[rsp_texel_idx][i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
|
||||
rsp_data_shifted[15:0] = rsp_align_offs[i][1] ? src_data[31:16] : src_data[15:0];
|
||||
rsp_data_shifted[7:0] = rsp_align_offs[i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0];
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
case (q_req_stride)
|
||||
case (q_req_lgstride)
|
||||
0: rsp_data_qual[i] = 32'(rsp_data_shifted[7:0]);
|
||||
1: rsp_data_qual[i] = 32'(rsp_data_shifted[15:0]);
|
||||
default: rsp_data_qual[i] = rsp_data_shifted;
|
||||
@@ -266,20 +278,20 @@ module VX_tex_mem #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (dcache_req_fire_any) begin
|
||||
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_idx);
|
||||
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, addr=",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_id, req_texel_idx);
|
||||
`TRACE_ARRAY1D(req_texel_addr, NUM_REQS);
|
||||
dpi_trace(", is_dup=%b\n", req_texel_dup);
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_texel_idx);
|
||||
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, data=",
|
||||
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_req_id, rsp_texel_idx);
|
||||
`TRACE_ARRAY1D(dcache_rsp_if.data, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
if (req_valid && req_ready) begin
|
||||
dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, stride=%0d, addr=",
|
||||
$time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_stride);
|
||||
dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, addr=",
|
||||
$time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_lgstride);
|
||||
`TRACE_ARRAY2D(req_addr, 4, NUM_REQS);
|
||||
dpi_trace("\n");
|
||||
end
|
||||
|
||||
@@ -12,7 +12,7 @@ module VX_tex_sampler #(
|
||||
input wire req_valid,
|
||||
input wire [NUM_REQS-1:0] req_tmask,
|
||||
input wire [`TEX_FORMAT_BITS-1:0] req_format,
|
||||
input wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] req_blends,
|
||||
input wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] req_blends,
|
||||
input wire [NUM_REQS-1:0][3:0][31:0] req_data,
|
||||
input wire [REQ_INFOW-1:0] req_info,
|
||||
output wire req_ready,
|
||||
@@ -32,7 +32,7 @@ module VX_tex_sampler #(
|
||||
wire [REQ_INFOW-1:0] req_info_s0;
|
||||
wire [NUM_REQS-1:0][31:0] texel_ul, texel_uh;
|
||||
wire [NUM_REQS-1:0][31:0] texel_ul_s0, texel_uh_s0;
|
||||
wire [NUM_REQS-1:0][`BLEND_FRAC-1:0] blend_v, blend_v_s0;
|
||||
wire [NUM_REQS-1:0][`TEX_BLEND_FRAC-1:0] blend_v, blend_v_s0;
|
||||
wire [NUM_REQS-1:0][31:0] texel_v;
|
||||
|
||||
wire stall_out;
|
||||
@@ -52,7 +52,7 @@ module VX_tex_sampler #(
|
||||
end
|
||||
|
||||
wire [7:0] beta = req_blends[i][0];
|
||||
wire [8:0] alpha = `BLEND_ONE - beta;
|
||||
wire [8:0] alpha = `TEX_BLEND_ONE - beta;
|
||||
|
||||
VX_tex_lerp #(
|
||||
) tex_lerp_ul (
|
||||
@@ -76,7 +76,7 @@ module VX_tex_sampler #(
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * `BLEND_FRAC) + (2 * NUM_REQS * 32)),
|
||||
.DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * `TEX_BLEND_FRAC) + (2 * NUM_REQS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
@@ -88,7 +88,7 @@ module VX_tex_sampler #(
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
wire [7:0] beta = blend_v_s0[i];
|
||||
wire [8:0] alpha = `BLEND_ONE - beta;
|
||||
wire [8:0] alpha = `TEX_BLEND_ONE - beta;
|
||||
|
||||
VX_tex_lerp #(
|
||||
) tex_lerp_v (
|
||||
|
||||
@@ -4,11 +4,11 @@ module VX_tex_stride #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire [`TEX_FORMAT_BITS-1:0] format,
|
||||
output wire [`TEX_STRIDE_BITS-1:0] log_stride
|
||||
output wire [`TEX_LGSTRIDE_BITS-1:0] log_stride
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`TEX_STRIDE_BITS-1:0] log_stride_r;
|
||||
reg [`TEX_LGSTRIDE_BITS-1:0] log_stride_r;
|
||||
|
||||
always @(*) begin
|
||||
case (format)
|
||||
|
||||
@@ -20,13 +20,13 @@ module VX_tex_unit #(
|
||||
|
||||
localparam REQ_INFOW_S = `NR_BITS + 1 + `NW_BITS + 32;
|
||||
localparam REQ_INFOW_A = `TEX_FORMAT_BITS + REQ_INFOW_S;
|
||||
localparam REQ_INFOW_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFOW_A;
|
||||
localparam REQ_INFOW_M = (2 * `NUM_THREADS * `TEX_BLEND_FRAC) + REQ_INFOW_A;
|
||||
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [1:0][`TEX_DIM_BITS-1:0] tex_dims [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0];
|
||||
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0];
|
||||
reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0];
|
||||
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0];
|
||||
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
|
||||
reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0];
|
||||
|
||||
// CSRs programming
|
||||
@@ -35,38 +35,46 @@ module VX_tex_unit #(
|
||||
`UNUSED_VAR (csrs_dirty)
|
||||
|
||||
for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS];
|
||||
always @(posedge clk) begin
|
||||
if (tex_csr_if.write_enable) begin
|
||||
case (tex_csr_if.write_addr)
|
||||
`CSR_TEX_ADDR(i) : begin
|
||||
`CSR_TEX(i, `TEX_STATE_ADDR) : begin
|
||||
tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX_FORMAT(i) : begin
|
||||
`CSR_TEX(i, `TEX_STATE_FORMAT) : begin
|
||||
tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX_WRAP(i) : begin
|
||||
tex_wraps[i][0] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS];
|
||||
tex_wraps[i][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS];
|
||||
`CSR_TEX(i, `TEX_STATE_WRAPU) : begin
|
||||
tex_wraps[i][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX_FILTER(i) : begin
|
||||
`CSR_TEX(i, `TEX_STATE_WRAPV) : begin
|
||||
tex_wraps[i][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX(i, `TEX_STATE_FILTER) : begin
|
||||
tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX_MIPOFF(i) : begin
|
||||
tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
`CSR_TEX(i, `TEX_STATE_WIDTH) : begin
|
||||
tex_logdims[i][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX_WIDTH(i) : begin
|
||||
tex_dims[i][mip_level][0] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
|
||||
`CSR_TEX(i, `TEX_STATE_HEIGHT) : begin
|
||||
tex_logdims[i][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
`CSR_TEX_HEIGHT(i) : begin
|
||||
tex_dims[i][mip_level][1] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
// Fallback arm of the CSR write case for texture unit i: any write address that did not match
// one of the explicit state arms is compared against each per-LOD mip-offset CSR of this unit
// (j = 0 .. TEX_LOD_MAX). On a match the low TEX_MIPOFF_BITS of write_data are stored into
// tex_mipoff[i][j] and the unit is flagged in csrs_dirty.
// NOTE(review): with the header defines NUM_TEX_STATES = 7+TEX_LOD_MAX and
// TEX_STATE_MIPOFF(lod) = 7+lod, the j == TEX_LOD_MAX entry has state index NUM_TEX_STATES,
// so CSR_TEX(i, TEX_STATE_MIPOFF(TEX_LOD_MAX)) is the same address as
// CSR_TEX(i+1, TEX_STATE_ADDR). A write to unit i+1's base-address CSR therefore lands in this
// default arm for unit i and also overwrites tex_mipoff[i][TEX_LOD_MAX]. NUM_TEX_STATES
// likely needs to be 7+TEX_LOD_MAX+1 - confirm and fix in the defines.
default: begin
|
||||
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
|
||||
// The warning guards wrap only the comparison line; presumably they silence a width
// warning on the size cast - confirm against the macro definitions.
`IGNORE_WARNINGS_BEGIN
|
||||
if (tex_csr_if.write_addr == `CSR_ADDR_BITS'(`CSR_TEX(i, `TEX_STATE_MIPOFF(j)))) begin
|
||||
`IGNORE_WARNINGS_END
|
||||
tex_mipoff[i][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
|
||||
csrs_dirty[i] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@@ -78,14 +86,15 @@ module VX_tex_unit #(
|
||||
|
||||
// mipmap attributes
|
||||
|
||||
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff;
|
||||
wire [`NUM_THREADS-1:0][1:0][`TEX_DIM_BITS-1:0] sel_dims;
|
||||
wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff;
|
||||
wire [`NUM_THREADS-1:0][1:0][`TEX_LOD_BITS-1:0] sel_logdims;
|
||||
|
||||
// Per-thread mipmap attribute selection. For each thread i the requested texture unit and
// that thread's mip level index the stored per-unit state:
//   sel_mipoff[i]     = tex_mipoff[unit][mip_level]
//   sel_logdims[i][k] = tex_logdims[unit][k] - mip_level   (k = 0, 1)
// NOTE(review): this span holds two declarations of mip_level (lod bits [20 +: TEX_LOD_BITS]
// and the low TEX_LOD_BITS bits of lod) as well as both a sel_dims and the sel_logdims
// assignments. It reads like a rendered diff containing the old and new lines together -
// confirm which ones are live in the real file.
// NOTE(review): no clamp is visible on the subtraction; operands and result are all
// TEX_LOD_BITS wide, so presumably the value wraps if mip_level exceeds the stored
// dimension value - confirm that callers bound lod.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
|
||||
wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS];
|
||||
assign sel_mipoff[i] = tex_mipoff[unit][mip_level];
|
||||
assign sel_dims[i] = tex_dims[unit][mip_level];
|
||||
wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][`TEX_LOD_BITS-1:0];
|
||||
assign sel_mipoff[i] = tex_mipoff[unit][mip_level];
|
||||
assign sel_logdims[i][0] = (tex_logdims[unit][0] - mip_level);
|
||||
assign sel_logdims[i][1] = (tex_logdims[unit][1] - mip_level);
|
||||
end
|
||||
|
||||
// address generation
|
||||
@@ -93,8 +102,8 @@ module VX_tex_unit #(
|
||||
wire mem_req_valid;
|
||||
wire [`NUM_THREADS-1:0] mem_req_tmask;
|
||||
wire [`TEX_FILTER_BITS-1:0] mem_req_filter;
|
||||
wire [`TEX_STRIDE_BITS-1:0] mem_req_stride;
|
||||
wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] mem_req_blends;
|
||||
wire [`TEX_LGSTRIDE_BITS-1:0] mem_req_lgstride;
|
||||
wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends;
|
||||
wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr;
|
||||
wire [REQ_INFOW_A-1:0] mem_req_info;
|
||||
wire mem_req_ready;
|
||||
@@ -113,16 +122,16 @@ module VX_tex_unit #(
|
||||
.req_format (tex_format[tex_req_if.unit]),
|
||||
.req_filter (tex_filter[tex_req_if.unit]),
|
||||
.req_wraps (tex_wraps[tex_req_if.unit]),
|
||||
.req_baseaddr (tex_baddr[tex_req_if.unit]),
|
||||
.req_baseaddr(tex_baddr[tex_req_if.unit]),
|
||||
.req_mipoff (sel_mipoff),
|
||||
.req_logdims (sel_dims),
|
||||
.req_logdims(sel_logdims),
|
||||
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
|
||||
.req_ready (tex_req_if.ready),
|
||||
|
||||
.rsp_valid (mem_req_valid),
|
||||
.rsp_tmask (mem_req_tmask),
|
||||
.rsp_filter (mem_req_filter),
|
||||
.rsp_stride (mem_req_stride),
|
||||
.rsp_lgstride(mem_req_lgstride),
|
||||
.rsp_addr (mem_req_addr),
|
||||
.rsp_blends (mem_req_blends),
|
||||
.rsp_info (mem_req_info),
|
||||
@@ -142,8 +151,8 @@ module VX_tex_unit #(
|
||||
.REQ_INFOW (REQ_INFOW_M),
|
||||
.NUM_REQS (`NUM_THREADS)
|
||||
) tex_mem (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// memory interface
|
||||
.dcache_req_if (dcache_req_if),
|
||||
@@ -153,7 +162,7 @@ module VX_tex_unit #(
|
||||
.req_valid (mem_req_valid),
|
||||
.req_tmask (mem_req_tmask),
|
||||
.req_filter(mem_req_filter),
|
||||
.req_stride(mem_req_stride),
|
||||
.req_lgstride(mem_req_lgstride),
|
||||
.req_addr (mem_req_addr),
|
||||
.req_info ({mem_req_blends, mem_req_info}),
|
||||
.req_ready (mem_req_ready),
|
||||
@@ -168,7 +177,7 @@ module VX_tex_unit #(
|
||||
|
||||
// apply sampler
|
||||
|
||||
wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends;
|
||||
wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends;
|
||||
wire [`TEX_FORMAT_BITS-1:0] rsp_format;
|
||||
wire [REQ_INFOW_S-1:0] rsp_info;
|
||||
|
||||
@@ -205,13 +214,12 @@ module VX_tex_unit #(
|
||||
// Debug trace of texture CSR state: for every texture unit whose csrs_dirty bit is set, print
// the unit's base address, dimension values, format, U/V wrap modes, filter and the mip offset
// at index 0. Only tex_mipoff[i][0] is printed; the other mip offsets are not traced here.
// NOTE(review): the logwidth/logheight prints read tex_logdims while the width[0]/height[0]
// prints read tex_dims. This reads like a rendered diff holding both old and new lines -
// confirm which ones are live in the real file.
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
|
||||
if (csrs_dirty[i]) begin
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_addr=%0h\n", $time, CORE_ID, i, tex_baddr[i]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_logwidth=%0h\n", $time, CORE_ID, i, tex_logdims[i][0]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_logheight=%0h\n", $time, CORE_ID, i, tex_logdims[i][1]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_format=%0h\n", $time, CORE_ID, i, tex_format[i]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_u=%0h\n", $time, CORE_ID, i, tex_wraps[i][0]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_v=%0h\n", $time, CORE_ID, i, tex_wraps[i][1]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_filter=%0h\n", $time, CORE_ID, i, tex_filter[i]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_mipoff[0]=%0h\n", $time, CORE_ID, i, tex_mipoff[i][0]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_width[0]=%0h\n", $time, CORE_ID, i, tex_dims[i][0][0]);
|
||||
dpi_trace("%d: core%0d-tex-csr: tex%0d_height[0]=%0h\n", $time, CORE_ID, i, tex_dims[i][0][1]);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -4,19 +4,19 @@ module VX_tex_wrap #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire [`TEX_WRAP_BITS-1:0] wrap_i,
|
||||
input wire [31:0] coord_i,
|
||||
output wire [`FIXED_FRAC-1:0] coord_o
|
||||
input wire [`TEX_FXD_BITS-1:0] coord_i,
|
||||
output wire [`TEX_FXD_FRAC-1:0] coord_o
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`FIXED_FRAC-1:0] coord_r;
|
||||
reg [`TEX_FXD_FRAC-1:0] coord_r;
|
||||
|
||||
wire [`FIXED_FRAC-1:0] clamp;
|
||||
wire [`TEX_FXD_FRAC-1:0] clamp;
|
||||
|
||||
VX_tex_sat #(
|
||||
.IN_W (32),
|
||||
.OUT_W (`FIXED_FRAC)
|
||||
.IN_W (`TEX_FXD_BITS),
|
||||
.OUT_W (`TEX_FXD_FRAC)
|
||||
) sat_fx (
|
||||
.data_in (coord_i),
|
||||
.data_out (clamp)
|
||||
@@ -27,9 +27,9 @@ module VX_tex_wrap #(
|
||||
`TEX_WRAP_CLAMP:
|
||||
coord_r = clamp;
|
||||
`TEX_WRAP_MIRROR:
|
||||
coord_r = coord_i[`FIXED_FRAC-1:0] ^ {`FIXED_FRAC{coord_i[`FIXED_FRAC]}};
|
||||
coord_r = coord_i[`TEX_FXD_FRAC-1:0] ^ {`TEX_FXD_FRAC{coord_i[`TEX_FXD_FRAC]}};
|
||||
default: //`TEX_WRAP_REPEAT
|
||||
coord_r = coord_i[`FIXED_FRAC-1:0];
|
||||
coord_r = coord_i[`TEX_FXD_FRAC-1:0];
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
|
||||
|
||||
Reference in New Issue
Block a user