Files
vortex/hw/rtl/tex_unit/VX_tex_unit.sv
2021-12-02 10:22:21 -08:00

287 lines
11 KiB
Systemverilog

`include "VX_tex_define.vh"
// Texture unit top level.
//
// Holds the per-texture-unit state programmed through tex_csr_if and chains
// three stages for every request accepted on tex_req_if:
//   VX_tex_addr    - address generation
//   VX_tex_mem     - texel fetch through dcache_req_if / dcache_rsp_if
//   VX_tex_sampler - sampling of the fetched texels
// The sampler stage drives tex_rsp_if.
//
// Parameters:
//   CORE_ID - core identifier; forwarded to the sub-modules and printed in
//             the debug traces.
module VX_tex_unit #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    VX_perf_tex_if.master   perf_tex_if,
`endif

    // Texture unit <-> Memory Unit
    VX_dcache_req_if.master dcache_req_if,
    VX_dcache_rsp_if.slave  dcache_rsp_if,

    // Inputs
    VX_tex_req_if.slave     tex_req_if,
    VX_tex_csr_if.slave     tex_csr_if,

    // Outputs
    VX_tex_rsp_if.master    tex_rsp_if
);
    // Width of the request metadata carried alongside the data through all
    // stages. It is packed as {uuid, rd, wb, wid, PC}; the terms below
    // presumably correspond to those fields in order (confirm against the
    // interface definitions).
    localparam REQ_INFO_W   = 64 + `NR_BITS + 1 + `NW_BITS + 32;

    // Total width of the blend fractions: two per thread, matching the shape
    // of mem_req_blends.
    localparam BLEND_FRAC_W = (2 * `NUM_THREADS * `TEX_BLEND_FRAC);

    // Index of the texture unit targeted by subsequent CSR writes.
    reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit;

    // Per-unit state: mip offsets (one per LOD), log2 dimensions
    // ([0] = width, [1] = height), wrap modes ([0] = U, [1] = V),
    // base address, format and filter.
    reg [`TEX_MIPOFF_BITS-1:0]     tex_mipoff  [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0];
    reg [1:0][`TEX_LOD_BITS-1:0]   tex_logdims [`NUM_TEX_UNITS-1:0];
    reg [1:0][`TEX_WRAP_BITS-1:0]  tex_wraps   [`NUM_TEX_UNITS-1:0];
    reg [`TEX_ADDR_BITS-1:0]       tex_baddr   [`NUM_TEX_UNITS-1:0];
    reg [`TEX_FORMAT_BITS-1:0]     tex_format  [`NUM_TEX_UNITS-1:0];
    reg [`TEX_FILTER_BITS-1:0]     tex_filter  [`NUM_TEX_UNITS-1:0];

    // A texture request is accepted when both sides of the handshake agree.
    wire tex_req_fire = tex_req_if.valid && tex_req_if.ready;

    // CSRs programming

    // Per-unit flag: set when any CSR of that unit is written, cleared when a
    // request is accepted. Only read by the DBG_TRACE_TEX block.
    reg csrs_dirty [`NUM_TEX_UNITS-1:0];
    `UNUSED_VAR (csrs_dirty)

    always @(posedge clk) begin
        // Clear the dirty flags once a request has been accepted (the trace
        // block reports them in that same cycle). This must come BEFORE the
        // CSR write handling: the last nonblocking assignment wins, so a CSR
        // write landing in the same cycle keeps its dirty flag set and gets
        // reported with the next accepted request instead of being lost.
        if (tex_req_fire) begin
            for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
                csrs_dirty[i] <= 0;
            end
        end

        if (tex_csr_if.write_enable) begin
            case (tex_csr_if.write_addr)
                `CSR_TEX_UNIT: begin
                    csr_tex_unit <= tex_csr_if.write_data[$clog2(`NUM_TEX_UNITS)-1:0];
                end
                `CSR_TEX_ADDR: begin
                    tex_baddr[csr_tex_unit]  <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
                    csrs_dirty[csr_tex_unit] <= 1;
                end
                `CSR_TEX_FORMAT: begin
                    tex_format[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
                    csrs_dirty[csr_tex_unit] <= 1;
                end
                `CSR_TEX_WRAPU: begin
                    tex_wraps[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
                    csrs_dirty[csr_tex_unit]   <= 1;
                end
                `CSR_TEX_WRAPV: begin
                    tex_wraps[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
                    csrs_dirty[csr_tex_unit]   <= 1;
                end
                `CSR_TEX_FILTER: begin
                    tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
                    csrs_dirty[csr_tex_unit] <= 1;
                end
                `CSR_TEX_WIDTH: begin
                    tex_logdims[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
                    csrs_dirty[csr_tex_unit]     <= 1;
                end
                `CSR_TEX_HEIGHT: begin
                    tex_logdims[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
                    csrs_dirty[csr_tex_unit]     <= 1;
                end
                default: begin
                    // The mip offset CSRs are matched one LOD at a time.
                    for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
                    `IGNORE_WARNINGS_BEGIN
                        if (tex_csr_if.write_addr == `CSR_TEX_MIPOFF(j)) begin
                    `IGNORE_WARNINGS_END
                            tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
                            csrs_dirty[csr_tex_unit]    <= 1;
                        end
                    end
                end
            endcase
        end

        // Reset has the final word on the dirty flags, regardless of any CSR
        // write observed in the same cycle.
        if (reset) begin
            for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
                csrs_dirty[i] <= 0;
            end
        end
    end

    // mipmap attributes

    wire [`NUM_THREADS-1:0][`TEX_LOD_BITS-1:0]      mip_level;
    wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0]   sel_mipoff;
    wire [`NUM_THREADS-1:0][1:0][`TEX_LOD_BITS-1:0] sel_logdims;

    // Each thread supplies its own LOD; the mip offset is looked up per
    // thread while the log2 dimensions of the requested unit are replicated.
    for (genvar i = 0; i < `NUM_THREADS; ++i) begin
        wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0];
        assign mip_level[i]      = tex_req_if.lod[i][`TEX_LOD_BITS-1:0];
        assign sel_mipoff[i]     = tex_mipoff[unit][mip_level[i]];
        assign sel_logdims[i][0] = tex_logdims[unit][0];
        assign sel_logdims[i][1] = tex_logdims[unit][1];
    end

    // address generation

    wire mem_req_valid;
    wire [`NUM_THREADS-1:0]                           mem_req_tmask;
    wire [`TEX_FILTER_BITS-1:0]                       mem_req_filter;
    wire [`TEX_LGSTRIDE_BITS-1:0]                     mem_req_lgstride;
    wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends;
    wire [`NUM_THREADS-1:0][3:0][31:0]                mem_req_addr;
    wire [`NUM_THREADS-1:0][31:0]                     mem_req_baseaddr;
    wire [(`TEX_FORMAT_BITS + REQ_INFO_W)-1:0]        mem_req_info;
    wire mem_req_ready;

    // The texture format is prepended to the request metadata so that it
    // travels with the request and can be recovered for the sampler stage.
    VX_tex_addr #(
        .CORE_ID   (CORE_ID),
        .REQ_INFOW (`TEX_FORMAT_BITS + REQ_INFO_W),
        .NUM_REQS  (`NUM_THREADS)
    ) tex_addr (
        .clk         (clk),
        .reset       (reset),

        .req_valid   (tex_req_if.valid),
        .req_tmask   (tex_req_if.tmask),
        .req_coords  (tex_req_if.coords),
        .req_format  (tex_format[tex_req_if.unit]),
        .req_filter  (tex_filter[tex_req_if.unit]),
        .req_wraps   (tex_wraps[tex_req_if.unit]),
        .req_baseaddr(tex_baddr[tex_req_if.unit]),
        .mip_level   (mip_level),
        .req_mipoff  (sel_mipoff),
        .req_logdims (sel_logdims),
        .req_info    ({tex_format[tex_req_if.unit], tex_req_if.uuid, tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
        .req_ready   (tex_req_if.ready),

        .rsp_valid   (mem_req_valid),
        .rsp_tmask   (mem_req_tmask),
        .rsp_filter  (mem_req_filter),
        .rsp_lgstride(mem_req_lgstride),
        .rsp_baseaddr(mem_req_baseaddr),
        .rsp_addr    (mem_req_addr),
        .rsp_blends  (mem_req_blends),
        .rsp_info    (mem_req_info),
        .rsp_ready   (mem_req_ready)
    );

    // retrieve texel values from memory

    wire mem_rsp_valid;
    wire [`NUM_THREADS-1:0]                                   mem_rsp_tmask;
    wire [`NUM_THREADS-1:0][3:0][31:0]                        mem_rsp_data;
    wire [(BLEND_FRAC_W + `TEX_FORMAT_BITS + REQ_INFO_W)-1:0] mem_rsp_info;
    wire mem_rsp_ready;

    // The blend fractions are prepended to the metadata for this stage; the
    // resulting layout (MSB to LSB) is {blends, format, request info}.
    VX_tex_mem #(
        .CORE_ID   (CORE_ID),
        .REQ_INFOW (BLEND_FRAC_W + `TEX_FORMAT_BITS + REQ_INFO_W),
        .NUM_REQS  (`NUM_THREADS)
    ) tex_mem (
        .clk           (clk),
        .reset         (reset),

        // memory interface
        .dcache_req_if (dcache_req_if),
        .dcache_rsp_if (dcache_rsp_if),

        // inputs
        .req_valid     (mem_req_valid),
        .req_tmask     (mem_req_tmask),
        .req_filter    (mem_req_filter),
        .req_lgstride  (mem_req_lgstride),
        .req_baseaddr  (mem_req_baseaddr),
        .req_addr      (mem_req_addr),
        .req_info      ({mem_req_blends, mem_req_info}),
        .req_ready     (mem_req_ready),

        // outputs
        .rsp_valid     (mem_rsp_valid),
        .rsp_tmask     (mem_rsp_tmask),
        .rsp_data      (mem_rsp_data),
        .rsp_info      (mem_rsp_info),
        .rsp_ready     (mem_rsp_ready)
    );

    // apply sampler

    // mem_rsp_info is unpacked back into blends, format and the original
    // request metadata using the layout established by the earlier stages.
    VX_tex_sampler #(
        .CORE_ID   (CORE_ID),
        .REQ_INFOW (REQ_INFO_W),
        .NUM_REQS  (`NUM_THREADS)
    ) tex_sampler (
        .clk        (clk),
        .reset      (reset),

        // inputs
        .req_valid  (mem_rsp_valid),
        .req_tmask  (mem_rsp_tmask),
        .req_data   (mem_rsp_data),
        .req_blends (mem_rsp_info[(REQ_INFO_W+`TEX_FORMAT_BITS) +: BLEND_FRAC_W]),
        .req_format (mem_rsp_info[REQ_INFO_W +: `TEX_FORMAT_BITS]),
        .req_info   (mem_rsp_info[0 +: REQ_INFO_W]),
        .req_ready  (mem_rsp_ready),

        // outputs
        .rsp_valid  (tex_rsp_if.valid),
        .rsp_tmask  (tex_rsp_if.tmask),
        .rsp_data   (tex_rsp_if.data),
        .rsp_info   ({tex_rsp_if.uuid, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
        .rsp_ready  (tex_rsp_if.ready)
    );

`ifdef PERF_ENABLE
    // Number of dcache requests accepted / responses received this cycle.
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_req_per_cycle;
    wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_rsp_per_cycle;

    wire [`NUM_THREADS-1:0] perf_mem_req_per_mask = dcache_req_if.valid & dcache_req_if.ready;
    wire [`NUM_THREADS-1:0] perf_mem_rsp_per_mask = dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_if.valid & dcache_rsp_if.ready}};

    `POP_COUNT(perf_mem_req_per_cycle, perf_mem_req_per_mask);
    `POP_COUNT(perf_mem_rsp_per_cycle, perf_mem_rsp_per_mask);

    // Running count of reads that were requested but not yet answered. The
    // per-cycle delta is one bit wider than the counts and is sign-extended
    // before accumulation because it can be negative.
    reg [`PERF_CTR_BITS-1:0] perf_pending_reads;
    wire [$clog2(`NUM_THREADS+1)+1-1:0] perf_pending_reads_cycle = perf_mem_req_per_cycle - perf_mem_rsp_per_cycle;

    always @(posedge clk) begin
        if (reset) begin
            perf_pending_reads <= 0;
        end else begin
            perf_pending_reads <= perf_pending_reads + `PERF_CTR_BITS'($signed(perf_pending_reads_cycle));
        end
    end

    // perf_mem_reads accumulates the accepted requests; perf_mem_latency
    // accumulates the number of pending reads on every cycle.
    reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
    reg [`PERF_CTR_BITS-1:0] perf_mem_latency;

    always @(posedge clk) begin
        if (reset) begin
            perf_mem_reads   <= 0;
            perf_mem_latency <= 0;
        end else begin
            perf_mem_reads   <= perf_mem_reads + `PERF_CTR_BITS'(perf_mem_req_per_cycle);
            perf_mem_latency <= perf_mem_latency + `PERF_CTR_BITS'(perf_pending_reads);
        end
    end

    assign perf_tex_if.mem_reads   = perf_mem_reads;
    assign perf_tex_if.mem_latency = perf_mem_latency;
`endif

`ifdef DBG_TRACE_TEX
    // Debug tracing: on every accepted request, dump the state of each unit
    // whose CSRs were written since the previous accepted request, followed by
    // the request itself. Responses are traced when they are accepted.
    always @(posedge clk) begin
        if (tex_req_fire) begin
            for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
                if (csrs_dirty[i]) begin
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_addr=%0h\n", $time, CORE_ID, i, tex_baddr[i]);
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_logwidth=%0h\n", $time, CORE_ID, i, tex_logdims[i][0]);
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_logheight=%0h\n", $time, CORE_ID, i, tex_logdims[i][1]);
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_format=%0h\n", $time, CORE_ID, i, tex_format[i]);
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_u=%0h\n", $time, CORE_ID, i, tex_wraps[i][0]);
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_v=%0h\n", $time, CORE_ID, i, tex_wraps[i][1]);
                    dpi_trace("%d: core%0d-tex-csr: tex%0d_filter=%0h\n", $time, CORE_ID, i, tex_filter[i]);
                end
            end

            dpi_trace("%d: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=",
                    $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
            `TRACE_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS);
            dpi_trace(", v=");
            `TRACE_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS);
            dpi_trace("\n");
        end
        if (tex_rsp_if.valid && tex_rsp_if.ready) begin
            dpi_trace("%d: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
                    $time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
            `TRACE_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
            dpi_trace("\n");
        end
    end
`endif

endmodule