From 668cfb5da40aa51a82b2e5d0d0868e9834da6a27 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 14 Oct 2021 13:00:17 -0700 Subject: [PATCH] text_addr optimization --- hw/rtl/VX_platform.vh | 2 -- hw/rtl/tex_unit/VX_tex_addr.v | 66 ++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 2d01951e..9c1ae729 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -91,8 +91,6 @@ `define LTRIM(x, s) x[s-1:0] -`define ADDER_CARRY_WIDTH(x, y) `MAX(x, `MIN(x, y)+1); - `define PRINT_ARRAY1D(a, m) \ $write("{"); \ for (integer i = (m-1); i >= 0; --i) begin \ diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index a35b6945..40ef87a6 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -36,7 +36,10 @@ module VX_tex_addr #( `UNUSED_PARAM (CORE_ID) - localparam PITCH_BITS = `ADDER_CARRY_WIDTH(`TEX_DIM_BITS, `TEX_STRIDE_BITS); + localparam PITCH_BITS = `MAX(`TEX_DIM_BITS, `TEX_STRIDE_BITS) + 1; + localparam SCALED_U_W = `FIXED_INT + `TEX_STRIDE_BITS; + localparam SCALED_X_W = (2 * `FIXED_INT); + localparam SCALED_V_W = SCALED_X_W + `TEX_STRIDE_BITS; wire valid_s0; wire [NUM_REQS-1:0] tmask_s0; @@ -46,8 +49,8 @@ module VX_tex_addr #( wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0; wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0; wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0; + wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0; wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0; - wire [NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_height, log_height_s0; wire stall_out; @@ -83,45 +86,53 @@ module VX_tex_addr #( .coord_i (coord_hi), .coord_o (clamped_hi[i][j]) ); - end - assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride); - assign log_height[i] = req_logdims[i][1]; - assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]); + end + assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride); + assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]); end VX_pipe_register #( - .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)), + .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, log_height, mip_addr, clamped_lo, clamped_hi}), - .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_height_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0}) + .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, req_logdims, mip_addr, clamped_lo, clamped_hi}), + .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0}) ); // addresses generation - wire [NUM_REQS-1:0][(`FIXED_INT+`TEX_STRIDE_BITS)-1:0] scaled_u_lo, scaled_u_hi; - wire [NUM_REQS-1:0][`FIXED_INT-1:0] scaled_v_lo, scaled_v_hi; + wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo; + wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi; wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends; wire [NUM_REQS-1:0][3:0][31:0] addr; for (genvar i = 0; i < NUM_REQS; ++i) begin - assign scaled_u_lo[i] = scale_to_pitch(clamped_lo_s0[i][0], log_pitch_s0[i]); - assign scaled_u_hi[i] = scale_to_pitch(clamped_hi_s0[i][0], log_pitch_s0[i]); - assign scaled_v_lo[i] = scale_to_height(clamped_lo_s0[i][1], log_height_s0[i]); - assign scaled_v_hi[i] = scale_to_height(clamped_hi_s0[i][1], log_height_s0[i]); - for (genvar j = 0; j < 2; ++j) begin + for (genvar j = 0; j < 2; ++j) begin + assign scaled_lo[i][j] = scale_to_dim(clamped_lo_s0[i][j], log_dims_s0[i][j]); + assign scaled_hi[i][j] = scale_to_dim(clamped_hi_s0[i][j], log_dims_s0[i][j]); assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); end end + `UNUSED_VAR (log_pitch_s0) + for (genvar i = 0; i < NUM_REQS; ++i) begin - assign addr[i][0] = mip_addr_s0[i] + 32'(scaled_u_lo[i]) + (32'(scaled_v_lo[i]) << log_pitch_s0[i]); - assign addr[i][1] = mip_addr_s0[i] + 32'(scaled_u_hi[i]) + (32'(scaled_v_lo[i]) << log_pitch_s0[i]); - assign addr[i][2] = mip_addr_s0[i] + 32'(scaled_u_lo[i]) + (32'(scaled_v_hi[i]) << log_pitch_s0[i]); - assign addr[i][3] = mip_addr_s0[i] + 32'(scaled_u_hi[i]) + (32'(scaled_v_hi[i]) << log_pitch_s0[i]); + wire [SCALED_U_W-1:0] offset_u_lo = SCALED_U_W'(scaled_lo[i][0]) << log_stride_s0; + wire [SCALED_U_W-1:0] offset_u_hi = SCALED_U_W'(scaled_hi[i][0]) << log_stride_s0; + + wire [SCALED_V_W-1:0] offset_v_lo = SCALED_V_W'(scaled_lo[i][1]) << log_pitch_s0[i]; + wire [SCALED_V_W-1:0] offset_v_hi = SCALED_V_W'(scaled_hi[i][1]) << log_pitch_s0[i]; + + wire [31:0] base_addr_lo = mip_addr_s0[i] + 32'(offset_v_lo); + wire [31:0] base_addr_hi = mip_addr_s0[i] + 32'(offset_v_hi); + + assign addr[i][0] = base_addr_lo + 32'(offset_u_lo); + assign addr[i][1] = base_addr_lo + 32'(offset_u_hi); + assign addr[i][2] = base_addr_hi + 32'(offset_u_lo); + assign addr[i][3] = base_addr_hi + 32'(offset_u_hi); end assign stall_out = rsp_valid && ~rsp_ready; @@ -142,7 +153,7 @@ module VX_tex_addr #( `ifdef DBG_PRINT_TEX wire [`NW_BITS-1:0] rsp_wid; wire [31:0] rsp_PC; - + assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; always @(posedge clk) begin @@ -155,17 +166,8 @@ module VX_tex_addr #( end `endif -function logic [(`FIXED_INT+`TEX_STRIDE_BITS)-1:0] scale_to_pitch (input logic [`FIXED_FRAC-1:0] src, - input logic [PITCH_BITS-1:0] dim); -`IGNORE_WARNINGS_BEGIN - logic [(`FIXED_BITS+`TEX_STRIDE_BITS)-1:0] out; -`IGNORE_WARNINGS_END - out = (`FIXED_BITS+`TEX_STRIDE_BITS)'(src) << dim; - return out[`FIXED_FRAC +: (`FIXED_INT+`TEX_STRIDE_BITS)]; -endfunction - -function logic [`FIXED_INT-1:0] scale_to_height (input logic [`FIXED_FRAC-1:0] src, - input logic [`TEX_DIM_BITS-1:0] dim); +function logic [`FIXED_INT-1:0] scale_to_dim (input logic [`FIXED_FRAC-1:0] src, + input logic [`TEX_DIM_BITS-1:0] dim); `IGNORE_WARNINGS_BEGIN logic [`FIXED_BITS-1:0] out; `IGNORE_WARNINGS_END