Texture unit: pitch-based texel addressing, explicit R8G8B8A8 decode,
alpha/beta lerp ports, and VX_sat_fx renamed to VX_tex_sat.

Review notes:
  * ADDER_CARRY_WIDTH(x, y) expands to (`MAX(x, y) + 1): the number of bits
    needed to hold the sum of an x-bit and a y-bit unsigned value. The macro
    body no longer ends in ';', so it can be used inside expressions and the
    PITCH_BITS localparam does not expand to ';;'.
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of this patch; apply it with whitespace-insensitive matching. The
    post-image blob id on the VX_platform.vh index line predates the macro fix.

diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh
index 9c1ae729..2d01951e 100644
--- a/hw/rtl/VX_platform.vh
+++ b/hw/rtl/VX_platform.vh
@@ -91,6 +91,9 @@
 
 `define LTRIM(x, s) x[s-1:0]
 
+// Bits needed to hold the sum of an x-bit and a y-bit unsigned value.
+`define ADDER_CARRY_WIDTH(x, y) (`MAX(x, y) + 1)
+
 `define PRINT_ARRAY1D(a, m) \
     $write("{"); \
     for (integer i = (m-1); i >= 0; --i) begin \
diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v
index fdc6fd42..d8e372cf 100644
--- a/hw/rtl/tex_unit/VX_tex_addr.v
+++ b/hw/rtl/tex_unit/VX_tex_addr.v
@@ -36,6 +36,8 @@ module VX_tex_addr #(
 
     `UNUSED_PARAM (CORE_ID)
 
+    localparam PITCH_BITS = `ADDER_CARRY_WIDTH(`TEX_DIM_BITS, `TEX_STRIDE_BITS);
+
     wire valid_s0;
     wire [NUM_REQS-1:0] tmask_s0;
     wire [`TEX_FILTER_BITS-1:0] filter_s0;
@@ -44,7 +46,8 @@ module VX_tex_addr #(
     wire [NUM_REQS-1:0][1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0;
     wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0;
     wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0;
-    wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0;
+    wire [NUM_REQS-1:0][PITCH_BITS-1:0] log_pitch, log_pitch_s0;
+    wire [NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_height, log_height_s0;
 
     wire stall_out;
 
@@ -61,8 +64,9 @@ module VX_tex_addr #(
 
     for (genvar i = 0; i < NUM_REQS; ++i) begin
         for (genvar j = 0; j < 2; ++j) begin
-            wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - (`FIXED_HALF >> req_logdims[i][j])) : req_coords[j][i];
-            wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + (`FIXED_HALF >> req_logdims[i][j])) : req_coords[j][i];
+            wire [`FIXED_FRAC-1:0] delta = (`FIXED_HALF >> req_logdims[i][j]);
+            wire [31:0] coord_lo = req_filter ? (req_coords[j][i] - 32'(delta)) : req_coords[j][i];
+            wire [31:0] coord_hi = req_filter ? (req_coords[j][i] + 32'(delta)) : req_coords[j][i];
 
             VX_tex_wrap #(
                 .CORE_ID (CORE_ID)
@@ -79,41 +83,45 @@ module VX_tex_addr #(
                 .coord_i (coord_hi),
                 .coord_o (clamped_hi[i][j])
             );
-        end
-        assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
+        end
+        assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride);
+        assign log_height[i] = req_logdims[i][1];
+        assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]);
     end
 
     VX_pipe_register #(
-        .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
+        .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (PITCH_BITS + `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)),
         .RESETW (1)
     ) pipe_reg0 (
         .clk (clk),
         .reset (reset),
         .enable (~stall_out),
-        .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, req_logdims, mip_addr, clamped_lo, clamped_hi}),
-        .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
+        .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, log_pitch, log_height, mip_addr, clamped_lo, clamped_hi}),
+        .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_pitch_s0, log_height_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0})
     );
 
     // addresses generation
 
-    wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo;
-    wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi;
+    wire [NUM_REQS-1:0][(`FIXED_INT+`TEX_STRIDE_BITS)-1:0] scaled_u_lo, scaled_u_hi;
+    wire [NUM_REQS-1:0][`FIXED_INT-1:0] scaled_v_lo, scaled_v_hi;
     wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends;
     wire [NUM_REQS-1:0][3:0][31:0] addr;
 
     for (genvar i = 0; i < NUM_REQS; ++i) begin
-        for (genvar j = 0; j < 2; ++j) begin
-            assign scaled_lo[i][j] = `FIXED_INT'(clamped_lo_s0[i][j] >> ((`FIXED_FRAC) - log_dims_s0[i][j]));
-            assign scaled_hi[i][j] = `FIXED_INT'(clamped_hi_s0[i][j] >> ((`FIXED_FRAC) - log_dims_s0[i][j]));
-            assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
+        assign scaled_u_lo[i] = scale_to_pitch(clamped_lo_s0[i][0], log_pitch_s0[i]);
+        assign scaled_u_hi[i] = scale_to_pitch(clamped_hi_s0[i][0], log_pitch_s0[i]);
+        assign scaled_v_lo[i] = scale_to_height(clamped_lo_s0[i][1], log_height_s0[i]);
+        assign scaled_v_hi[i] = scale_to_height(clamped_hi_s0[i][1], log_height_s0[i]);
+        for (genvar j = 0; j < 2; ++j) begin
+            assign blends[i][j] = filter_s0 ? clamped_lo_s0[i][j][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0);
         end
     end
 
     for (genvar i = 0; i < NUM_REQS; ++i) begin
-        assign addr[i][0] = mip_addr_s0[i] + (32'(scaled_lo[i][0]) + (32'(scaled_lo[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
-        assign addr[i][1] = mip_addr_s0[i] + (32'(scaled_hi[i][0]) + (32'(scaled_lo[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
-        assign addr[i][2] = mip_addr_s0[i] + (32'(scaled_lo[i][0]) + (32'(scaled_hi[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
-        assign addr[i][3] = mip_addr_s0[i] + (32'(scaled_hi[i][0]) + (32'(scaled_hi[i][1]) << log_dims_s0[i][0])) << log_stride_s0;
+        assign addr[i][0] = mip_addr_s0[i] + 32'(scaled_u_lo[i]) + (32'(scaled_v_lo[i]) << log_pitch_s0[i]);
+        assign addr[i][1] = mip_addr_s0[i] + 32'(scaled_u_hi[i]) + (32'(scaled_v_lo[i]) << log_pitch_s0[i]);
+        assign addr[i][2] = mip_addr_s0[i] + 32'(scaled_u_lo[i]) + (32'(scaled_v_hi[i]) << log_pitch_s0[i]);
+        assign addr[i][3] = mip_addr_s0[i] + 32'(scaled_u_hi[i]) + (32'(scaled_v_hi[i]) << log_pitch_s0[i]);
     end
 
     assign stall_out = rsp_valid && ~rsp_ready;
@@ -131,9 +139,10 @@ module VX_tex_addr #(
 
     assign req_ready = ~stall_out;
 
-    `ifdef DBG_PRINT_TEX
+`ifdef DBG_PRINT_TEX
     wire [`NW_BITS-1:0] rsp_wid;
     wire [31:0] rsp_PC;
+
     assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
 
     always @(posedge clk) begin
@@ -146,4 +155,22 @@ module VX_tex_addr #(
     end
 `endif
 
+function logic [(`FIXED_INT+`TEX_STRIDE_BITS)-1:0] scale_to_pitch (input logic [`FIXED_FRAC-1:0] src,
+                                                                  input logic [PITCH_BITS-1:0] dim);
+`IGNORE_UNUSED_BEGIN
+    logic [(`FIXED_BITS+`TEX_STRIDE_BITS)-1:0] out;
+`IGNORE_UNUSED_END
+    out = (`FIXED_BITS+`TEX_STRIDE_BITS)'(src) << dim;
+    return out[`FIXED_FRAC +: (`FIXED_INT+`TEX_STRIDE_BITS)];
+endfunction
+
+function logic [`FIXED_INT-1:0] scale_to_height (input logic [`FIXED_FRAC-1:0] src,
+                                                 input logic [`TEX_DIM_BITS-1:0] dim);
+`IGNORE_UNUSED_BEGIN
+    logic [`FIXED_BITS-1:0] out;
+`IGNORE_UNUSED_END
+    out = `FIXED_BITS'(src) << dim;
+    return out[`FIXED_FRAC +: `FIXED_INT];
+endfunction
+
 endmodule
\ No newline at end of file
diff --git a/hw/rtl/tex_unit/VX_tex_define.vh b/hw/rtl/tex_unit/VX_tex_define.vh
index 5ab9838e..16272fc9 100644
--- a/hw/rtl/tex_unit/VX_tex_define.vh
+++ b/hw/rtl/tex_unit/VX_tex_define.vh
@@ -3,11 +3,12 @@
 
 `include "VX_define.vh"
 
-`define FIXED_FRAC 20
-`define FIXED_INT (32 - `FIXED_FRAC)
-`define FIXED_ONE (2 ** `FIXED_FRAC)
-`define FIXED_HALF (`FIXED_ONE >> 1)
-`define FIXED_MASK (`FIXED_ONE - 1)
+`define FIXED_BITS 32
+`define FIXED_FRAC 20
+`define FIXED_INT (`FIXED_BITS - `FIXED_FRAC)
+`define FIXED_ONE (2 ** `FIXED_FRAC)
+`define FIXED_HALF (`FIXED_ONE >> 1)
+`define FIXED_MASK (`FIXED_ONE - 1)
 
 `define TEX_ADDR_BITS 32
 `define TEX_FORMAT_BITS 3
diff --git a/hw/rtl/tex_unit/VX_tex_format.v b/hw/rtl/tex_unit/VX_tex_format.v
index 951811f4..91e0e6f8 100644
--- a/hw/rtl/tex_unit/VX_tex_format.v
+++ b/hw/rtl/tex_unit/VX_tex_format.v
@@ -13,6 +13,12 @@ module VX_tex_format #(
 
     always @(*) begin
         case (format)
+            `TEX_FORMAT_R8G8B8A8: begin
+                texel_out_r[07:00] = texel_in[7:0];
+                texel_out_r[15:08] = texel_in[15:8];
+                texel_out_r[23:16] = texel_in[23:16];
+                texel_out_r[31:24] = texel_in[31:24];
+            end
             `TEX_FORMAT_R5G6B5: begin
                 texel_out_r[07:00] = {texel_in[15:11], texel_in[15:13]};
                 texel_out_r[15:08] = {texel_in[10:5], texel_in[10:9]};
@@ -31,24 +37,18 @@ module VX_tex_format #(
                 texel_out_r[23:16] = texel_in[7:0];
                 texel_out_r[31:24] = texel_in[15:8];
             end
-            `TEX_FORMAT_A8: begin
-                texel_out_r[07:00] = 0;
-                texel_out_r[15:08] = 0;
-                texel_out_r[23:16] = 0;
-                texel_out_r[31:24] = texel_in[7:0];
-            end
             `TEX_FORMAT_L8: begin
                 texel_out_r[07:00] = texel_in[7:0];
                 texel_out_r[15:08] = texel_in[7:0];
                 texel_out_r[23:16] = texel_in[7:0];
                 texel_out_r[31:24] = 8'hff;
             end
-            // `TEX_FORMAT_R8G8B8A8
-            default: begin
-                texel_out_r[07:00] = texel_in[7:0];
-                texel_out_r[15:08] = texel_in[15:8];
-                texel_out_r[23:16] = texel_in[23:16];
-                texel_out_r[31:24] = texel_in[31:24];
+            //`TEX_FORMAT_A8
+            default: begin
+                texel_out_r[07:00] = 0;
+                texel_out_r[15:08] = 0;
+                texel_out_r[23:16] = 0;
+                texel_out_r[31:24] = texel_in[7:0];
             end
         endcase
     end
diff --git a/hw/rtl/tex_unit/VX_tex_lerp.v b/hw/rtl/tex_unit/VX_tex_lerp.v
index 5495688e..6dce57e3 100644
--- a/hw/rtl/tex_unit/VX_tex_lerp.v
+++ b/hw/rtl/tex_unit/VX_tex_lerp.v
@@ -1,17 +1,16 @@
 `include "VX_tex_define.vh"
 
-module VX_tex_lerp #(
-) (
-    input wire [`BLEND_FRAC-1:0] blend,
-    input wire [31:0] in1,
-    input wire [31:0] in2,
-    output wire [31:0] out
+module VX_tex_lerp (
+    input wire [3:0][7:0] in1,
+    input wire [3:0][7:0] in2,
+    input wire [8:0] alpha,
+    input wire [7:0] beta,
+    output wire [3:0][7:0] out
 );
     for (genvar i = 0; i < 4; ++i) begin
-        wire [8:0] blend_m1 = `BLEND_ONE - blend;
-        wire [16:0] sum = in1[i*8+:8] * blend_m1 + in2[i*8+:8] * blend;
+        wire [16:0] sum = in1[i] * alpha + in2[i] * beta;
         `UNUSED_VAR (sum)
-        assign out[i*8+:8] = sum[15:8];
+        assign out[i] = sum[15:8];
     end
 
 endmodule
\ No newline at end of file
diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v
index e5820f72..e7779df5 100644
--- a/hw/rtl/tex_unit/VX_tex_sampler.v
+++ b/hw/rtl/tex_unit/VX_tex_sampler.v
@@ -51,20 +51,25 @@ module VX_tex_sampler #(
             );
         end
 
+        wire [7:0] beta = req_blends[i][0];
+        wire [8:0] alpha = `BLEND_ONE - beta;
+
         VX_tex_lerp #(
         ) tex_lerp_ul (
-            .blend (req_blends[i][0]),
-            .in1 (fmt_texels[0]),
-            .in2 (fmt_texels[1]),
-            .out (texel_ul[i])
+            .in1 (fmt_texels[0]),
+            .in2 (fmt_texels[1]),
+            .alpha (alpha),
+            .beta (beta),
+            .out (texel_ul[i])
         );
 
         VX_tex_lerp #(
         ) tex_lerp_uh (
-            .blend (req_blends[i][0]),
-            .in1 (fmt_texels[2]),
-            .in2 (fmt_texels[3]),
-            .out (texel_uh[i])
+            .in1 (fmt_texels[2]),
+            .in2 (fmt_texels[3]),
+            .alpha (alpha),
+            .beta (beta),
+            .out (texel_uh[i])
         );
 
         assign blend_v[i] = req_blends[i][1];
@@ -82,12 +87,16 @@ module VX_tex_sampler #(
     );
 
     for (genvar i = 0; i < NUM_REQS; i++) begin
+        wire [7:0] beta = blend_v_s0[i];
+        wire [8:0] alpha = `BLEND_ONE - beta;
+
         VX_tex_lerp #(
         ) tex_lerp_v (
-            .blend (blend_v_s0[i]),
-            .in1 (texel_ul_s0[i]),
-            .in2 (texel_uh_s0[i]),
-            .out (texel_v[i])
+            .in1 (texel_ul_s0[i]),
+            .in2 (texel_uh_s0[i]),
+            .alpha (alpha),
+            .beta (beta),
+            .out (texel_v[i])
         );
     end
 
@@ -108,7 +117,6 @@ module VX_tex_sampler #(
     assign req_ready = ~stall_out;
 
 `ifdef DBG_PRINT_TEX
-
     wire [`NW_BITS-1:0] req_wid, rsp_wid;
     wire [31:0] req_PC, rsp_PC;
 
diff --git a/hw/rtl/libs/VX_sat_fx.v b/hw/rtl/tex_unit/VX_tex_sat.v
similarity index 93%
rename from hw/rtl/libs/VX_sat_fx.v
rename to hw/rtl/tex_unit/VX_tex_sat.v
index 0a7abaf2..f8e20d08 100644
--- a/hw/rtl/libs/VX_sat_fx.v
+++ b/hw/rtl/tex_unit/VX_tex_sat.v
@@ -1,6 +1,6 @@
 `include "VX_platform.vh"
 
-module VX_sat_fx #(
+module VX_tex_sat #(
     parameter IN_W = 1,
     parameter OUT_W = 1,
     parameter MODEL = 1
@@ -11,11 +11,11 @@ module VX_sat_fx #(
     `STATIC_ASSERT(((OUT_W+1) < IN_W), ("invalid parameter"))
 
     if (MODEL == 1) begin
-        assign data_out = data_in[IN_W-1] ? OUT_W'(0) : ((data_in > {OUT_W{1'b1}}) ? {OUT_W{1'b1}} : OUT_W'(data_in));
-    end else begin
         wire [OUT_W-1:0] underflow_mask = {OUT_W{~data_in[IN_W-1]}};
         wire [OUT_W-1:0] overflow_mask = {OUT_W{(| data_in[IN_W-2:OUT_W])}};
-        assign data_out = (data_in[OUT_W-1:0] | overflow_mask) & underflow_mask;
+        assign data_out = (data_in[OUT_W-1:0] | overflow_mask) & underflow_mask;
+    end else begin
+        assign data_out = data_in[IN_W-1] ? OUT_W'(0) : ((data_in > {OUT_W{1'b1}}) ? {OUT_W{1'b1}} : OUT_W'(data_in));
     end
 
 endmodule
\ No newline at end of file
diff --git a/hw/rtl/tex_unit/VX_tex_wrap.v b/hw/rtl/tex_unit/VX_tex_wrap.v
index d9193077..8cc7b2f5 100644
--- a/hw/rtl/tex_unit/VX_tex_wrap.v
+++ b/hw/rtl/tex_unit/VX_tex_wrap.v
@@ -14,7 +14,7 @@ module VX_tex_wrap #(
 
     wire [`FIXED_FRAC-1:0] clamp;
 
-    VX_sat_fx #(
+    VX_tex_sat #(
         .IN_W (32),
         .OUT_W (`FIXED_FRAC)
     ) sat_fx (