diff --git a/hw/rtl/VX_lsu_unit.sv b/hw/rtl/VX_lsu_unit.sv index e0ed73b5..ec8fca80 100644 --- a/hw/rtl/VX_lsu_unit.sv +++ b/hw/rtl/VX_lsu_unit.sv @@ -44,6 +44,7 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0][`CACHE_ADDR_TYPE_BITS-1:0] lsu_addr_type, req_addr_type; + // full address calculation wire [`NUM_THREADS-1:0][31:0] full_addr; for (genvar i = 0; i < `NUM_THREADS; i++) begin assign full_addr[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset; diff --git a/hw/rtl/VX_writeback.sv b/hw/rtl/VX_writeback.sv index 5b67256c..f4471046 100644 --- a/hw/rtl/VX_writeback.sv +++ b/hw/rtl/VX_writeback.sv @@ -64,7 +64,8 @@ module VX_writeback #( VX_stream_arbiter #( .NUM_REQS (NUM_RSPS), .DATAW (DATAW), - .TYPE ("P") + .BUFFERED (1), + .TYPE ("R") ) rsp_arb ( .clk (clk), .reset (reset), diff --git a/hw/rtl/tex_unit/VX_tex_addr.sv b/hw/rtl/tex_unit/VX_tex_addr.sv index c33cc47a..87da9cef 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.sv +++ b/hw/rtl/tex_unit/VX_tex_addr.sv @@ -17,6 +17,7 @@ module VX_tex_addr #( input wire [`TEX_FILTER_BITS-1:0] req_filter, input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps, input wire [`TEX_ADDR_BITS-1:0] req_baseaddr, + input wire [NUM_REQS-1:0][`TEX_LOD_BITS-1:0] mip_level, input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff, input wire [NUM_REQS-1:0][1:0][`TEX_LOD_BITS-1:0] req_logdims, input wire [REQ_INFOW-1:0] req_info, @@ -28,6 +29,7 @@ module VX_tex_addr #( output wire [NUM_REQS-1:0] rsp_tmask, output wire [`TEX_FILTER_BITS-1:0] rsp_filter, output wire [`TEX_LGSTRIDE_BITS-1:0] rsp_lgstride, + output wire [NUM_REQS-1:0][31:0] rsp_baseaddr, output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr, output wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends, output wire [REQ_INFOW-1:0] rsp_info, @@ -38,6 +40,7 @@ module VX_tex_addr #( localparam SHIFT_BITS = $clog2(`TEX_FXD_FRAC+1); localparam PITCH_BITS = `MAX(`TEX_LOD_BITS, `TEX_LGSTRIDE_BITS) + 1; + localparam SCALED_DIM = `TEX_FXD_FRAC + `TEX_DIM_BITS; localparam SCALED_X_W = `TEX_DIM_BITS + `TEX_BLEND_FRAC; localparam OFFSET_U_W = `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX; localparam OFFSET_V_W = `TEX_DIM_BITS + `TEX_DIM_BITS + `TEX_LGSTRIDE_MAX; @@ -69,7 +72,7 @@ module VX_tex_addr #( for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 2; ++j) begin - wire [`TEX_FXD_FRAC-1:0] delta = (`TEX_FXD_HALF >> req_logdims[i][j]); + wire [`TEX_FXD_FRAC-1:0] delta = `TEX_FXD_FRAC'((SCALED_DIM'(`TEX_FXD_HALF) << mip_level[i]) >> req_logdims[i][j]); wire [`TEX_FXD_BITS-1:0] coord_lo = req_filter ? (req_coords[j][i] - `TEX_FXD_BITS'(delta)) : req_coords[j][i]; wire [`TEX_FXD_BITS-1:0] coord_hi = req_filter ? (req_coords[j][i] + `TEX_FXD_BITS'(delta)) : req_coords[j][i]; @@ -89,14 +92,14 @@ module VX_tex_addr #( .coord_o (clamped_hi[i][j]) ); - assign dim_shift[i][j] = (`TEX_FXD_FRAC - `TEX_BLEND_FRAC - req_logdims[i][j]); + assign dim_shift[i][j] = (`TEX_FXD_FRAC - `TEX_BLEND_FRAC - (req_logdims[i][j] - mip_level[i])); end - assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0]) + PITCH_BITS'(log_stride); - assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]); + assign log_pitch[i] = PITCH_BITS'(req_logdims[i][0] - mip_level[i]) + PITCH_BITS'(log_stride); + assign mip_addr[i] = req_baseaddr + `TEX_ADDR_BITS'(req_mipoff[i]); end VX_pipe_register #( - .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * SHIFT_BITS + 32 + 2 * 2 * `TEX_FXD_FRAC)), + .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + REQ_INFOW + NUM_REQS * (PITCH_BITS + 2 * SHIFT_BITS + `TEX_ADDR_BITS + 2 * 2 * `TEX_FXD_FRAC)), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -114,8 +117,6 @@ module VX_tex_addr #( wire [NUM_REQS-1:0][OFFSET_U_W-1:0] offset_u_hi; wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_lo; wire [NUM_REQS-1:0][OFFSET_V_W-1:0] offset_v_hi; - wire [NUM_REQS-1:0][31:0] base_addr_lo; - wire [NUM_REQS-1:0][31:0] base_addr_hi; wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] blends; wire [NUM_REQS-1:0][3:0][31:0] addr; @@ -134,26 +135,23 @@ module VX_tex_addr #( assign offset_v_lo[i] = OFFSET_V_W'(scaled_lo[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i]; assign offset_v_hi[i] = OFFSET_V_W'(scaled_hi[i][1][`TEX_BLEND_FRAC +: `TEX_DIM_BITS]) << log_pitch_s0[i]; - assign base_addr_lo[i] = mip_addr_s0[i] + 32'(offset_v_lo[i]); - assign base_addr_hi[i] = mip_addr_s0[i] + 32'(offset_v_hi[i]); - - assign addr[i][0] = base_addr_lo[i] + 32'(offset_u_lo[i]); - assign addr[i][1] = base_addr_lo[i] + 32'(offset_u_hi[i]); - assign addr[i][2] = base_addr_hi[i] + 32'(offset_u_lo[i]); - assign addr[i][3] = base_addr_hi[i] + 32'(offset_u_hi[i]); + assign addr[i][0] = 32'(offset_v_lo[i]) + 32'(offset_u_lo[i]); + assign addr[i][1] = 32'(offset_v_lo[i]) + 32'(offset_u_hi[i]); + assign addr[i][2] = 32'(offset_v_hi[i]) + 32'(offset_u_lo[i]); + assign addr[i][3] = 32'(offset_v_hi[i]) + 32'(offset_u_hi[i]); end assign stall_out = rsp_valid && ~rsp_ready; VX_pipe_register #( - .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `TEX_BLEND_FRAC) + REQ_INFOW), + .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_LGSTRIDE_BITS + (NUM_REQS * 32) + (NUM_REQS * 4 * 32) + (2 * NUM_REQS * `TEX_BLEND_FRAC) + REQ_INFOW), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_s0, tmask_s0, filter_s0, log_stride_s0, addr, blends, req_info_s0}), - .data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_lgstride, rsp_addr, rsp_blends, rsp_info}) + .data_in ({valid_s0, tmask_s0, filter_s0, log_stride_s0, mip_addr_s0, addr, blends, req_info_s0}), + .data_out ({rsp_valid, rsp_tmask, rsp_filter, rsp_lgstride, rsp_baseaddr, rsp_addr, rsp_blends, rsp_info}) ); assign req_ready = ~stall_out; @@ -176,6 +174,8 @@ module VX_tex_addr #( `TRACE_ARRAY2D(clamped_lo, 2, NUM_REQS); dpi_trace(", clamped_hi="); `TRACE_ARRAY2D(clamped_hi, 2, NUM_REQS); + dpi_trace(", mip_addr="); + `TRACE_ARRAY1D(mip_addr, NUM_REQS); dpi_trace("\n"); end @@ -192,10 +192,6 @@ module VX_tex_addr #( `TRACE_ARRAY1D(offset_v_lo, NUM_REQS); dpi_trace(", offset_v_hi="); `TRACE_ARRAY1D(offset_v_hi, NUM_REQS); - dpi_trace(", base_addr_lo="); - `TRACE_ARRAY1D(base_addr_lo, NUM_REQS); - dpi_trace(", base_addr_hi="); - `TRACE_ARRAY1D(base_addr_hi, NUM_REQS); dpi_trace("\n"); end diff --git a/hw/rtl/tex_unit/VX_tex_lerp.sv b/hw/rtl/tex_unit/VX_tex_lerp.sv index 6dce57e3..7f35ac38 100644 --- a/hw/rtl/tex_unit/VX_tex_lerp.sv +++ b/hw/rtl/tex_unit/VX_tex_lerp.sv @@ -3,12 +3,11 @@ module VX_tex_lerp ( input wire [3:0][7:0] in1, input wire [3:0][7:0] in2, - input wire [8:0] alpha, - input wire [7:0] beta, + input wire [7:0] frac, output wire [3:0][7:0] out -); +); for (genvar i = 0; i < 4; ++i) begin - wire [16:0] sum = in1[i] * alpha + in2[i] * beta; + wire [16:0] sum = in1[i] * 8'(8'hff - frac) + in2[i] * frac; `UNUSED_VAR (sum) assign out[i] = sum[15:8]; end diff --git a/hw/rtl/tex_unit/VX_tex_mem.sv b/hw/rtl/tex_unit/VX_tex_mem.sv index fc99466e..dd9878a2 100644 --- a/hw/rtl/tex_unit/VX_tex_mem.sv +++ b/hw/rtl/tex_unit/VX_tex_mem.sv @@ -16,6 +16,7 @@ module VX_tex_mem #( input wire [NUM_REQS-1:0] req_tmask, input wire [`TEX_FILTER_BITS-1:0] req_filter, input wire [`TEX_LGSTRIDE_BITS-1:0] req_lgstride, + input wire [NUM_REQS-1:0][31:0] req_baseaddr, input wire [NUM_REQS-1:0][3:0][31:0] req_addr, input wire [REQ_INFOW-1:0] req_info, output wire req_ready, @@ -32,6 +33,14 @@ module VX_tex_mem #( localparam RSP_CTR_W = $clog2(NUM_REQS * 4 + 1); + // full address calculation + wire [NUM_REQS-1:0][3:0][31:0] full_addr; + for (genvar i = 0; i < NUM_REQS; ++i) begin + for (genvar j = 0; j < 4; ++j) begin + assign full_addr[i][j] = req_baseaddr[i] + req_addr[i][j]; + end + end + wire [3:0] dup_reqs; wire [3:0][NUM_REQS-1:0][29:0] req_addr_w; wire [3:0][NUM_REQS-1:0][1:0] align_offs; @@ -40,17 +49,17 @@ module VX_tex_mem #( for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 4; ++j) begin - assign req_addr_w[j][i] = req_addr[i][j][31:2]; - assign align_offs[j][i] = req_addr[i][j][1:0]; + assign req_addr_w[j][i] = full_addr[i][j][31:2]; + assign align_offs[j][i] = full_addr[i][j][1:0]; end end - // find duplicate addresses + // detect duplicate addresses for (genvar i = 0; i < 4; ++i) begin - wire [NUM_REQS-1:0] addr_matches; - for (genvar j = 0; j < NUM_REQS; j++) begin - assign addr_matches[j] = (req_addr_w[i][0] == req_addr_w[i][j]) || ~req_tmask[j]; + wire [NUM_REQS-2:0] addr_matches; + for (genvar j = 0; j < (NUM_REQS-1); ++j) begin + assign addr_matches[j] = (req_addr_w[i][j+1] == req_addr_w[i][0]) || ~req_tmask[j+1]; end assign dup_reqs[i] = req_tmask[0] && (& addr_matches); end @@ -172,6 +181,8 @@ module VX_tex_mem #( reg [RSP_CTR_W-1:0] rsp_rem_ctr, rsp_rem_ctr_init; wire [RSP_CTR_W-1:0] rsp_rem_ctr_n; wire [NUM_REQS-1:0][1:0] rsp_align_offs; + wire [$clog2(NUM_REQS+1)-1:0] q_req_size; + wire [$clog2(NUM_REQS+1)-1:0] dcache_rsp_size; wire dcache_rsp_fire; wire [1:0] rsp_texel_idx; wire rsp_texel_dup; @@ -218,16 +229,21 @@ module VX_tex_mem #( end end + `POP_COUNT(q_req_size, q_req_tmask); + always @(*) begin - rsp_rem_ctr_init = RSP_CTR_W'($countones(q_dup_reqs[0] ? NUM_REQS'(1) : q_req_tmask)); + rsp_rem_ctr_init = q_dup_reqs[0] ? RSP_CTR_W'(1) : RSP_CTR_W'(q_req_size); if (q_req_filter) begin for (integer i = 1; i < 4; ++i) begin - rsp_rem_ctr_init += RSP_CTR_W'($countones(q_dup_reqs[i] ? NUM_REQS'(1) : q_req_tmask)); + rsp_rem_ctr_init += q_dup_reqs[i] ? RSP_CTR_W'(1) : RSP_CTR_W'(q_req_size); end end end - assign rsp_rem_ctr_n = rsp_rem_ctr - RSP_CTR_W'($countones(dcache_rsp_if.tmask)); + wire [NUM_REQS-1:0] dcache_rsp_tmask = dcache_rsp_if.tmask; + `POP_COUNT(dcache_rsp_size, dcache_rsp_tmask); + + assign rsp_rem_ctr_n = rsp_rem_ctr - RSP_CTR_W'(dcache_rsp_size); always @(posedge clk) begin if (reset) begin @@ -249,7 +265,7 @@ module VX_tex_mem #( wire stall_out = rsp_valid && ~rsp_ready; - wire is_last_rsp = (0 == rsp_rem_ctr_n); + wire is_last_rsp = (rsp_rem_ctr == RSP_CTR_W'(dcache_rsp_size)); wire rsp_texels_done = dcache_rsp_fire && is_last_rsp; @@ -290,8 +306,10 @@ module VX_tex_mem #( dpi_trace("\n"); end if (req_valid && req_ready) begin - dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, addr=", + dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, baseaddr=", $time, CORE_ID, req_wid, req_PC, req_tmask, req_filter, req_lgstride); + `TRACE_ARRAY1D(req_baseaddr, NUM_REQS); + dpi_trace(", addr="); `TRACE_ARRAY2D(req_addr, 4, NUM_REQS); dpi_trace("\n"); end diff --git a/hw/rtl/tex_unit/VX_tex_sampler.sv b/hw/rtl/tex_unit/VX_tex_sampler.sv index 63371337..dffc5cf0 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.sv +++ b/hw/rtl/tex_unit/VX_tex_sampler.sv @@ -27,75 +27,78 @@ module VX_tex_sampler #( `UNUSED_PARAM (CORE_ID) - wire valid_s0; - wire [NUM_REQS-1:0] tmask_s0; - wire [REQ_INFOW-1:0] req_info_s0; + wire valid_s0, valid_s1; + wire [NUM_REQS-1:0] req_tmask_s0, req_tmask_s1; + wire [REQ_INFOW-1:0] req_info_s0, req_info_s1; wire [NUM_REQS-1:0][31:0] texel_ul, texel_uh; - wire [NUM_REQS-1:0][31:0] texel_ul_s0, texel_uh_s0; - wire [NUM_REQS-1:0][`TEX_BLEND_FRAC-1:0] blend_v, blend_v_s0; + wire [NUM_REQS-1:0][31:0] texel_ul_s1, texel_uh_s1; + wire [NUM_REQS-1:0][1:0][`TEX_BLEND_FRAC-1:0] req_blends_s0; + wire [NUM_REQS-1:0][`TEX_BLEND_FRAC-1:0] blend_v, blend_v_s1; wire [NUM_REQS-1:0][31:0] texel_v; + wire [NUM_REQS-1:0][3:0][31:0] fmt_texels, fmt_texels_s0; wire stall_out; for (genvar i = 0; i < NUM_REQS; ++i) begin - - wire [3:0][31:0] fmt_texels; - for (genvar j = 0; j < 4; ++j) begin VX_tex_format #( .CORE_ID (CORE_ID) ) tex_format ( .format (req_format), .texel_in (req_data[i][j]), - .texel_out (fmt_texels[j]) + .texel_out (fmt_texels[i][j]) ); - end - - wire [7:0] beta = req_blends[i][0]; - wire [8:0] alpha = `TEX_BLEND_ONE - beta; - - VX_tex_lerp #( - ) tex_lerp_ul ( - .in1 (fmt_texels[0]), - .in2 (fmt_texels[1]), - .alpha (alpha), - .beta (beta), - .out (texel_ul[i]) - ); - - VX_tex_lerp #( - ) tex_lerp_uh ( - .in1 (fmt_texels[2]), - .in2 (fmt_texels[3]), - .alpha (alpha), - .beta (beta), - .out (texel_uh[i]) - ); - - assign blend_v[i] = req_blends[i][1]; + end end VX_pipe_register #( - .DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * `TEX_BLEND_FRAC) + (2 * NUM_REQS * 32)), + .DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * 2 * `TEX_BLEND_FRAC) + (NUM_REQS * 4 * 32)), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_tmask, req_info, blend_v, texel_ul, texel_uh}), - .data_out ({valid_s0, tmask_s0, req_info_s0, blend_v_s0, texel_ul_s0, texel_uh_s0}) + .data_in ({req_valid, req_tmask, req_info, req_blends, fmt_texels}), + .data_out ({valid_s0, req_tmask_s0, req_info_s0, req_blends_s0, fmt_texels_s0}) + ); + + for (genvar i = 0; i < NUM_REQS; ++i) begin + VX_tex_lerp #( + ) tex_lerp_ul ( + .in1 (fmt_texels_s0[i][0]), + .in2 (fmt_texels_s0[i][1]), + .frac (req_blends_s0[i][0]), + .out (texel_ul[i]) + ); + + VX_tex_lerp #( + ) tex_lerp_uh ( + .in1 (fmt_texels_s0[i][2]), + .in2 (fmt_texels_s0[i][3]), + .frac (req_blends_s0[i][0]), + .out (texel_uh[i]) + ); + + assign blend_v[i] = req_blends_s0[i][1]; + end + + VX_pipe_register #( + .DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * `TEX_BLEND_FRAC) + (2 * NUM_REQS * 32)), + .RESETW (1) + ) pipe_reg1 ( + .clk (clk), + .reset (reset), + .enable (~stall_out), + .data_in ({valid_s0, req_tmask_s0, req_info_s0, blend_v, texel_ul, texel_uh}), + .data_out ({valid_s1, req_tmask_s1, req_info_s1, blend_v_s1, texel_ul_s1, texel_uh_s1}) ); for (genvar i = 0; i < NUM_REQS; i++) begin - wire [7:0] beta = blend_v_s0[i]; - wire [8:0] alpha = `TEX_BLEND_ONE - beta; - VX_tex_lerp #( ) tex_lerp_v ( - .in1 (texel_ul_s0[i]), - .in2 (texel_uh_s0[i]), - .alpha (alpha), - .beta (beta), + .in1 (texel_ul_s1[i]), + .in2 (texel_uh_s1[i]), + .frac (blend_v_s1[i]), .out (texel_v[i]) ); end @@ -105,12 +108,12 @@ module VX_tex_sampler #( VX_pipe_register #( .DATAW (1 + NUM_REQS + REQ_INFOW + (NUM_REQS * 32)), .RESETW (1) - ) pipe_reg1 ( + ) pipe_reg2 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({valid_s0, tmask_s0, req_info_s0, texel_v}), - .data_out ({rsp_valid, rsp_tmask, rsp_info, rsp_data}) + .data_in ({valid_s1, req_tmask_s1, req_info_s1, texel_v}), + .data_out ({rsp_valid, rsp_tmask, rsp_info, rsp_data}) ); // can accept new request? diff --git a/hw/rtl/tex_unit/VX_tex_unit.sv b/hw/rtl/tex_unit/VX_tex_unit.sv index c9510827..c10cdf64 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.sv +++ b/hw/rtl/tex_unit/VX_tex_unit.sv @@ -23,9 +23,8 @@ module VX_tex_unit #( VX_tex_rsp_if.master tex_rsp_if ); - localparam REQ_INFOW_S = 64 + `NR_BITS + 1 + `NW_BITS + 32; - localparam REQ_INFOW_A = `TEX_FORMAT_BITS + REQ_INFOW_S; - localparam REQ_INFOW_M = (2 * `NUM_THREADS * `TEX_BLEND_FRAC) + REQ_INFOW_A; + localparam REQ_INFO_W = 64 + `NR_BITS + 1 + `NW_BITS + 32; + localparam BLEND_FRAC_W = (2 * `NUM_THREADS * `TEX_BLEND_FRAC); reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit; reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0]; @@ -95,15 +94,16 @@ module VX_tex_unit #( // mipmap attributes + wire [`NUM_THREADS-1:0][`TEX_LOD_BITS-1:0] mip_level; wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff; wire [`NUM_THREADS-1:0][1:0][`TEX_LOD_BITS-1:0] sel_logdims; for (genvar i = 0; i < `NUM_THREADS; ++i) begin wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0]; - wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][`TEX_LOD_BITS-1:0]; - assign sel_mipoff[i] = tex_mipoff[unit][mip_level]; - assign sel_logdims[i][0] = (tex_logdims[unit][0] - mip_level); - assign sel_logdims[i][1] = (tex_logdims[unit][1] - mip_level); + assign mip_level[i] = tex_req_if.lod[i][`TEX_LOD_BITS-1:0]; + assign sel_mipoff[i] = tex_mipoff[unit][mip_level[i]]; + assign sel_logdims[i][0] = tex_logdims[unit][0]; + assign sel_logdims[i][1] = tex_logdims[unit][1]; end // address generation @@ -114,12 +114,13 @@ module VX_tex_unit #( wire [`TEX_LGSTRIDE_BITS-1:0] mem_req_lgstride; wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] mem_req_blends; wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr; - wire [REQ_INFOW_A-1:0] mem_req_info; + wire [`NUM_THREADS-1:0][31:0] mem_req_baseaddr; + wire [(`TEX_FORMAT_BITS + REQ_INFO_W)-1:0] mem_req_info; wire mem_req_ready; VX_tex_addr #( .CORE_ID (CORE_ID), - .REQ_INFOW (REQ_INFOW_A), + .REQ_INFOW (`TEX_FORMAT_BITS + REQ_INFO_W), .NUM_REQS (`NUM_THREADS) ) tex_addr ( .clk (clk), @@ -132,6 +133,7 @@ module VX_tex_unit #( .req_filter (tex_filter[tex_req_if.unit]), .req_wraps (tex_wraps[tex_req_if.unit]), .req_baseaddr(tex_baddr[tex_req_if.unit]), + .mip_level (mip_level), .req_mipoff (sel_mipoff), .req_logdims(sel_logdims), .req_info ({tex_format[tex_req_if.unit], tex_req_if.uuid, tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}), @@ -141,6 +143,7 @@ module VX_tex_unit #( .rsp_tmask (mem_req_tmask), .rsp_filter (mem_req_filter), .rsp_lgstride(mem_req_lgstride), + .rsp_baseaddr(mem_req_baseaddr), .rsp_addr (mem_req_addr), .rsp_blends (mem_req_blends), .rsp_info (mem_req_info), @@ -152,12 +155,12 @@ module VX_tex_unit #( wire mem_rsp_valid; wire [`NUM_THREADS-1:0] mem_rsp_tmask; wire [`NUM_THREADS-1:0][3:0][31:0] mem_rsp_data; - wire [REQ_INFOW_M-1:0] mem_rsp_info; + wire [(BLEND_FRAC_W + `TEX_FORMAT_BITS + REQ_INFO_W)-1:0] mem_rsp_info; wire mem_rsp_ready; VX_tex_mem #( .CORE_ID (CORE_ID), - .REQ_INFOW (REQ_INFOW_M), + .REQ_INFOW (BLEND_FRAC_W + `TEX_FORMAT_BITS + REQ_INFO_W), .NUM_REQS (`NUM_THREADS) ) tex_mem ( .clk (clk), @@ -172,6 +175,7 @@ module VX_tex_unit #( .req_tmask (mem_req_tmask), .req_filter(mem_req_filter), .req_lgstride(mem_req_lgstride), + .req_baseaddr(mem_req_baseaddr), .req_addr (mem_req_addr), .req_info ({mem_req_blends, mem_req_info}), .req_ready (mem_req_ready), @@ -186,15 +190,9 @@ module VX_tex_unit #( // apply sampler - wire [`NUM_THREADS-1:0][1:0][`TEX_BLEND_FRAC-1:0] rsp_blends; - wire [`TEX_FORMAT_BITS-1:0] rsp_format; - wire [REQ_INFOW_S-1:0] rsp_info; - - assign {rsp_blends, rsp_format, rsp_info} = mem_rsp_info; - VX_tex_sampler #( .CORE_ID (CORE_ID), - .REQ_INFOW (REQ_INFOW_S), + .REQ_INFOW (REQ_INFO_W), .NUM_REQS (`NUM_THREADS) ) tex_sampler ( .clk (clk), @@ -204,9 +202,9 @@ module VX_tex_unit #( .req_valid (mem_rsp_valid), .req_tmask (mem_rsp_tmask), .req_data (mem_rsp_data), - .req_format (rsp_format), - .req_blends (rsp_blends), - .req_info (rsp_info), + .req_blends (mem_rsp_info[(REQ_INFO_W+`TEX_FORMAT_BITS) +: BLEND_FRAC_W]), + .req_format (mem_rsp_info[REQ_INFO_W +: `TEX_FORMAT_BITS]), + .req_info (mem_rsp_info[0 +: REQ_INFO_W]), .req_ready (mem_rsp_ready), // outputs diff --git a/hw/syn/quartus/Makefile b/hw/syn/quartus/Makefile index 662848e1..1dd63335 100644 --- a/hw/syn/quartus/Makefile +++ b/hw/syn/quartus/Makefile @@ -1,6 +1,6 @@ BUILD_DIR ?= build -.PHONY: dogfood unittest pipeline smem cache fpu_core core vortex top1 top2 top4 top8 top16 top32 top64 +.PHONY: dogfood unittest pipeline smem cache fpu_core core vortex top1 top2 top4 top8 top16 top32 top64 texunit dogfood: mkdir -p dogfood/$(BUILD_DIR) @@ -75,4 +75,9 @@ top32: top64: mkdir -p top64/$(BUILD_DIR) cp top64/Makefile top64/$(BUILD_DIR) - $(MAKE) -C top64/$(BUILD_DIR) clean && $(MAKE) -C top64/$(BUILD_DIR) > top64/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file + $(MAKE) -C top64/$(BUILD_DIR) clean && $(MAKE) -C top64/$(BUILD_DIR) > top64/$(BUILD_DIR)/build.log 2>&1 & + +texunit: + mkdir -p texunit/$(BUILD_DIR) + cp texunit/Makefile texunit/$(BUILD_DIR) + $(MAKE) -C texunit/$(BUILD_DIR) clean && $(MAKE) -C texunit/$(BUILD_DIR) > texunit/$(BUILD_DIR)/build.log 2>&1 & \ No newline at end of file diff --git a/hw/syn/quartus/texunit/Makefile b/hw/syn/quartus/texunit/Makefile new file mode 100644 index 00000000..3ecfa892 --- /dev/null +++ b/hw/syn/quartus/texunit/Makefile @@ -0,0 +1,81 @@ +PROJECT = Core +TOP_LEVEL_ENTITY = VX_core +SRC_FILE = VX_core.v +RTL_DIR = ../../../../rtl +THIRD_PARTY_DIR = ../../../../../third_party + +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG +FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10 + +#FAMILY = "Stratix 10" +#DEVICE = 1SX280HN2F43E2VG +#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10 + +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(THIRD_PARTY_DIR)/fpnew/src;$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include;$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src +TEX_INCLUDE = $(RTL_DIR)/tex_unit +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE);$(TEX_INCLUDE) + +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf + +# Executable Configuration +SYN_ARGS = --parallel --read_settings_files=on +FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on +ASM_ARGS = +STA_ARGS = --parallel --do_report_timing + +# Build targets +all: $(PROJECT).sta.rpt + +syn: $(PROJECT).syn.rpt + +fit: $(PROJECT).fit.rpt + +asm: $(PROJECT).asm.rpt + +sta: $(PROJECT).sta.rpt + +smart: smart.log + +# Target implementations +STAMP = echo done > + +$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) + quartus_syn $(PROJECT) $(SYN_ARGS) + $(STAMP) fit.chg + +$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt + quartus_fit $(PROJECT) $(FIT_ARGS) + $(STAMP) asm.chg + $(STAMP) sta.chg + +$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt + quartus_asm $(PROJECT) $(ASM_ARGS) + +$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt + quartus_sta $(PROJECT) $(STA_ARGS) + +smart.log: $(PROJECT_FILES) + quartus_sh --determine_smart_action $(PROJECT) > smart.log + +# Project initialization +$(PROJECT_FILES): + quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "EXT_TEX_ENABLE=1" + +syn.chg: + $(STAMP) syn.chg + +fit.chg: + $(STAMP) fit.chg + +sta.chg: + $(STAMP) sta.chg + +asm.chg: + $(STAMP) asm.chg + +program: $(PROJECT).sof + quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" + +clean: + rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox