diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 540cb03f..f215a822 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -120,9 +120,11 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; + wire dcache_req_fire_any = (| dcache_req_fire); + wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready; - wire mbuf_push = (| dcache_req_fire) + wire mbuf_push = dcache_req_fire_any && is_req_start // first submission only && req_wb; // loads only @@ -333,7 +335,7 @@ module VX_lsu_unit #( if (lsu_req_if.valid && fence_wait) begin $display("%t: *** D$%0d fence wait", $time, CORE_ID); end - if ((| dcache_req_fire)) begin + if (dcache_req_fire_any) begin if (dcache_req_if.rw[0]) begin $write("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire); `PRINT_ARRAY1D(req_addr, `NUM_THREADS); diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index 149af193..571b1974 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -40,12 +40,12 @@ module VX_tex_addr #( wire [NUM_REQS-1:0] tmask_s0; wire [`TEX_FILTER_BITS-1:0] filter_s0; wire [REQ_INFO_WIDTH-1:0] req_info_s0; - - wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0] clamped_lo, clamped_lo_s0; - wire [1:0][NUM_REQS-1:0][`FIXED_FRAC-1:0] clamped_hi, clamped_hi_s0; + wire [1:0][NUM_REQS-1:0][31:0] coord_lo, coord_lo_s0; + wire [1:0][NUM_REQS-1:0][31:0] coord_hi, coord_hi_s0; wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0; wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0; wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_dims_s0; + wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps_s0; wire stall_out; @@ -62,39 +62,21 @@ module VX_tex_addr #( for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 2; ++j) begin - wire [31:0] coord_lo, coord_hi; - - assign coord_lo = req_coords[j][i] - (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0); - assign coord_hi = req_coords[j][i] + (req_filter ? (`FIXED_HALF >> req_logdims[j][i]) : 0); - - VX_tex_wrap #( - .CORE_ID (CORE_ID) - ) tex_wrap_lo ( - .wrap_i (req_wraps[j]), - .coord_i (coord_lo), - .coord_o (clamped_lo[j][i]) - ); - - VX_tex_wrap #( - .CORE_ID (CORE_ID) - ) tex_wrap_hi ( - .wrap_i (req_wraps[j]), - .coord_i (coord_hi), - .coord_o (clamped_hi[j][i]) - ); + assign coord_lo[j][i] = req_filter ? (req_coords[j][i] - (`FIXED_HALF >> req_logdims[j][i])) : req_coords[j][i]; + assign coord_hi[j][i] = req_filter ? (req_coords[j][i] + (`FIXED_HALF >> req_logdims[j][i])) : req_coords[j][i]; end assign mip_addr[i] = req_baseaddr + 32'(req_mipoffset[i]); end VX_pipe_register #( - .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * `FIXED_FRAC)), + .DATAW (1 + NUM_REQS + `TEX_FILTER_BITS + `TEX_STRIDE_BITS + REQ_INFO_WIDTH + 2 * `TEX_WRAP_BITS + NUM_REQS * (2 * `TEX_DIM_BITS + 32 + 2 * 2 * 32)), .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, req_logdims, mip_addr, clamped_lo, clamped_hi}), - .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, log_dims_s0, mip_addr_s0, clamped_lo_s0, clamped_hi_s0}) + .data_in ({req_valid, req_tmask, req_filter, log_stride, req_info, req_wraps, req_logdims, mip_addr, coord_lo, coord_hi}), + .data_out ({valid_s0, tmask_s0, filter_s0, log_stride_s0, req_info_s0, req_wraps_s0, log_dims_s0, mip_addr_s0, coord_lo_s0, coord_hi_s0}) ); // addresses generation @@ -105,9 +87,28 @@ module VX_tex_addr #( for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 2; ++j) begin - assign scaled_lo[j][i] = `FIXED_INT'(clamped_lo_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i])); - assign scaled_hi[j][i] = `FIXED_INT'(clamped_hi_s0[j][i] >> ((`FIXED_FRAC) - log_dims_s0[j][i])); - assign blends[j][i] = filter_s0 ? clamped_lo_s0[j][i][`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); + wire [`FIXED_FRAC-1:0] clamped_lo; + wire [`FIXED_FRAC-1:0] clamped_hi; + + VX_tex_wrap #( + .CORE_ID (CORE_ID) + ) tex_wrap_lo ( + .wrap_i (req_wraps_s0[j]), + .coord_i (coord_lo_s0[j][i]), + .coord_o (clamped_lo) + ); + + VX_tex_wrap #( + .CORE_ID (CORE_ID) + ) tex_wrap_hi ( + .wrap_i (req_wraps_s0[j]), + .coord_i (coord_hi_s0[j][i]), + .coord_o (clamped_hi) + ); + + assign scaled_lo[j][i] = `FIXED_INT'(clamped_lo >> ((`FIXED_FRAC) - log_dims_s0[j][i])); + assign scaled_hi[j][i] = `FIXED_INT'(clamped_hi >> ((`FIXED_FRAC) - log_dims_s0[j][i])); + assign blends[j][i] = filter_s0 ? clamped_lo[`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); end end diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index 8c307e03..9a53050d 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -125,10 +125,10 @@ module VX_tex_memory #( // DCache Request reg [NUM_REQS-1:0] texel_sent_mask; - wire [NUM_REQS-1:0] dcache_req_fire; - wire [NUM_REQS-1:0] req_dup_mask; - assign dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; + wire [NUM_REQS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; + + wire dcache_req_fire_any = (| dcache_req_fire); assign sent_all_ready = (&(dcache_req_if.ready | texel_sent_mask | ~q_req_tmask)) || (req_texel_dup & dcache_req_if.ready[0]); @@ -141,7 +141,7 @@ module VX_tex_memory #( end end - assign req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1}; + wire [NUM_REQS-1:0] req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1}; assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask; assign dcache_req_if.rw = {NUM_REQS{1'b0}}; @@ -163,9 +163,8 @@ module VX_tex_memory #( reg [3:0][NUM_REQS-1:0][31:0] rsp_texels, rsp_texels_n; wire [NUM_REQS-1:0][3:0][31:0] rsp_texels_qual; reg [NUM_REQS-1:0][31:0] rsp_data_qual; - reg [RSP_CTR_W-1:0] rsp_rem_ctr; - wire [NUM_REQS-1:0] rsp_cur_tmask; - wire [$clog2(NUM_REQS + 1)-1:0] rsp_cur_cnt; + reg [RSP_CTR_W-1:0] rsp_rem_ctr, rsp_rem_ctr_init; + wire [RSP_CTR_W-1:0] rsp_rem_ctr_n; wire dcache_rsp_fire; wire [1:0] rsp_texel_idx; wire rsp_texel_dup; @@ -176,10 +175,6 @@ module VX_tex_memory #( assign dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready; - assign rsp_cur_tmask = rsp_texel_dup ? q_req_tmask : dcache_rsp_if.tmask; - - assign rsp_cur_cnt = $countones(rsp_cur_tmask); - for (genvar i = 0; i < NUM_REQS; i++) begin wire [31:0] src_mask = {32{dcache_rsp_if.tmask[i]}}; wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : (dcache_rsp_if.data[i]) & src_mask); @@ -213,14 +208,25 @@ module VX_tex_memory #( end end + always @(*) begin + rsp_rem_ctr_init = RSP_CTR_W'($countones(q_dup_reqs[0] ? NUM_REQS'(1) : q_req_tmask)); + if (q_req_filter) begin + for (integer i = 1; i < 4; ++i) begin + rsp_rem_ctr_init += RSP_CTR_W'($countones(q_dup_reqs[i] ? NUM_REQS'(1) : q_req_tmask)); + end + end + end + + assign rsp_rem_ctr_n = rsp_rem_ctr - RSP_CTR_W'($countones(dcache_rsp_if.tmask)); + always @(posedge clk) begin if (reset) begin rsp_rem_ctr <= 0; end else begin - if ((| dcache_req_fire) && 0 == rsp_rem_ctr) begin - rsp_rem_ctr <= q_req_filter ? {$countones(q_req_tmask), 2'b0} : {2'b0, $countones(q_req_tmask)}; + if (dcache_req_fire_any && 0 == rsp_rem_ctr) begin + rsp_rem_ctr <= rsp_rem_ctr_init; end else if (dcache_rsp_fire) begin - rsp_rem_ctr <= rsp_rem_ctr - RSP_CTR_W'(rsp_cur_cnt); + rsp_rem_ctr <= rsp_rem_ctr_n; end end end @@ -233,7 +239,9 @@ module VX_tex_memory #( wire stall_out = rsp_valid && ~rsp_ready; - wire rsp_texels_done = dcache_rsp_fire && (rsp_rem_ctr == RSP_CTR_W'(rsp_cur_cnt)); + wire is_last_rsp = (0 == rsp_rem_ctr_n); + + wire rsp_texels_done = dcache_rsp_fire && is_last_rsp; assign reqq_pop = rsp_texels_done && ~stall_out; @@ -249,7 +257,7 @@ module VX_tex_memory #( ); // Can accept new cache response? - assign dcache_rsp_if.ready = ~stall_out || (rsp_rem_ctr != RSP_CTR_W'(rsp_cur_cnt)); + assign dcache_rsp_if.ready = ~(is_last_rsp && stall_out); `ifdef DBG_PRINT_TEX wire [`NW_BITS-1:0] req_wid, rsp_wid; @@ -258,7 +266,7 @@ module VX_tex_memory #( assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; always @(posedge clk) begin - if ((| dcache_req_fire)) begin + if (dcache_req_fire_any) begin $write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=", $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag); `PRINT_ARRAY1D(req_texel_addr, NUM_REQS); diff --git a/tests/regression/tex/output.tga b/tests/regression/tex/output.tga deleted file mode 100644 index 31dcb0d3..00000000 Binary files a/tests/regression/tex/output.tga and /dev/null differ diff --git a/tests/regression/tex/rainbow.tga b/tests/regression/tex/rainbow.tga new file mode 100644 index 00000000..0250201d Binary files /dev/null and b/tests/regression/tex/rainbow.tga differ