diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 95e57cd4..a5a77de5 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -303,10 +303,11 @@ `ifdef EXT_TEX_ENABLE `define LSU_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE) `define TEX_TAG_ID_BITS (2) -`define DCORE_TAG_ID_BITS (`MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS) + 1) +`define LSU_TEX_TAG_ID_BITS `MAX(`LSU_TAG_ID_BITS, `TEX_TAG_ID_BITS) +`define DCORE_TAG_ID_BITS (`LSU_TEX_TAG_ID_BITS + 1) `define LSU_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TAG_ID_BITS) `define TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `TEX_TAG_ID_BITS) -`define LSU_TEX_DCACHE_TAG_BITS `MAX(`LSU_DCACHE_TAG_BITS, `TEX_DCACHE_TAG_BITS) +`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_MDATAW + `LSU_TEX_TAG_ID_BITS) `else `define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `NC_ADDR_BITS + `SM_ENABLE) `endif diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 5e576a31..8334d537 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -69,20 +69,27 @@ module VX_execute #( VX_tex_csr_if tex_csr_if(); - wire [`NUM_THREADS-1:0][`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_in; - wire [`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_out; + wire [`NUM_THREADS-1:0][`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_in, lsu_tag_in; + wire [`LSU_TEX_DCACHE_TAG_BITS-1:0] tex_tag_out, lsu_tag_out; + + `UNUSED_VAR (tex_tag_out) + `UNUSED_VAR (lsu_tag_out) for (genvar i = 0; i < `NUM_THREADS; ++i) begin - assign tex_tag_in[i][`LSUQ_ADDR_BITS-1:0] = `LSUQ_ADDR_BITS'(tex_dcache_req_if.tag[i][1:0]); + assign tex_tag_in[i][`LSU_TEX_TAG_ID_BITS-1:0] = `LSU_TEX_TAG_ID_BITS'(tex_dcache_req_if.tag[i][`TEX_TAG_ID_BITS-1:0]); + assign lsu_tag_in[i][`LSU_TEX_TAG_ID_BITS-1:0] = `LSU_TEX_TAG_ID_BITS'(lsu_dcache_req_if.tag[i][`LSU_TAG_ID_BITS-1:0]); `ifdef DBG_CACHE_REQ_INFO - assign tex_tag_in[i][`LSUQ_ADDR_BITS+:`DBG_CACHE_REQ_MDATAW] = tex_dcache_req_if.tag[i][2+:`DBG_CACHE_REQ_MDATAW]; + assign tex_tag_in[i][`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS] = tex_dcache_req_if.tag[i][`TEX_DCACHE_TAG_BITS-1:`TEX_TAG_ID_BITS]; + assign lsu_tag_in[i][`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS] = lsu_dcache_req_if.tag[i][`LSU_DCACHE_TAG_BITS-1:`LSU_TAG_ID_BITS]; `endif end - assign tex_dcache_rsp_if.tag[1:0] = tex_tag_out[1:0]; + + assign tex_dcache_rsp_if.tag[`TEX_TAG_ID_BITS-1:0] = tex_tag_out[`TEX_TAG_ID_BITS-1:0]; + assign lsu_dcache_rsp_if.tag[`LSU_TAG_ID_BITS-1:0] = lsu_tag_out[`LSU_TAG_ID_BITS-1:0]; `ifdef DBG_CACHE_REQ_INFO - assign tex_dcache_rsp_if.tag[2+:`DBG_CACHE_REQ_MDATAW] = tex_tag_out[`LSUQ_ADDR_BITS+:`DBG_CACHE_REQ_MDATAW]; + assign tex_dcache_rsp_if.tag[`TEX_DCACHE_TAG_BITS-1:`TEX_TAG_ID_BITS] = tex_tag_out[`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS]; + assign lsu_dcache_rsp_if.tag[`LSU_DCACHE_TAG_BITS-1:`LSU_TAG_ID_BITS] = lsu_tag_out[`LSU_TEX_DCACHE_TAG_BITS-1:`LSU_TEX_TAG_ID_BITS]; `endif - `UNUSED_VAR (tex_tag_out) VX_cache_arb #( .NUM_REQS (2), @@ -100,7 +107,7 @@ module VX_execute #( .req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}), .req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}), .req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}), - .req_tag_in ({tex_tag_in, lsu_dcache_req_if.tag}), + .req_tag_in ({tex_tag_in, lsu_tag_in}), .req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}), // Dcache request @@ -123,7 +130,7 @@ module VX_execute #( .rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}), .rsp_tmask_out ({tex_dcache_rsp_if.tmask, lsu_dcache_rsp_if.tmask}), .rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}), - .rsp_tag_out ({tex_tag_out, lsu_dcache_rsp_if.tag}), + .rsp_tag_out ({tex_tag_out, lsu_tag_out}), .rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready}) ); diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 4e099d51..537d759a 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -55,7 +55,7 @@ module VX_icache_stage #( assign ifetch_req_if.ready = icache_req_if.ready; `ifdef DBG_CACHE_REQ_INFO - assign icache_req_if.tag = {ifetch_req_if.PC, ifetch_req_if.wid, req_tag}; + assign icache_req_if.tag = {ifetch_req_if.wid, ifetch_req_if.PC, req_tag}; `else assign icache_req_if.tag = req_tag; `endif diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index f215a822..3b985627 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -230,7 +230,7 @@ module VX_lsu_unit #( assign dcache_req_if.data[i] = mem_req_data; `ifdef DBG_CACHE_REQ_INFO - assign dcache_req_if.tag[i] = {req_pc, req_wid, req_tag, req_addr_type[i]}; + assign dcache_req_if.tag[i] = {req_wid, req_pc, req_tag, req_addr_type[i]}; `else assign dcache_req_if.tag[i] = {req_tag, req_addr_type[i]}; `endif @@ -353,8 +353,8 @@ module VX_lsu_unit #( end end if (dcache_rsp_fire) begin - $write("%t: D$%0d Rsp: tmask=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=", - $time, CORE_ID, dcache_rsp_if.tmask, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd); + $write("%t: D$%0d Rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=", + $time, CORE_ID, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd); `PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS); $write(", is_dup=%b\n", rsp_is_dup); end diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 4fc2f0a1..e149340c 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -200,9 +200,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_sel, debug_wid_sel} = mshr_enable ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_wid_sel, debug_pc_sel} = mshr_enable ? mshr_tag[`CACHE_REQ_INFO_RNG] : creq_tag[`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_sel, debug_wid_sel} = 0; + assign {debug_wid_sel, debug_pc_sel} = 0; end `endif @@ -253,9 +253,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0, debug_wid_st0} = tag_st0[`CACHE_REQ_INFO_RNG]; + assign {debug_wid_st0, debug_pc_st0} = tag_st0[`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_st0, debug_wid_st0} = 0; + assign {debug_wid_st0, debug_pc_st0} = 0; end `endif @@ -322,9 +322,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st1, debug_wid_st1} = tag_st1[`CACHE_REQ_INFO_RNG]; + assign {debug_wid_st1, debug_pc_st1} = tag_st1[`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_st1, debug_wid_st1} = 0; + assign {debug_wid_st1, debug_pc_st1} = 0; end `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 5a4c609c..985ad25f 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -91,6 +91,9 @@ module VX_cache #( `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) `STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value")) + localparam CORE_TAG_X_WIDTH = CORE_TAG_WIDTH - NC_ENABLE; + localparam CORE_TAG_ID_X_BITS = CORE_TAG_ID_BITS - NC_ENABLE; + `ifdef PERF_ENABLE wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_write_miss_per_bank; @@ -106,14 +109,14 @@ module VX_cache #( wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_nc; wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_nc; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_nc; - wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_nc; + wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0] core_req_tag_nc; wire [NUM_REQS-1:0] core_req_ready_nc; // Core response wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_nc; wire [NUM_REQS-1:0] core_rsp_tmask_nc; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_nc; - wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_nc; + wire [`CORE_RSP_TAGS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_nc; wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_nc; // Memory request @@ -133,17 +136,17 @@ module VX_cache #( if (NC_ENABLE) begin VX_nc_bypass #( - .NUM_REQS (NUM_REQS), - .NUM_RSP_TAGS (`CORE_RSP_TAGS), - .NC_TAG_BIT (0), + .NUM_REQS (NUM_REQS), + .NUM_RSP_TAGS (`CORE_RSP_TAGS), + .NC_TAG_BIT (0), - .CORE_ADDR_WIDTH(`WORD_ADDR_WIDTH), - .CORE_DATA_SIZE (WORD_SIZE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_ADDR_WIDTH (`WORD_ADDR_WIDTH), + .CORE_DATA_SIZE (WORD_SIZE), + .CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH), - .MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH), - .MEM_DATA_SIZE (CACHE_LINE_SIZE), - .MEM_TAG_WIDTH (MEM_TAG_WIDTH) + .MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH), + .MEM_DATA_SIZE (CACHE_LINE_SIZE), + .MEM_TAG_WIDTH (MEM_TAG_WIDTH) ) nc_bypass ( .clk (clk), .reset (reset), @@ -246,7 +249,7 @@ module VX_cache #( wire mrsq_out_valid, mrsq_out_ready; - // trim out shared memory and non-cacheable flags + // trim out non-cacheable flags assign mem_rsp_tag_nc_a = mem_rsp_tag_nc[NC_ENABLE +: `MEM_ADDR_WIDTH]; VX_elastic_buffer #( @@ -292,14 +295,14 @@ module VX_cache #( wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire [NUM_BANKS-1:0] per_bank_core_req_rw; wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; - wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; + wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; - wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; + wire [NUM_BANKS-1:0][CORE_TAG_X_WIDTH-1:0] per_bank_core_rsp_tag; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; wire [NUM_BANKS-1:0] per_bank_mem_req_valid; @@ -325,7 +328,7 @@ module VX_cache #( .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), + .CORE_TAG_WIDTH (CORE_TAG_X_WIDTH), .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET) ) core_req_bank_sel ( .clk (clk), @@ -363,14 +366,14 @@ module VX_cache #( wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid; wire curr_bank_core_req_rw; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr; - wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; + wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_req_tag; wire curr_bank_core_req_ready; wire curr_bank_core_rsp_valid; wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask; wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data; wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid; - wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; + wire [CORE_TAG_X_WIDTH-1:0] curr_bank_core_rsp_tag; wire curr_bank_core_rsp_ready; wire curr_bank_mem_req_valid; @@ -442,8 +445,8 @@ module VX_cache #( .MSHR_SIZE (MSHR_SIZE), .MREQ_SIZE (MREQ_SIZE), .WRITE_ENABLE (WRITE_ENABLE), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), + .CORE_TAG_WIDTH (CORE_TAG_X_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_X_BITS), .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) ) bank ( `SCOPE_BIND_VX_cache_bank(i) @@ -504,8 +507,8 @@ module VX_cache #( .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), - .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) + .CORE_TAG_WIDTH (CORE_TAG_X_WIDTH), + .CORE_TAG_ID_BITS (CORE_TAG_ID_X_BITS) ) core_rsp_merge ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_define.vh b/hw/rtl/cache/VX_cache_define.vh index 5299f387..1e3af2f9 100644 --- a/hw/rtl/cache/VX_cache_define.vh +++ b/hw/rtl/cache/VX_cache_define.vh @@ -51,7 +51,7 @@ `define LINE_TAG_ADDR(x) x[`LINE_ADDR_WIDTH-1 : `LINE_SELECT_BITS] -`define CACHE_REQ_INFO_RNG CORE_TAG_WIDTH-1:(CORE_TAG_WIDTH-`NW_BITS-32) +`define CACHE_REQ_INFO_RNG CORE_TAG_WIDTH-1 : (CORE_TAG_WIDTH-`DBG_CACHE_REQ_MDATAW) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index aad20de7..f5efdca5 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -7,14 +7,15 @@ module VX_nc_bypass #( parameter CORE_ADDR_WIDTH = 1, parameter CORE_DATA_SIZE = 1, - parameter CORE_TAG_WIDTH = 1, + parameter CORE_TAG_IN_WIDTH = 1, parameter MEM_ADDR_WIDTH = 1, parameter MEM_DATA_SIZE = 1, parameter MEM_TAG_WIDTH = 1, - parameter CORE_DATA_WIDTH = CORE_DATA_SIZE * 8, - parameter MEM_DATA_WIDTH = MEM_DATA_SIZE * 8 + localparam CORE_DATA_WIDTH = CORE_DATA_SIZE * 8, + localparam MEM_DATA_WIDTH = MEM_DATA_SIZE * 8, + localparam CORE_TAG_OUT_WIDTH = CORE_TAG_IN_WIDTH - 1 ) ( input wire clk, input wire reset, @@ -25,7 +26,7 @@ module VX_nc_bypass #( input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_in, input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_in, input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_in, - input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_in, + input wire [NUM_REQS-1:0][CORE_TAG_IN_WIDTH-1:0] core_req_tag_in, output wire [NUM_REQS-1:0] core_req_ready_in, // Core request out @@ -34,21 +35,21 @@ module VX_nc_bypass #( output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out, output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0] core_req_byteen_out, output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out, - output wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_out, + output wire [NUM_REQS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_req_tag_out, input wire [NUM_REQS-1:0] core_req_ready_out, // Core response in input wire [NUM_RSP_TAGS-1:0] core_rsp_valid_in, input wire [NUM_REQS-1:0] core_rsp_tmask_in, input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in, - input wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in, + input wire [NUM_RSP_TAGS-1:0][CORE_TAG_OUT_WIDTH-1:0] core_rsp_tag_in, output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in, // Core response out output wire [NUM_RSP_TAGS-1:0] core_rsp_valid_out, output wire [NUM_REQS-1:0] core_rsp_tmask_out, output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out, - output wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out, + output wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out, input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out, // Memory request in @@ -87,6 +88,7 @@ module VX_nc_bypass #( `UNUSED_VAR (reset) localparam CORE_REQ_TIDW = $clog2(NUM_REQS); + localparam MUX_DATAW = CORE_TAG_IN_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1; localparam CORE_LDATAW = $clog2(CORE_DATA_WIDTH); localparam MEM_LDATAW = $clog2(MEM_DATA_WIDTH); @@ -121,7 +123,17 @@ module VX_nc_bypass #( assign core_req_addr_out = core_req_addr_in; assign core_req_byteen_out = core_req_byteen_in; assign core_req_data_out = core_req_data_in; - assign core_req_tag_out = core_req_tag_in; + + for (genvar i = 0; i < NUM_REQS; ++i) begin + VX_bits_remove #( + .N (CORE_TAG_IN_WIDTH), + .S (1), + .POS (NC_TAG_BIT) + ) bits_remove ( + .data_in (core_req_tag_in[i]), + .data_out (core_req_tag_out[i]) + ); + end if (NUM_REQS > 1) begin for (genvar i = 0; i < NUM_REQS; ++i) begin @@ -140,19 +152,19 @@ module VX_nc_bypass #( if (NUM_REQS > 1) begin - wire [CORE_TAG_WIDTH-1:0] core_req_tag_in_sel; + wire [CORE_TAG_IN_WIDTH-1:0] core_req_tag_in_sel; wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel; wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel; wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel; wire core_req_rw_in_sel; - wire [NUM_REQS-1:0][(CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1)-1:0] core_req_nc_mux_in; + wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]}; end VX_onehot_mux #( - .DATAW (CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1), + .DATAW (MUX_DATAW), .N (NUM_REQS) ) core_req_nc_mux ( .data_in (core_req_nc_mux_in), @@ -209,10 +221,24 @@ module VX_nc_bypass #( // core response handling + wire [NUM_RSP_TAGS-1:0][CORE_TAG_IN_WIDTH-1:0] core_rsp_tag_out_unqual; + wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT]; + for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin + VX_bits_insert #( + .N (CORE_TAG_OUT_WIDTH), + .S (1), + .POS (NC_TAG_BIT) + ) bits_remove ( + .data_in (core_rsp_tag_in[i]), + .sel_in ('0), + .data_out (core_rsp_tag_out_unqual[i]) + ); + end + if (NUM_RSP_TAGS > 1) begin - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW]; reg [NUM_REQS-1:0] rsp_nc_valid_r; always @(*) begin rsp_nc_valid_r = 0; @@ -224,7 +250,7 @@ module VX_nc_bypass #( assign core_rsp_ready_in = core_rsp_ready_out; if (D != 0) begin - wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D]; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_data_out[i] = core_rsp_valid_in[i] ? core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; @@ -236,15 +262,15 @@ module VX_nc_bypass #( end for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_in[i] : mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; - end + assign core_rsp_tag_out[i] = core_rsp_valid_in[i] ? core_rsp_tag_out_unqual[i] : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0]; + end end else begin assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc; - assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_in : mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; + assign core_rsp_tag_out = core_rsp_valid_in ? core_rsp_tag_out_unqual : mem_rsp_tag_in[CORE_TAG_IN_WIDTH-1:0]; assign core_rsp_ready_in = core_rsp_ready_out; if (NUM_REQS > 1) begin - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW]; reg [NUM_REQS-1:0] core_rsp_tmask_in_r; always @(*) begin core_rsp_tmask_in_r = 0; @@ -256,7 +282,7 @@ module VX_nc_bypass #( end if (D != 0) begin - wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D]; + wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_IN_WIDTH +: D]; for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_rsp_data_out[i] = core_rsp_valid_in ? core_rsp_data_in[i] : mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH]; @@ -275,7 +301,7 @@ module VX_nc_bypass #( assign mem_rsp_tag_out = mem_rsp_tag_in; if (NUM_RSP_TAGS > 1) begin - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_IN_WIDTH + D) +: CORE_REQ_TIDW]; assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in[rsp_tid] && core_rsp_ready_out[rsp_tid]) : mem_rsp_ready_out; end else begin assign mem_rsp_ready_in = is_mem_rsp_nc ? (~core_rsp_valid_in && core_rsp_ready_out) : mem_rsp_ready_out; diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index e5c9c9f9..71aaaf38 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -264,11 +264,11 @@ module VX_shared_mem #( for (genvar i = 0; i < NUM_BANKS; ++i) begin if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0[i], debug_wid_st0[i]} = per_bank_core_req_tag_unqual[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; - assign {debug_pc_st1[i], debug_wid_st1[i]} = per_bank_core_req_tag[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + assign {debug_wid_st0[i], debug_pc_st0[i]} = per_bank_core_req_tag_unqual[i][`CACHE_REQ_INFO_RNG]; + assign {debug_wid_st1[i], debug_pc_st1[i]} = per_bank_core_req_tag[i][`CACHE_REQ_INFO_RNG]; end else begin - assign {debug_pc_st0[i], debug_wid_st0[i]} = 0; - assign {debug_pc_st1[i], debug_wid_st1[i]} = 0; + assign {debug_wid_st0[i], debug_pc_st0[i]} = 0; + assign {debug_wid_st1[i], debug_pc_st1[i]} = 0; end end `endif diff --git a/hw/rtl/tex_unit/VX_tex_addr.v b/hw/rtl/tex_unit/VX_tex_addr.v index 571b1974..ae939673 100644 --- a/hw/rtl/tex_unit/VX_tex_addr.v +++ b/hw/rtl/tex_unit/VX_tex_addr.v @@ -5,8 +5,8 @@ module VX_tex_addr #( parameter REQ_INFO_WIDTH = 1, parameter NUM_REQS = 1 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs @@ -17,8 +17,8 @@ module VX_tex_addr #( input wire [`TEX_FILTER_BITS-1:0] req_filter, input wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps, input wire [`TEX_ADDR_BITS-1:0] req_baseaddr, - input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoffset, - input wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] req_logdims, + input wire [NUM_REQS-1:0][`TEX_MIPOFF_BITS-1:0] req_mipoff, + input wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] req_logdims, input wire [REQ_INFO_WIDTH-1:0] req_info, output wire req_ready, @@ -29,7 +29,7 @@ module VX_tex_addr #( output wire [`TEX_FILTER_BITS-1:0] rsp_filter, output wire [`TEX_STRIDE_BITS-1:0] rsp_stride, output wire [NUM_REQS-1:0][3:0][31:0] rsp_addr, - output wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] rsp_blends, + output wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] rsp_blends, output wire [REQ_INFO_WIDTH-1:0] rsp_info, input wire rsp_ready ); @@ -40,11 +40,11 @@ module VX_tex_addr #( wire [NUM_REQS-1:0] tmask_s0; wire [`TEX_FILTER_BITS-1:0] filter_s0; wire [REQ_INFO_WIDTH-1:0] req_info_s0; - wire [1:0][NUM_REQS-1:0][31:0] coord_lo, coord_lo_s0; - wire [1:0][NUM_REQS-1:0][31:0] coord_hi, coord_hi_s0; + wire [NUM_REQS-1:0][1:0][31:0] coord_lo, coord_lo_s0; + wire [NUM_REQS-1:0][1:0][31:0] coord_hi, coord_hi_s0; wire [`TEX_STRIDE_BITS-1:0] log_stride, log_stride_s0; wire [NUM_REQS-1:0][31:0] mip_addr, mip_addr_s0; - wire [1:0][NUM_REQS-1:0][`TEX_DIM_BITS-1:0] log_dims_s0; + wire [NUM_REQS-1:0][1:0][`TEX_DIM_BITS-1:0] log_dims_s0; wire [1:0][`TEX_WRAP_BITS-1:0] req_wraps_s0; wire stall_out; @@ -62,10 +62,10 @@ module VX_tex_addr #( for (genvar i = 0; i < NUM_REQS; ++i) begin for (genvar j = 0; j < 2; ++j) begin - assign coord_lo[j][i] = req_filter ? (req_coords[j][i] - (`FIXED_HALF >> req_logdims[j][i])) : req_coords[j][i]; - assign coord_hi[j][i] = req_filter ? (req_coords[j][i] + (`FIXED_HALF >> req_logdims[j][i])) : req_coords[j][i]; + assign coord_lo[i][j] = req_filter ? (req_coords[j][i] - (`FIXED_HALF >> req_logdims[i][j])) : req_coords[j][i]; + assign coord_hi[i][j] = req_filter ? (req_coords[j][i] + (`FIXED_HALF >> req_logdims[i][j])) : req_coords[j][i]; end - assign mip_addr[i] = req_baseaddr + 32'(req_mipoffset[i]); + assign mip_addr[i] = req_baseaddr + 32'(req_mipoff[i]); end VX_pipe_register #( @@ -81,8 +81,9 @@ module VX_tex_addr #( // addresses generation - wire [1:0][NUM_REQS-1:0][`FIXED_INT-1:0] scaled_lo, scaled_hi; - wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] blends; + wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_lo; + wire [NUM_REQS-1:0][1:0][`FIXED_INT-1:0] scaled_hi; + wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] blends; wire [NUM_REQS-1:0][3:0][31:0] addr; for (genvar i = 0; i < NUM_REQS; ++i) begin @@ -94,7 +95,7 @@ module VX_tex_addr #( .CORE_ID (CORE_ID) ) tex_wrap_lo ( .wrap_i (req_wraps_s0[j]), - .coord_i (coord_lo_s0[j][i]), + .coord_i (coord_lo_s0[i][j]), .coord_o (clamped_lo) ); @@ -102,21 +103,21 @@ module VX_tex_addr #( .CORE_ID (CORE_ID) ) tex_wrap_hi ( .wrap_i (req_wraps_s0[j]), - .coord_i (coord_hi_s0[j][i]), + .coord_i (coord_hi_s0[i][j]), .coord_o (clamped_hi) ); - assign scaled_lo[j][i] = `FIXED_INT'(clamped_lo >> ((`FIXED_FRAC) - log_dims_s0[j][i])); - assign scaled_hi[j][i] = `FIXED_INT'(clamped_hi >> ((`FIXED_FRAC) - log_dims_s0[j][i])); - assign blends[j][i] = filter_s0 ? clamped_lo[`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); + assign scaled_lo[i][j] = `FIXED_INT'(clamped_lo >> ((`FIXED_FRAC) - log_dims_s0[i][j])); + assign scaled_hi[i][j] = `FIXED_INT'(clamped_hi >> ((`FIXED_FRAC) - log_dims_s0[i][j])); + assign blends[i][j] = filter_s0 ? clamped_lo[`BLEND_FRAC-1:0] : `BLEND_FRAC'(0); end end for (genvar i = 0; i < NUM_REQS; ++i) begin - assign addr[i][0] = mip_addr_s0[i] + (32'(scaled_lo[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0; - assign addr[i][1] = mip_addr_s0[i] + (32'(scaled_hi[0][i]) + (32'(scaled_lo[1][i]) << log_dims_s0[0][i])) << log_stride_s0; - assign addr[i][2] = mip_addr_s0[i] + (32'(scaled_lo[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0; - assign addr[i][3] = mip_addr_s0[i] + (32'(scaled_hi[0][i]) + (32'(scaled_hi[1][i]) << log_dims_s0[0][i])) << log_stride_s0; + assign addr[i][0] = mip_addr_s0[i] + (32'(scaled_lo[i][0]) + (32'(scaled_lo[i][1]) << log_dims_s0[i][0])) << log_stride_s0; + assign addr[i][1] = mip_addr_s0[i] + (32'(scaled_hi[i][0]) + (32'(scaled_lo[i][1]) << log_dims_s0[i][0])) << log_stride_s0; + assign addr[i][2] = mip_addr_s0[i] + (32'(scaled_lo[i][0]) + (32'(scaled_hi[i][1]) << log_dims_s0[i][0])) << log_stride_s0; + assign addr[i][3] = mip_addr_s0[i] + (32'(scaled_hi[i][0]) + (32'(scaled_hi[i][1]) << log_dims_s0[i][0])) << log_stride_s0; end assign stall_out = rsp_valid && ~rsp_ready; diff --git a/hw/rtl/tex_unit/VX_tex_memory.v b/hw/rtl/tex_unit/VX_tex_memory.v index 9a53050d..e090a851 100644 --- a/hw/rtl/tex_unit/VX_tex_memory.v +++ b/hw/rtl/tex_unit/VX_tex_memory.v @@ -150,10 +150,7 @@ module VX_tex_memory #( assign dcache_req_if.data = 'x; `ifdef DBG_CACHE_REQ_INFO - wire [`NW_BITS-1:0] q_req_wid; - wire [31:0] q_req_PC; - assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0]; - assign dcache_req_if.tag = {NUM_REQS{q_req_PC, q_req_wid, req_texel_idx}}; + assign dcache_req_if.tag = {NUM_REQS{q_req_info[`DBG_CACHE_REQ_MDATAW-1:0], req_texel_idx}}; `else assign dcache_req_if.tag = {NUM_REQS{req_texel_idx}}; `endif @@ -177,7 +174,7 @@ module VX_tex_memory #( for (genvar i = 0; i < NUM_REQS; i++) begin wire [31:0] src_mask = {32{dcache_rsp_if.tmask[i]}}; - wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : (dcache_rsp_if.data[i]) & src_mask); + wire [31:0] src_data = ((i == 0 || rsp_texel_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]) & src_mask; reg [31:0] rsp_data_shifted; always @(*) begin @@ -260,22 +257,23 @@ module VX_tex_memory #( assign dcache_rsp_if.ready = ~(is_last_rsp && stall_out); `ifdef DBG_PRINT_TEX - wire [`NW_BITS-1:0] req_wid, rsp_wid; - wire [31:0] req_PC, rsp_PC; - assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0]; - assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; + wire [`NW_BITS-1:0] q_req_wid, req_wid, rsp_wid; + wire [31:0] q_req_PC, req_PC, rsp_PC; + assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0]; + assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0]; + assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0]; always @(posedge clk) begin if (dcache_req_fire_any) begin $write("%t: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=", - $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag); + $time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, dcache_req_if.tag[0]); `PRINT_ARRAY1D(req_texel_addr, NUM_REQS); $write(", is_dup=%b\n", req_texel_dup); end if (dcache_rsp_fire) begin $write("%t: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=", - $time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.valid, dcache_rsp_if.tag); - `PRINT_ARRAY1D(rsp_data_qual, NUM_REQS); + $time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, dcache_rsp_if.tag); + `PRINT_ARRAY1D(dcache_rsp_if.data, NUM_REQS); $write("\n"); end if (req_valid && req_ready) begin diff --git a/hw/rtl/tex_unit/VX_tex_sampler.v b/hw/rtl/tex_unit/VX_tex_sampler.v index 75d0a5bb..e5820f72 100644 --- a/hw/rtl/tex_unit/VX_tex_sampler.v +++ b/hw/rtl/tex_unit/VX_tex_sampler.v @@ -10,16 +10,16 @@ module VX_tex_sampler #( // inputs input wire req_valid, - input wire [`NUM_THREADS-1:0] req_tmask, + input wire [NUM_REQS-1:0] req_tmask, input wire [`TEX_FORMAT_BITS-1:0] req_format, - input wire [1:0][NUM_REQS-1:0][`BLEND_FRAC-1:0] req_blends, + input wire [NUM_REQS-1:0][1:0][`BLEND_FRAC-1:0] req_blends, input wire [NUM_REQS-1:0][3:0][31:0] req_data, input wire [REQ_INFO_WIDTH-1:0] req_info, output wire req_ready, // ouputs output wire rsp_valid, - output wire [`NUM_THREADS-1:0] rsp_tmask, + output wire [NUM_REQS-1:0] rsp_tmask, output wire [NUM_REQS-1:0][31:0] rsp_data, output wire [REQ_INFO_WIDTH-1:0] rsp_info, input wire rsp_ready @@ -28,20 +28,20 @@ module VX_tex_sampler #( `UNUSED_PARAM (CORE_ID) wire valid_s0; - wire [`NUM_THREADS-1:0] tmask_s0; + wire [NUM_REQS-1:0] tmask_s0; wire [REQ_INFO_WIDTH-1:0] req_info_s0; wire [NUM_REQS-1:0][31:0] texel_ul, texel_uh; wire [NUM_REQS-1:0][31:0] texel_ul_s0, texel_uh_s0; - wire [NUM_REQS-1:0][`BLEND_FRAC-1:0] blend_v_s0; + wire [NUM_REQS-1:0][`BLEND_FRAC-1:0] blend_v, blend_v_s0; wire [NUM_REQS-1:0][31:0] texel_v; wire stall_out; - for (genvar i = 0; i < NUM_REQS; i++) begin + for (genvar i = 0; i < NUM_REQS; ++i) begin wire [3:0][31:0] fmt_texels; - for (genvar j = 0; j < 4; j++) begin + for (genvar j = 0; j < 4; ++j) begin VX_tex_format #( .CORE_ID (CORE_ID) ) tex_format ( @@ -53,7 +53,7 @@ module VX_tex_sampler #( VX_tex_lerp #( ) tex_lerp_ul ( - .blend (req_blends[0][i]), + .blend (req_blends[i][0]), .in1 (fmt_texels[0]), .in2 (fmt_texels[1]), .out (texel_ul[i]) @@ -61,11 +61,13 @@ module VX_tex_sampler #( VX_tex_lerp #( ) tex_lerp_uh ( - .blend (req_blends[0][i]), + .blend (req_blends[i][0]), .in1 (fmt_texels[2]), .in2 (fmt_texels[3]), .out (texel_uh[i]) ); + + assign blend_v[i] = req_blends[i][1]; end VX_pipe_register #( @@ -75,8 +77,8 @@ module VX_tex_sampler #( .clk (clk), .reset (reset), .enable (~stall_out), - .data_in ({req_valid, req_tmask, req_info, req_blends[1], texel_ul, texel_uh}), - .data_out ({valid_s0, tmask_s0, req_info_s0, blend_v_s0, texel_ul_s0, texel_uh_s0}) + .data_in ({req_valid, req_tmask, req_info, blend_v, texel_ul, texel_uh}), + .data_out ({valid_s0, tmask_s0, req_info_s0, blend_v_s0, texel_ul_s0, texel_uh_s0}) ); for (genvar i = 0; i < NUM_REQS; i++) begin diff --git a/hw/rtl/tex_unit/VX_tex_unit.v b/hw/rtl/tex_unit/VX_tex_unit.v index bb36b335..8c8d32f9 100644 --- a/hw/rtl/tex_unit/VX_tex_unit.v +++ b/hw/rtl/tex_unit/VX_tex_unit.v @@ -22,59 +22,70 @@ module VX_tex_unit #( localparam REQ_INFO_WIDTH_A = `TEX_FORMAT_BITS + REQ_INFO_WIDTH_S; localparam REQ_INFO_WIDTH_M = (2 * `NUM_THREADS * `BLEND_FRAC) + REQ_INFO_WIDTH_A; - reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; - reg [`TEX_DIM_BITS-1:0] tex_dims [1:0][`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; - - reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0]; - reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0]; - reg [`TEX_WRAP_BITS-1:0] tex_wraps [1:0][`NUM_TEX_UNITS-1:0]; - reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0]; + reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; + reg [1:0][`TEX_DIM_BITS-1:0] tex_dims [`NUM_TEX_UNITS-1:0][(1 << `TEX_LOD_BITS)-1:0]; + reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0]; + reg [`TEX_FORMAT_BITS-1:0] tex_format [`NUM_TEX_UNITS-1:0]; + reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0]; + reg [`TEX_FILTER_BITS-1:0] tex_filter [`NUM_TEX_UNITS-1:0]; // CSRs programming + reg [`NUM_TEX_UNITS-1:0] csrs_dirty; + `UNUSED_VAR (csrs_dirty) + for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin - wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS]; - always @(posedge clk) begin - if (tex_csr_if.write_enable) begin + wire [`TEX_LOD_BITS-1:0] mip_level = tex_csr_if.write_data[28 +: `TEX_LOD_BITS]; + always @(posedge clk) begin + if (tex_csr_if.write_enable) begin case (tex_csr_if.write_addr) `CSR_TEX_ADDR(i) : begin tex_baddr[i] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0]; + csrs_dirty[i] <= 1; end `CSR_TEX_FORMAT(i) : begin tex_format[i] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0]; + csrs_dirty[i] <= 1; end `CSR_TEX_WRAP(i) : begin - tex_wraps[0][i] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS]; - tex_wraps[1][i] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS]; + tex_wraps[i][0] <= tex_csr_if.write_data[0 +: `TEX_WRAP_BITS]; + tex_wraps[i][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS +: `TEX_WRAP_BITS]; + csrs_dirty[i] <= 1; end `CSR_TEX_FILTER(i) : begin tex_filter[i] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0]; + csrs_dirty[i] <= 1; end `CSR_TEX_MIPOFF(i) : begin tex_mipoff[i][mip_level] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0]; + csrs_dirty[i] <= 1; end `CSR_TEX_WIDTH(i) : begin - tex_dims[0][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; + tex_dims[i][mip_level][0] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; + csrs_dirty[i] <= 1; end `CSR_TEX_HEIGHT(i) : begin - tex_dims[1][i][mip_level] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; + tex_dims[i][mip_level][1] <= tex_csr_if.write_data[`TEX_DIM_BITS-1:0]; + csrs_dirty[i] <= 1; end endcase end + if (reset || (tex_req_if.valid && tex_req_if.ready)) begin + csrs_dirty[i] <= '0; + end end end // mipmap attributes wire [`NUM_THREADS-1:0][`TEX_MIPOFF_BITS-1:0] sel_mipoff; - wire [1:0][`NUM_THREADS-1:0][`TEX_DIM_BITS-1:0] sel_dims; + wire [`NUM_THREADS-1:0][1:0][`TEX_DIM_BITS-1:0] sel_dims; for (genvar i = 0; i < `NUM_THREADS; ++i) begin wire [`NTEX_BITS-1:0] unit = tex_req_if.unit[`NTEX_BITS-1:0]; wire [`TEX_LOD_BITS-1:0] mip_level = tex_req_if.lod[i][20+:`TEX_LOD_BITS]; - assign sel_mipoff[i] = tex_mipoff[unit][mip_level]; - assign sel_dims[0][i] = tex_dims[0][unit][mip_level]; - assign sel_dims[1][i] = tex_dims[1][unit][mip_level]; + assign sel_mipoff[i] = tex_mipoff[unit][mip_level]; + assign sel_dims[i] = tex_dims[unit][mip_level]; end // address generation @@ -83,7 +94,7 @@ module VX_tex_unit #( wire [`NUM_THREADS-1:0] mem_req_tmask; wire [`TEX_FILTER_BITS-1:0] mem_req_filter; wire [`TEX_STRIDE_BITS-1:0] mem_req_stride; - wire [1:0][`NUM_THREADS-1:0][`BLEND_FRAC-1:0] mem_req_blends; + wire [`NUM_THREADS-1:0][1:0][`BLEND_FRAC-1:0] mem_req_blends; wire [`NUM_THREADS-1:0][3:0][31:0] mem_req_addr; wire [REQ_INFO_WIDTH_A-1:0] mem_req_info; wire mem_req_ready; @@ -101,10 +112,10 @@ module VX_tex_unit #( .req_coords (tex_req_if.coords), .req_format (tex_format[tex_req_if.unit]), .req_filter (tex_filter[tex_req_if.unit]), - .req_wraps ({tex_wraps[1][tex_req_if.unit], tex_wraps[0][tex_req_if.unit]}), - .req_baseaddr(tex_baddr[tex_req_if.unit]), - .req_mipoffset(sel_mipoff), - .req_logdims(sel_dims), + .req_wraps (tex_wraps[tex_req_if.unit]), + .req_baseaddr (tex_baddr[tex_req_if.unit]), + .req_mipoff (sel_mipoff), + .req_logdims (sel_dims), .req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}), .req_ready (tex_req_if.ready), @@ -189,25 +200,22 @@ module VX_tex_unit #( ); `ifdef DBG_PRINT_TEX - for (genvar i = 0; i < `NUM_TEX_UNITS; ++i) begin - always @(posedge clk) begin - if (tex_csr_if.write_enable - && (tex_csr_if.write_addr >= `CSR_TEX_BEGIN(i) - && tex_csr_if.write_addr < `CSR_TEX_BEGIN(i+1))) begin - $display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]); - $display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]); - $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wraps[0][i]); - $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wraps[1][i]); - $display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]); - $display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]); - $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_dims[0][i][0]); - $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_dims[1][i][0]); - end - end - end always @(posedge clk) begin if (tex_req_if.valid && tex_req_if.ready) begin - $display("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=", + for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin + if (csrs_dirty[i]) begin + $display("%t: core%0d-tex-csr: tex%0d_addr=%0h", $time, CORE_ID, i, tex_baddr[i]); + $display("%t: core%0d-tex-csr: tex%0d_format=%0h", $time, CORE_ID, i, tex_format[i]); + $display("%t: core%0d-tex-csr: tex%0d_wrap_u=%0h", $time, CORE_ID, i, tex_wraps[i][0]); + $display("%t: core%0d-tex-csr: tex%0d_wrap_v=%0h", $time, CORE_ID, i, tex_wraps[i][1]); + $display("%t: core%0d-tex-csr: tex%0d_filter=%0h", $time, CORE_ID, i, tex_filter[i]); + $display("%t: core%0d-tex-csr: tex%0d_mipoff[0]=%0h", $time, CORE_ID, i, tex_mipoff[i][0]); + $display("%t: core%0d-tex-csr: tex%0d_width[0]=%0h", $time, CORE_ID, i, tex_dims[i][0][0]); + $display("%t: core%0d-tex-csr: tex%0d_height[0]=%0h", $time, CORE_ID, i, tex_dims[i][0][1]); + end + end + + $write("%t: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=", $time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod); `PRINT_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS); $write(", v=");