diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 59e3243c..c229d16f 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -222,9 +222,9 @@ module VX_cluster #( VX_mem_arb #( .NUM_REQS (`NUM_CORES), - .DATA_WIDTH (`L2MEM_LINE_WIDTH), + .DATA_WIDTH (`L2MEM_LINE_WIDTH), + .ADDR_WIDTH (`L2MEM_ADDR_WIDTH), .TAG_IN_WIDTH (`XMEM_TAG_WIDTH), - .TAG_OUT_WIDTH (`L2MEM_TAG_WIDTH), .BUFFERED_REQ (1), .BUFFERED_RSP (1) ) mem_arb ( diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index e5dffbfe..e0817fd6 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -54,11 +54,12 @@ module VX_lsu_unit #( assign word_addr[i] = full_addr[i][REQ_ASHIFT +: REQ_ADDRW]; end + // detect duplicate addresses wire [`NUM_THREADS-1:0] addr_matches; for (genvar i = 0; i < `NUM_THREADS; i++) begin assign addr_matches[i] = (word_addr[0] == word_addr[i]) || ~lsu_req_if.tmask[i]; end - wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches); + wire lsu_is_dup = lsu_req_if.tmask[0] && (& addr_matches); for (genvar i = 0; i < `NUM_THREADS; i++) begin // is non-cacheable address @@ -84,8 +85,8 @@ module VX_lsu_unit #( .clk (clk), .reset (reset), .enable (!stall_in), - .data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}), - .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data}) + .data_in ({lsu_req_if.valid, lsu_is_dup, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}), + .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data}) ); // Can accept new request? @@ -105,9 +106,9 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] rsp_tmask; reg [`NUM_THREADS-1:0] req_sent_mask; - wire req_ready_all; + reg is_req_start; - wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr; + wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr; wire mbuf_full; wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset; @@ -119,9 +120,9 @@ module VX_lsu_unit #( wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; - wire mbuf_push = (| dcache_req_fire) - && (0 == req_sent_mask) // first submission only - && req_wb; // loads only + wire mbuf_push = (| dcache_req_fire) + && is_req_start // first submission only + && req_wb; // loads only wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n); @@ -144,23 +145,27 @@ module VX_lsu_unit #( `UNUSED_PIN (empty) ); - assign req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask); + wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; + + wire req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask_dup); - wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0}; + wire [`NUM_THREADS-1:0] req_sent_mask_n = req_sent_mask | dcache_req_fire; always @(posedge clk) begin if (reset) begin req_sent_mask <= 0; + is_req_start <= 1; end else begin - if (req_ready_all) + if (req_ready_all) begin req_sent_mask <= 0; - else - req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup; + is_req_start <= 1; + end else begin + req_sent_mask <= req_sent_mask_n; + is_req_start <= (0 == req_sent_mask_n); + end end end - wire is_req_start = (0 == req_sent_mask); - // need to hold the acquired tag index until the full request is submitted reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold; wire [`LSUQ_ADDR_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold; @@ -168,9 +173,7 @@ module VX_lsu_unit #( if (mbuf_push) begin req_tag_hold <= mbuf_waddr; end - end - - wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; + end assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid; @@ -184,46 +187,42 @@ module VX_lsu_unit #( end // ensure all dependencies for the requests are resolved - wire req_dep_ready = (req_wb && (~mbuf_full || ~is_req_start)) + wire req_dep_ready = (req_wb && ~(mbuf_full && is_req_start)) || (~req_wb && st_commit_if.ready); // DCache Request - reg [`NUM_THREADS-1:0][29:0] mem_req_addr; - reg [`NUM_THREADS-1:0][3:0] mem_req_byteen; - reg [`NUM_THREADS-1:0][31:0] mem_req_data; + for (genvar i = 0; i < `NUM_THREADS; i++) begin - always @(*) begin - for (integer i = 0; i < `NUM_THREADS; i++) begin - mem_req_byteen[i] = {4{req_wb}}; + reg [3:0] mem_req_byteen; + reg [31:0] mem_req_data; + + always @(*) begin + mem_req_byteen = {4{req_wb}}; case (`LSU_WSIZE(req_type)) - 0: mem_req_byteen[i][req_offset[i]] = 1; + 0: mem_req_byteen[req_offset[i]] = 1; 1: begin - mem_req_byteen[i][req_offset[i]] = 1; - mem_req_byteen[i][{req_addr[i][1], 1'b1}] = 1; + mem_req_byteen[req_offset[i]] = 1; + mem_req_byteen[{req_addr[i][1], 1'b1}] = 1; end - default : mem_req_byteen[i] = {4{1'b1}}; + default : mem_req_byteen = {4{1'b1}}; endcase - mem_req_data[i] = 'x; + mem_req_data = 'x; case (req_offset[i]) - 1: mem_req_data[i][31:8] = req_data[i][23:0]; - 2: mem_req_data[i][31:16] = req_data[i][15:0]; - 3: mem_req_data[i][31:24] = req_data[i][7:0]; - default: mem_req_data[i] = req_data[i]; + 1: mem_req_data[31:8] = req_data[i][23:0]; + 2: mem_req_data[31:16] = req_data[i][15:0]; + 3: mem_req_data[31:24] = req_data[i][7:0]; + default: mem_req_data = req_data[i]; endcase - - mem_req_addr[i] = req_addr[i][31:2]; end - end - assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_dep_ready}} & req_tmask_dup & ~req_sent_mask; - assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}}; - assign dcache_req_if.addr = mem_req_addr; - assign dcache_req_if.byteen = mem_req_byteen; - assign dcache_req_if.data = mem_req_data; + assign dcache_req_if.valid[i] = req_valid && req_dep_ready && req_tmask_dup[i] && !req_sent_mask[i]; + assign dcache_req_if.rw[i] = ~req_wb; + assign dcache_req_if.addr[i] = req_addr[i][31:2]; + assign dcache_req_if.byteen[i] = mem_req_byteen; + assign dcache_req_if.data[i] = mem_req_data; - for (genvar i = 0; i < `NUM_THREADS; ++i) begin `ifdef DBG_CACHE_REQ_INFO assign dcache_req_if.tag[i] = {req_pc, req_wid, req_tag, req_addr_type[i]}; `else @@ -252,22 +251,17 @@ module VX_lsu_unit #( wire [`NUM_THREADS-1:0] rsp_tmask_qual; for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [31:0] src_data = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; - - reg [31:0] rsp_data_shifted; - always @(*) begin - rsp_data_shifted[31:16] = src_data[31:16]; - rsp_data_shifted[15:0] = rsp_offset[i][1] ? src_data[31:16] : src_data[15:0]; - rsp_data_shifted[7:0] = rsp_offset[i][0] ? rsp_data_shifted[15:8] : rsp_data_shifted[7:0]; - end + wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? dcache_rsp_if.data[0] : dcache_rsp_if.data[i]; + wire [15:0] rsp_data16 = rsp_offset[i][1] ? rsp_data32[31:16] : rsp_data32[15:0]; + wire [7:0] rsp_data8 = rsp_offset[i][0] ? rsp_data16[15:8] : rsp_data16[7:0]; always @(*) begin case (`LSU_FMT(rsp_type)) - `FMT_B: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0])); - `FMT_H: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0])); - `FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0])); - `FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0])); - default: rsp_data[i] = rsp_data_shifted; + `FMT_B: rsp_data[i] = 32'(signed'(rsp_data8)); + `FMT_H: rsp_data[i] = 32'(signed'(rsp_data16)); + `FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8)); + `FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16)); + default: rsp_data[i] = rsp_data32; endcase end end diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index 958146a2..fc845c0a 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -3,14 +3,15 @@ module VX_mem_arb #( parameter NUM_REQS = 1, parameter DATA_WIDTH = 1, - parameter TAG_IN_WIDTH = 1, - parameter TAG_OUT_WIDTH = 1, + parameter ADDR_WIDTH = 1, + parameter TAG_IN_WIDTH = 1, parameter BUFFERED_REQ = 0, parameter BUFFERED_RSP = 0, + parameter TYPE = "R", - parameter DATA_SIZE = (DATA_WIDTH / 8), - parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE), - parameter LOG_NUM_REQS = `CLOG2(NUM_REQS) + parameter DATA_SIZE = (DATA_WIDTH / 8), + parameter LOG_NUM_REQS = `CLOG2(NUM_REQS), + parameter TAG_OUT_WIDTH = TAG_IN_WIDTH + LOG_NUM_REQS ) ( input wire clk, input wire reset, @@ -50,20 +51,21 @@ module VX_mem_arb #( if (NUM_REQS > 1) begin - wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_in; + wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in_merged; for (genvar i = 0; i < NUM_REQS; i++) begin - assign req_merged_data_in[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; + assign req_data_in_merged[i] = {{req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]}; end - VX_stream_arbiter #( + VX_stream_arbiter #( .NUM_REQS (NUM_REQS), .DATAW (REQ_DATAW), - .BUFFERED (BUFFERED_REQ) + .BUFFERED (BUFFERED_REQ), + .TYPE (TYPE) ) req_arb ( .clk (clk), .reset (reset), .valid_in (req_valid_in), - .data_in (req_merged_data_in), + .data_in (req_data_in_merged), .ready_in (req_ready_in), .valid_out (req_valid_out), .data_out ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}), @@ -72,11 +74,11 @@ module VX_mem_arb #( /////////////////////////////////////////////////////////////////////// - wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in [LOG_NUM_REQS-1:0]; + wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0]; - wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out; + wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_out_merged; for (genvar i = 0; i < NUM_REQS; i++) begin - assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i]; + assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_data_out_merged[i]; end VX_stream_demux #( @@ -91,7 +93,7 @@ module VX_mem_arb #( .data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}), .ready_in (rsp_ready_in), .valid_out (rsp_valid_out), - .data_out (rsp_merged_data_out), + .data_out (rsp_data_out_merged), .ready_out (rsp_ready_out) ); diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 5c98bc1a..888f4976 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -288,7 +288,6 @@ module VX_mem_unit # ( .DATA_WIDTH (`DMEM_LINE_WIDTH), .ADDR_WIDTH (`DMEM_ADDR_WIDTH), .TAG_IN_WIDTH (`DMEM_TAG_WIDTH), - .TAG_OUT_WIDTH (`XMEM_TAG_WIDTH), .BUFFERED_REQ (1), .BUFFERED_RSP (0) ) mem_arb ( diff --git a/hw/rtl/VX_smem_arb.v b/hw/rtl/VX_smem_arb.v index 13cb5307..4e6cdee2 100644 --- a/hw/rtl/VX_smem_arb.v +++ b/hw/rtl/VX_smem_arb.v @@ -18,7 +18,7 @@ module VX_smem_arb ( // output response VX_dcache_core_rsp_if core_rsp_if ); - localparam REQ_DATAW = 1 + `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH - 1; + localparam REQ_DATAW = `DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + (`DCORE_TAG_WIDTH-1); localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH; // @@ -26,71 +26,59 @@ module VX_smem_arb ( // for (genvar i = 0; i < `NUM_THREADS; ++i) begin - wire cache_req_valid_out; - wire cache_req_ready_out; - wire is_smem_addr_out; - wire is_smem_addr_in = core_req_if.tag[i][0]; - - VX_skid_buffer #( - .DATAW (REQ_DATAW) - ) out_buffer ( + wire [1:0][REQ_DATAW-1:0] req_data_out; + + VX_stream_demux #( + .NUM_REQS (2), + .DATAW (REQ_DATAW), + .BUFFERED (0) + ) rsp_demux ( .clk (clk), .reset (reset), - .valid_in (core_req_if.valid[i]), - .data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}), - .ready_in (core_req_if.ready[i]), - .valid_out (cache_req_valid_out), - .data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}), - .ready_out (cache_req_ready_out) - ); + .sel (core_req_if.tag[i][0]), + .valid_in (core_req_if.valid[i]), + .data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i][`DCORE_TAG_WIDTH-1:1]}), + .ready_in (core_req_if.ready[i]), + .valid_out ({smem_req_if.valid[i], cache_req_if.valid[i]}), + .data_out (req_data_out), + .ready_out ({smem_req_if.ready[i], cache_req_if.ready[i]}) + ); - assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out; - assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out; - assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i]; - - assign smem_req_if.addr[i] = cache_req_if.addr[i]; - assign smem_req_if.rw[i] = cache_req_if.rw[i]; - assign smem_req_if.byteen[i] = cache_req_if.byteen[i]; - assign smem_req_if.data[i] = cache_req_if.data[i]; - assign smem_req_if.tag[i] = cache_req_if.tag[i]; + assign {cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]} = req_data_out[0]; + assign {smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]} = req_data_out[1]; end // // handle responses // - wire [1:0][RSP_DATAW-1:0] rsp_data_in; wire [1:0] rsp_valid_in; - wire [1:0] rsp_ready_in; - + wire [1:0][RSP_DATAW-1:0] rsp_data_in; + wire [`NUM_THREADS-1:0] core_rsp_tmask; wire core_rsp_valid; - wire [`NUM_THREADS-1:0] core_rsp_valid_tmask; + + assign rsp_valid_in[0] = (| cache_rsp_if.valid); + assign rsp_valid_in[1] = (| smem_rsp_if.valid); assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}; assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}}; - assign rsp_valid_in[0] = (| cache_rsp_if.valid); - assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE; - VX_stream_arbiter #( .NUM_REQS (2), - .DATAW (RSP_DATAW), + .DATAW (RSP_DATAW), .BUFFERED (1) ) rsp_arb ( .clk (clk), .reset (reset), .valid_in (rsp_valid_in), .data_in (rsp_data_in), - .ready_in (rsp_ready_in), + .ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}), .valid_out (core_rsp_valid), - .data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}), + .data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}), .ready_out (core_rsp_if.ready) ); - assign cache_rsp_if.ready = rsp_ready_in[0]; - assign smem_rsp_if.ready = rsp_ready_in[1]; - - assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask; + assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_tmask; endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 36173f8b..f3dd9fa6 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -225,8 +225,8 @@ module Vortex ( VX_mem_arb #( .NUM_REQS (`NUM_CLUSTERS), .DATA_WIDTH (`L3MEM_LINE_WIDTH), + .ADDR_WIDTH (`L3MEM_ADDR_WIDTH), .TAG_IN_WIDTH (`L2MEM_TAG_WIDTH), - .TAG_OUT_WIDTH (`L3MEM_TAG_WIDTH), .BUFFERED_REQ (1), .BUFFERED_RSP (1) ) mem_arb ( diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 19e694ed..f5a7e8cf 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -586,23 +586,23 @@ wire [AVS_REQ_TAGW:0] mem_rsp_tag; wire mem_rsp_ready; VX_mem_arb #( - .NUM_REQS (2), - .DATA_WIDTH (LMEM_LINE_WIDTH), - .ADDR_WIDTH (LMEM_ADDR_WIDTH), - .TAG_IN_WIDTH (AVS_REQ_TAGW), - .TAG_OUT_WIDTH (AVS_REQ_TAGW+1) + .NUM_REQS (2), + .DATA_WIDTH (LMEM_LINE_WIDTH), + .ADDR_WIDTH (LMEM_ADDR_WIDTH), + .TAG_IN_WIDTH (AVS_REQ_TAGW), + .TYPE ("X") ) mem_arb ( .clk (clk), .reset (reset), // Source request - .req_valid_in ({cci_mem_req_arb_valid, vx_mem_req_arb_valid}), - .req_rw_in ({cci_mem_req_arb_rw, vx_mem_req_arb_rw}), - .req_byteen_in ({cci_mem_req_arb_byteen, vx_mem_req_arb_byteen}), - .req_addr_in ({cci_mem_req_arb_addr, vx_mem_req_arb_addr}), - .req_data_in ({cci_mem_req_arb_data, vx_mem_req_arb_data}), - .req_tag_in ({cci_mem_req_arb_tag, vx_mem_req_arb_tag}), - .req_ready_in ({cci_mem_req_arb_ready, vx_mem_req_arb_ready}), + .req_valid_in ({vx_mem_req_arb_valid, cci_mem_req_arb_valid}), + .req_rw_in ({vx_mem_req_arb_rw, cci_mem_req_arb_rw}), + .req_byteen_in ({vx_mem_req_arb_byteen, cci_mem_req_arb_byteen}), + .req_addr_in ({vx_mem_req_arb_addr, cci_mem_req_arb_addr}), + .req_data_in ({vx_mem_req_arb_data, cci_mem_req_arb_data}), + .req_tag_in ({vx_mem_req_arb_tag, cci_mem_req_arb_tag}), + .req_ready_in ({vx_mem_req_arb_ready, cci_mem_req_arb_ready}), // Memory request .req_valid_out (mem_req_valid), @@ -614,10 +614,10 @@ VX_mem_arb #( .req_ready_out (mem_req_ready), // Source response - .rsp_valid_out ({cci_mem_rsp_arb_valid, vx_mem_rsp_arb_valid}), - .rsp_data_out ({cci_mem_rsp_arb_data, vx_mem_rsp_arb_data}), - .rsp_tag_out ({cci_mem_rsp_arb_tag, vx_mem_rsp_arb_tag}), - .rsp_ready_out ({cci_mem_rsp_arb_ready, vx_mem_rsp_arb_ready}), + .rsp_valid_out ({vx_mem_rsp_arb_valid, cci_mem_rsp_arb_valid}), + .rsp_data_out ({vx_mem_rsp_arb_data, cci_mem_rsp_arb_data}), + .rsp_tag_out ({vx_mem_rsp_arb_tag, cci_mem_rsp_arb_tag}), + .rsp_ready_out ({vx_mem_rsp_arb_ready, cci_mem_rsp_arb_ready}), // Memory response .rsp_valid_in (mem_rsp_valid), diff --git a/hw/rtl/cache/VX_nc_bypass.v b/hw/rtl/cache/VX_nc_bypass.v index dd8b3da6..84fa2fe9 100644 --- a/hw/rtl/cache/VX_nc_bypass.v +++ b/hw/rtl/cache/VX_nc_bypass.v @@ -97,8 +97,7 @@ module VX_nc_bypass #( reg [NUM_REQS-1:0] core_req_ready_in_r; wire [NUM_REQS-1:0] core_req_valid_in_nc; - wire [CORE_REQ_TIDW-1:0] core_req_nc_tid; - + for (genvar i = 0; i < NUM_REQS; ++i) begin assign core_req_valid_in_nc[i] = core_req_valid_in[i] && core_req_tag_in[i][NC_TAG_BIT]; end @@ -107,14 +106,45 @@ module VX_nc_bypass #( for (integer i = 0; i < NUM_REQS; ++i) begin if (core_req_valid_in_nc[i]) begin core_req_valid_out_r[i] = 0; - core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i)); end else begin core_req_valid_out_r[i] = core_req_valid_in[i]; - core_req_ready_in_r[i] = core_req_ready_out[i]; end end end + wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid; + wire core_req_nc_valid; + + VX_priority_encoder #( + .N (NUM_REQS) + ) core_req_sel ( + .data_in (core_req_valid_in_nc), + .index (core_req_nc_tid), + `UNUSED_PIN (onehot), + .valid_out (core_req_nc_valid) + ); + + if (NUM_REQS > 1) begin + always @(*) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + if (core_req_valid_in_nc[i]) begin + core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i)); + end else begin + core_req_ready_in_r[i] = core_req_ready_out[i]; + end + end + end + end else begin + `UNUSED_VAR (core_req_nc_tid) + always @(*) begin + if (core_req_valid_in_nc) begin + core_req_ready_in_r = mem_req_ready_out; + end else begin + core_req_ready_in_r = core_req_ready_out; + end + end + end + assign core_req_valid_out = core_req_valid_out_r; assign core_req_rw_out = core_req_rw_in; assign core_req_addr_out = core_req_addr_in; @@ -131,57 +161,92 @@ module VX_nc_bypass #( reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r; reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r; reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r; - reg mem_req_ready_in_r; - - wire core_req_nc_valid; - - VX_priority_encoder #( - .N (NUM_REQS) - ) core_req_sel ( - .data_in (core_req_valid_in_nc), - .index (core_req_nc_tid), - `UNUSED_PIN (onehot), - .valid_out (core_req_nc_valid) - ); + reg mem_req_ready_in_r; always @(*) begin if (core_req_nc_valid) begin - mem_req_valid_out_r = 1; - mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid]; - mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH]; - for (integer i = 0; i < P; ++i) begin - mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid]; - end - mem_req_ready_in_r = 0; + mem_req_valid_out_r = 1; + mem_req_ready_in_r = 0; end else begin - mem_req_valid_out_r = mem_req_valid_in; - mem_req_rw_out_r = mem_req_rw_in; - mem_req_addr_out_r = mem_req_addr_in; - mem_req_data_out_r = mem_req_data_in; - mem_req_ready_in_r = mem_req_ready_out; + mem_req_valid_out_r = mem_req_valid_in; + mem_req_ready_in_r = mem_req_ready_out; end end - if (D != 0) begin - wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0]; + if (NUM_REQS > 1) begin always @(*) begin if (core_req_nc_valid) begin - mem_req_byteen_out_r = 0; - mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid]; - mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]}); + mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid]; + mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH]; + for (integer i = 0; i < P; ++i) begin + mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid]; + end end else begin - mem_req_byteen_out_r = mem_req_byteen_in; - mem_req_tag_out_r = mem_req_tag_in; + mem_req_rw_out_r = mem_req_rw_in; + mem_req_addr_out_r = mem_req_addr_in; + mem_req_data_out_r = mem_req_data_in; end end - end else begin + + if (D != 0) begin + wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0]; + always @(*) begin + if (core_req_nc_valid) begin + mem_req_byteen_out_r = 0; + mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid]; + mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]}); + end else begin + mem_req_byteen_out_r = mem_req_byteen_in; + mem_req_tag_out_r = mem_req_tag_in; + end + end + end else begin + always @(*) begin + if (core_req_nc_valid) begin + mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid]; + mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]}); + end else begin + mem_req_byteen_out_r = mem_req_byteen_in; + mem_req_tag_out_r = mem_req_tag_in; + end + end + end + end else begin always @(*) begin if (core_req_nc_valid) begin - mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid]; - mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]}); + mem_req_rw_out_r = core_req_rw_in; + mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH]; + for (integer i = 0; i < P; ++i) begin + mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in; + end end else begin - mem_req_byteen_out_r = mem_req_byteen_in; - mem_req_tag_out_r = mem_req_tag_in; + mem_req_rw_out_r = mem_req_rw_in; + mem_req_addr_out_r = mem_req_addr_in; + mem_req_data_out_r = mem_req_data_in; + end + end + + if (D != 0) begin + wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0]; + always @(*) begin + if (core_req_nc_valid) begin + mem_req_byteen_out_r = 0; + mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in; + mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in}); + end else begin + mem_req_byteen_out_r = mem_req_byteen_in; + mem_req_tag_out_r = mem_req_tag_in; + end + end + end else begin + always @(*) begin + if (core_req_nc_valid) begin + mem_req_byteen_out_r = core_req_byteen_in; + mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in); + end else begin + mem_req_byteen_out_r = mem_req_byteen_in; + mem_req_tag_out_r = mem_req_tag_in; + end end end end @@ -201,26 +266,41 @@ module VX_nc_bypass #( reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r; reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r; - wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; - wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT]; if (NUM_REQS > 1) begin - always @(*) begin - if (is_mem_rsp_nc) begin - core_rsp_valid_out_r = 0; - core_rsp_valid_out_r[rsp_tid] = 1; - for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin - core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; - end - core_rsp_ready_in_r = 0; - end else begin - core_rsp_valid_out_r = core_rsp_valid_in; - core_rsp_tag_out_r = core_rsp_tag_in; - core_rsp_ready_in_r = core_rsp_ready_out; + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; + if (NUM_RSP_TAGS > 1) begin + always @(*) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + if (is_mem_rsp_nc && (rsp_tid == CORE_REQ_TIDW'(i))) begin + core_rsp_valid_out_r[i] = 1; + core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; + core_rsp_ready_in_r[i] = 0; + end else begin + core_rsp_valid_out_r[i] = core_rsp_valid_in[i]; + core_rsp_tag_out_r[i] = core_rsp_tag_in[i]; + core_rsp_ready_in_r[i] = core_rsp_ready_out[i]; + end + end + end + end else begin + always @(*) begin + if (is_mem_rsp_nc) begin + core_rsp_valid_out_r = 0; + core_rsp_valid_out_r[rsp_tid] = 1; + for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin + core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0]; + end + core_rsp_ready_in_r = 0; + end else begin + core_rsp_valid_out_r = core_rsp_valid_in; + core_rsp_tag_out_r = core_rsp_tag_in; + core_rsp_ready_in_r = core_rsp_ready_out; + end end end - end else begin + end else begin always @(*) begin if (is_mem_rsp_nc) begin core_rsp_valid_out_r = 1; @@ -276,6 +356,7 @@ module VX_nc_bypass #( end if (NUM_RSP_TAGS > 1) begin + wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; always @(*) begin if (is_mem_rsp_nc) begin mem_rsp_ready_in_r = core_rsp_ready_out[rsp_tid]; diff --git a/hw/rtl/libs/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v index 9809b58c..4810fe0e 100644 --- a/hw/rtl/libs/VX_priority_encoder.v +++ b/hw/rtl/libs/VX_priority_encoder.v @@ -18,6 +18,12 @@ module VX_priority_encoder #( assign index = 0; assign valid_out = data_in; + end else if (N == 2) begin + + assign onehot = {!data_in[REVERSE], data_in[REVERSE]}; + assign index = !data_in[REVERSE]; + assign valid_out = (| data_in); + end else if (FAST) begin wire [N-1:0] scan_lo; diff --git a/hw/rtl/libs/VX_stream_arbiter.v b/hw/rtl/libs/VX_stream_arbiter.v index b646ed90..ecb6a21e 100644 --- a/hw/rtl/libs/VX_stream_arbiter.v +++ b/hw/rtl/libs/VX_stream_arbiter.v @@ -27,7 +27,6 @@ module VX_stream_arbiter #( wire [NUM_REQS-1:0] sel_1hot; if (TYPE == "X") begin - VX_fixed_arbiter #( .NUM_REQS(NUM_REQS), .LOCK_ENABLE(1) @@ -40,9 +39,7 @@ module VX_stream_arbiter #( .grant_index (sel_idx), .grant_onehot (sel_1hot) ); - end else if (TYPE == "R") begin - VX_rr_arbiter #( .NUM_REQS(NUM_REQS), .LOCK_ENABLE(1) @@ -55,9 +52,7 @@ module VX_stream_arbiter #( .grant_index (sel_idx), .grant_onehot (sel_1hot) ); - end else if (TYPE == "F") begin - VX_fair_arbiter #( .NUM_REQS(NUM_REQS), .LOCK_ENABLE(1) @@ -70,9 +65,7 @@ module VX_stream_arbiter #( .grant_index (sel_idx), .grant_onehot (sel_1hot) ); - end else if (TYPE == "M") begin - VX_matrix_arbiter #( .NUM_REQS(NUM_REQS), .LOCK_ENABLE(1) @@ -85,8 +78,9 @@ module VX_stream_arbiter #( .grant_index (sel_idx), .grant_onehot (sel_1hot) ); - - end + end else begin + $error ("invalid parameter"); + end wire ready_out_unqual; diff --git a/hw/syn/opae/Makefile b/hw/syn/opae/Makefile index d820df9a..4a35b54d 100644 --- a/hw/syn/opae/Makefile +++ b/hw/syn/opae/Makefile @@ -30,7 +30,7 @@ CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) CONFIG8 := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) -CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) +CONFIG32 := -DNUM_CLUSTERS=8 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS) FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)