From 665b97b8102f9c4cd60cf11845878f1cdbfdcd16 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 8 Feb 2021 16:13:32 -0800 Subject: [PATCH] multi-ported cache support for streaming --- hw/rtl/VX_config.vh | 5 + hw/rtl/VX_lsu_unit.v | 6 +- hw/rtl/VX_mem_unit.v | 1 + hw/rtl/cache/VX_bank.v | 181 +++++++++++++-------- hw/rtl/cache/VX_cache.v | 38 +++-- hw/rtl/cache/VX_cache_config.vh | 7 +- hw/rtl/cache/VX_cache_core_req_bank_sel.v | 190 ++++++++++++++++------ hw/rtl/cache/VX_cache_core_rsp_merge.v | 97 +++++++---- hw/rtl/cache/VX_data_access.v | 22 +-- hw/rtl/cache/VX_miss_resrv.v | 22 +-- hw/rtl/cache/VX_shared_mem.v | 1 + hw/rtl/cache/VX_tag_access.v | 2 - hw/syn/opae/vortex_afu.qsf | 32 ++-- 13 files changed, 387 insertions(+), 217 deletions(-) diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 27b1bfb1..4d1f5f9d 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -296,6 +296,11 @@ `define DNUM_BANKS `NUM_THREADS `endif +// Number of bank ports +`ifndef DNUM_PORTS +`define DNUM_PORTS 1 +`endif + // Core Request Queue Size `ifndef DCREQ_SIZE `define DCREQ_SIZE 4 diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index e6e1cb9e..253707d3 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -234,8 +234,6 @@ module VX_lsu_unit #( // send load commit - wire is_load_rsp = (| dcache_rsp_if.valid); - wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid; VX_pipe_register #( @@ -245,8 +243,8 @@ module VX_lsu_unit #( .clk (clk), .reset (reset), .enable (!load_rsp_stall), - .data_in ({is_load_rsp, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), - .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) + .data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), + .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) ); // Can accept new cache response? diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 35fd4328..8e4d55cb 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -155,6 +155,7 @@ module VX_mem_unit # ( .CACHE_SIZE (`DCACHE_SIZE), .CACHE_LINE_SIZE (`DCACHE_LINE_SIZE), .NUM_BANKS (`DNUM_BANKS), + .NUM_PORTS (`DNUM_PORTS), .WORD_SIZE (`DWORD_SIZE), .NUM_REQS (`DNUM_REQUESTS), .CREQ_SIZE (`DCREQ_SIZE), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 337c90a2..815c6aa1 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -13,6 +13,8 @@ module VX_bank #( parameter CACHE_LINE_SIZE = 1, // Number of bankS parameter NUM_BANKS = 1, + // Number of ports per banks + parameter NUM_PORTS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, @@ -53,20 +55,21 @@ module VX_bank #( `endif // Core Request - input wire core_req_valid, - input wire [`REQS_BITS-1:0] core_req_tid, + input wire [NUM_PORTS-1:0] core_req_valid, + input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel, + input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen, + input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data, + input wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_req_tid, input wire core_req_rw, input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr, - input wire [`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel, - input wire [WORD_SIZE-1:0] core_req_byteen, - input wire [`WORD_WIDTH-1:0] core_req_data, input wire [CORE_TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, // Core Response output wire core_rsp_valid, - output wire [`REQS_BITS-1:0] core_rsp_tid, - output wire [`WORD_WIDTH-1:0] core_rsp_data, + output wire [NUM_PORTS-1:0] core_rsp_pmask, + output wire [NUM_PORTS-1:0][`REQS_BITS-1:0] core_rsp_tid, + output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, input wire core_rsp_ready, @@ -95,19 +98,20 @@ module VX_bank #( wire creq_pop; wire creq_full, creq_empty; + wire [NUM_PORTS-1:0] creq_pmask; + wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; + wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen; + wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data; + wire [NUM_PORTS-1:0][`REQS_BITS-1:0] creq_tid; wire creq_rw; wire [`LINE_ADDR_WIDTH-1:0] creq_addr; - wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; - wire [WORD_SIZE-1:0] creq_byteen; - wire [`WORD_WIDTH-1:0] creq_data; wire [CORE_TAG_WIDTH-1:0] creq_tag; - wire [`REQS_BITS-1:0] creq_tid; - - wire creq_push = core_req_valid && core_req_ready; + + wire creq_push = (| core_req_valid) && core_req_ready; assign core_req_ready = !creq_full; VX_fifo_queue #( - .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH), + .DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), .SIZE (CREQ_SIZE), .BUFFERED (1) ) core_req_queue ( @@ -115,8 +119,8 @@ module VX_bank #( .reset (reset), .push (creq_push), .pop (creq_pop), - .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_addr, core_req_wsel, core_req_byteen, core_req_data}), - .data_out ({creq_tag, creq_tid, creq_rw, creq_addr, creq_wsel, creq_byteen, creq_data}), + .data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_valid, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}), + .data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}), .empty (creq_empty), .full (creq_full), `UNUSED_PIN (alm_empty), @@ -125,22 +129,25 @@ module VX_bank #( ); wire mshr_alm_full; - wire mshr_pop; wire mshr_push; + wire mshr_pop; wire mshr_pending; + wire mshr_valid; wire [`LINE_ADDR_WIDTH-1:0] mshr_addr; - wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel; - wire [WORD_SIZE-1:0] mshr_byteen; wire [CORE_TAG_WIDTH-1:0] mshr_tag; - wire [`REQS_BITS-1:0] mshr_tid; - + wire [NUM_PORTS-1:0] mshr_pmask; + wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel; + wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mshr_byteen; + wire [NUM_PORTS-1:0][`REQS_BITS-1:0] mshr_tid; + wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1; - wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; wire mem_rw_st0, mem_rw_st1; - wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; + wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; + wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1; + wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1; + wire [NUM_PORTS-1:0] pmask_st0, pmask_st1; wire [`CACHE_LINE_WIDTH-1:0] data_st0, data_st1; - wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1; wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1; wire valid_st0, valid_st1; wire is_fill_st0, is_fill_st1; @@ -151,8 +158,8 @@ module VX_bank #( wire force_miss_st0, force_miss_st1; wire writeen_unqual_st0, writeen_unqual_st1; wire incoming_fill_st0, incoming_fill_st1; - wire is_flush_st0; wire mshr_pending_st0; + wire is_flush_st0; wire crsq_alm_full, crsq_push, crsq_pop; wire dreq_alm_full, dreq_push, dreq_pop; @@ -203,8 +210,29 @@ module VX_bank #( end `endif + wire [`CACHE_LINE_WIDTH-1:0] creq_line_data; + + if (`WORD_SELECT_BITS != 0) begin + if (NUM_PORTS > 1) begin + reg [`CACHE_LINE_WIDTH-1:0] creq_line_data_r; + always @(*) begin + creq_line_data_r = 'x; + for (integer p = 0; p < NUM_PORTS; p++) begin + if (creq_pmask[p]) begin + creq_line_data_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data[p]; + end + end + end + assign creq_line_data = creq_line_data_r; + end else begin + assign creq_line_data = {`WORDS_PER_LINE{creq_data}}; + end + end else begin + assign creq_line_data = creq_data; + end + VX_pipe_register #( - .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + 1 + WORD_SIZE + `CACHE_LINE_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + 1 + 1), + .DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1 + 1), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -214,17 +242,18 @@ module VX_bank #( mshr_pop || drsq_pop || creq_pop, mshr_pop_unqual, drsq_pop_unqual, - mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr : creq_addr), - mshr_pop_unqual ? mshr_wsel : creq_wsel, mshr_pop_unqual ? 1'b0 : creq_rw, + mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr : creq_addr), + dram_rsp_valid ? dram_rsp_data : creq_line_data, + mshr_pop_unqual ? mshr_wsel : creq_wsel, mshr_pop_unqual ? mshr_byteen : creq_byteen, - dram_rsp_valid ? dram_rsp_data : {`WORDS_PER_LINE{creq_data}}, mshr_pop_unqual ? mshr_tid : creq_tid, + mshr_pop_unqual ? mshr_pmask : creq_pmask, mshr_pop_unqual ? mshr_tag : creq_tag, mshr_pending_sel, dram_rsp_flush }), - .data_out ({valid_st0, is_mshr_st0, is_fill_st0, addr_st0, wsel_st0, mem_rw_st0, byteen_st0, data_st0, req_tid_st0, tag_st0, mshr_pending_st0, is_flush_st0}) + .data_out ({valid_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, data_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_pending_st0, is_flush_st0}) ); `ifdef DBG_CACHE_REQ_INFO @@ -238,7 +267,6 @@ module VX_bank #( VX_tag_access #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .CACHE_SIZE (CACHE_SIZE), .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -278,14 +306,14 @@ module VX_bank #( assign incoming_fill_st0 = dram_rsp_valid && (addr_st0 == dram_rsp_addr); VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + CORE_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, prev_miss_dep_st0, incoming_fill_st0, miss_st0, force_miss_st0, addr_st0, wsel_st0, data_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, prev_miss_dep_st1, incoming_fill_st1, miss_st1, force_miss_st1, addr_st1, wsel_st1, data_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) + .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, prev_miss_dep_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, data_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}), + .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, prev_miss_dep_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, data_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -311,12 +339,32 @@ module VX_bank #( wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1; - wire dreq_push_st1 = send_fill_req_st1 || do_writeback_st1; + wire dreq_push_st1 = send_fill_req_st1 || do_writeback_st1; + + wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1; + + if (`WORD_SELECT_BITS != 0) begin + reg [CACHE_LINE_SIZE-1:0] line_byteen_r; + always @(*) begin + line_byteen_r = 0; + if (NUM_PORTS > 1) begin + for (integer p = 0; p < NUM_PORTS; p++) begin + if (pmask_st1[p]) begin + line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p]; + end + end + end else begin + line_byteen_r[wsel_st1[0] * WORD_SIZE +: WORD_SIZE] = byteen_st1[0]; + end + end + assign line_byteen_st1 = line_byteen_r; + end else begin + assign line_byteen_st1 = byteen_st1; + end VX_data_access #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), - .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .CACHE_SIZE (CACHE_SIZE), .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), @@ -340,8 +388,7 @@ module VX_bank #( // writing .writeen (valid_st1 && writeen_st1), .is_fill (is_fill_st1), - .wsel (wsel_st1), - .byteen (byteen_st1), + .byteen (line_byteen_st1), .wrdata (data_st1) ); @@ -361,6 +408,7 @@ module VX_bank #( .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS), .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .MSHR_SIZE (MSHR_SIZE), @@ -371,8 +419,8 @@ module VX_bank #( .reset (reset), `ifdef DBG_CACHE_REQ_INFO - .deq_debug_pc (debug_pc_st0), - .deq_debug_wid (debug_wid_st0), + .deq_debug_pc (debug_pc_sel), + .deq_debug_wid (debug_wid_sel), .enq_debug_pc (debug_pc_st1), .enq_debug_wid (debug_wid_st1), `endif @@ -380,7 +428,7 @@ module VX_bank #( // enqueue .enqueue (mshr_push), .enqueue_addr (addr_st1), - .enqueue_data ({wsel_st1, byteen_st1, tag_st1, req_tid_st1}), + .enqueue_data ({wsel_st1, byteen_st1, tag_st1, req_tid_st1, pmask_st1}), .enqueue_is_mshr (is_mshr_st1), .enqueue_as_ready (mshr_init_ready_state), `UNUSED_PIN (enqueue_almfull), @@ -395,7 +443,7 @@ module VX_bank #( .schedule (mshr_pop), .schedule_valid (mshr_valid), .schedule_addr (mshr_addr), - .schedule_data ({mshr_wsel, mshr_byteen, mshr_tag, mshr_tid}), + .schedule_data ({mshr_wsel, mshr_byteen, mshr_tag, mshr_tid, mshr_pmask}), // dequeue .dequeue (mshr_dequeue) @@ -403,25 +451,29 @@ module VX_bank #( // Enqueue core response - wire [`WORD_WIDTH-1:0] crsq_data; + wire [NUM_PORTS-1:0] crsq_pmask; + wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data; + wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid; wire [CORE_TAG_WIDTH-1:0] crsq_tag; - wire [`REQS_BITS-1:0] crsq_tid; wire crsq_empty; assign crsq_push = valid_st1 && crsq_push_st1; assign crsq_pop = core_rsp_valid && core_rsp_ready; if (`WORD_SELECT_BITS != 0) begin - assign crsq_data = readdata_st1[wsel_st1 * `WORD_WIDTH +: `WORD_WIDTH]; + for (genvar p = 0; p < NUM_PORTS; ++p) begin + assign crsq_data[p] = readdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH]; + end end else begin assign crsq_data = readdata_st1; end - assign crsq_tag = tag_st1; + assign crsq_pmask = pmask_st1; assign crsq_tid = req_tid_st1; - + assign crsq_tag = tag_st1; + VX_fifo_queue #( - .DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), + .DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS), .SIZE (CRSQ_SIZE), .ALM_FULL (CRSQ_SIZE-2), .BUFFERED (1) @@ -430,8 +482,8 @@ module VX_bank #( .reset (reset), .push (crsq_push), .pop (crsq_pop), - .data_in ({crsq_data, crsq_tag, crsq_tid}), - .data_out ({core_rsp_data, core_rsp_tag, core_rsp_tid}), + .data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}), + .data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}), .empty (crsq_empty), .alm_full (crsq_alm_full), `UNUSED_PIN (full), @@ -443,7 +495,7 @@ module VX_bank #( // Enqueue DRAM request - wire [CACHE_LINE_SIZE-1:0] dreq_byteen, dreq_byteen_unqual; + wire [CACHE_LINE_SIZE-1:0] dreq_byteen; wire [`LINE_ADDR_WIDTH-1:0] dreq_addr; wire [`CACHE_LINE_WIDTH-1:0] dreq_data; wire dreq_empty, writeback; @@ -452,17 +504,9 @@ module VX_bank #( assign dreq_pop = dram_req_valid && dram_req_ready; - assign writeback = WRITE_ENABLE && do_writeback_st1; + assign writeback = WRITE_ENABLE && do_writeback_st1; - if (`WORD_SELECT_BITS != 0) begin - for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin - assign dreq_byteen_unqual[i * WORD_SIZE +: WORD_SIZE] = (wsel_st1 == `WORD_SELECT_BITS'(i)) ? byteen_st1 : {WORD_SIZE{1'b0}}; - end - end else begin - assign dreq_byteen_unqual = byteen_st1; - end - - assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}}; + assign dreq_byteen = writeback ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}}; assign dreq_addr = addr_st1; assign dreq_data = data_st1; @@ -507,9 +551,12 @@ module VX_bank #( `endif `ifdef DBG_PRINT_CACHE_BANK - always @(posedge clk) begin + always @(posedge clk) begin + /*if (valid_st1 && pmask_st1 == {NUM_PORTS{1'b1}}) begin + $display("%t: cache%0d:%0d full bank multi-porting - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + end*/ if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_qual_st1) begin - $display("%t: miss with incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); + $display("%t: cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); assert(!is_mshr_st1); end if (crsq_alm_full || dreq_alm_full || mshr_alm_full) begin @@ -522,16 +569,16 @@ module VX_bank #( $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID), dram_rsp_data); end if (mshr_pop) begin - $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), mshr_tag, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel); + $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel); end if (creq_pop) begin if (creq_rw) - $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel); + $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel); else - $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel); + $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel); end if (crsq_push) begin - $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1); + $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1); end if (dreq_push) begin if (do_writeback_st1) diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index ee99ec8e..99a7313e 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -12,6 +12,8 @@ module VX_cache #( parameter CACHE_LINE_SIZE = 64, // Number of banks parameter NUM_BANKS = NUM_REQS, + // Number of ports per banks + parameter NUM_PORTS = 1, // Size of a word in bytes parameter WORD_SIZE = 4, @@ -87,20 +89,21 @@ module VX_cache #( `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) - wire [NUM_BANKS-1:0] per_bank_core_req_valid; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire [NUM_BANKS-1:0] per_bank_core_req_rw; wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; - wire [NUM_BANKS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel; - wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; - wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; - wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; - wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; + wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; - wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; wire [NUM_BANKS-1:0] per_bank_dram_req_valid; @@ -178,6 +181,7 @@ module VX_cache #( VX_cache_core_req_bank_sel #( .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), @@ -209,19 +213,20 @@ module VX_cache #( /////////////////////////////////////////////////////////////////////////// for (genvar i = 0; i < NUM_BANKS; i++) begin - wire curr_bank_core_req_valid; + wire [NUM_PORTS-1:0] curr_bank_core_req_valid; + wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel; + wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen; + wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data; + wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_req_tid; wire curr_bank_core_req_rw; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr; - wire [`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel; - wire [WORD_SIZE-1:0] curr_bank_core_req_byteen; - wire [`WORD_WIDTH-1:0] curr_bank_core_req_data; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; - wire [`REQS_BITS-1:0] curr_bank_core_req_tid; wire curr_bank_core_req_ready; wire curr_bank_core_rsp_valid; - wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; - wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data; + wire [NUM_PORTS-1:0] curr_bank_core_rsp_pmask; + wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_rsp_data; + wire [NUM_PORTS-1:0][`REQS_BITS-1:0] curr_bank_core_rsp_tid; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; wire curr_bank_core_rsp_ready; @@ -252,6 +257,7 @@ module VX_cache #( // Core WB assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i]; assign per_bank_core_rsp_valid[i] = curr_bank_core_rsp_valid; + assign per_bank_core_rsp_pmask[i] = curr_bank_core_rsp_pmask; assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid; assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag; assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data; @@ -286,6 +292,7 @@ module VX_cache #( .CACHE_SIZE (CACHE_SIZE), .CACHE_LINE_SIZE (CACHE_LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CREQ_SIZE (CREQ_SIZE), @@ -323,6 +330,7 @@ module VX_cache #( // Core response .core_rsp_valid (curr_bank_core_rsp_valid), + .core_rsp_pmask (curr_bank_core_rsp_pmask), .core_rsp_tid (curr_bank_core_rsp_tid), .core_rsp_data (curr_bank_core_rsp_data), .core_rsp_tag (curr_bank_core_rsp_tag), @@ -347,6 +355,7 @@ module VX_cache #( VX_cache_core_rsp_merge #( .NUM_BANKS (NUM_BANKS), + .NUM_PORTS (NUM_PORTS), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), @@ -355,6 +364,7 @@ module VX_cache #( .clk (clk), .reset (reset), .per_bank_core_rsp_valid (per_bank_core_rsp_valid), + .per_bank_core_rsp_pmask (per_bank_core_rsp_pmask), .per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_tag (per_bank_core_rsp_tag), .per_bank_core_rsp_tid (per_bank_core_rsp_tid), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index af47cb29..a29f2dd3 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -9,11 +9,8 @@ `define REQS_BITS `LOG2UP(NUM_REQS) -// tag byteen tid -`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + WORD_SIZE + `REQS_BITS) - -// metadata word_sel -`define MSHR_DATA_WIDTH (`REQ_INST_META_WIDTH + `UP(`WORD_SELECT_BITS)) +// tag valid byteen tid word_sel +`define MSHR_DATA_WIDTH (CORE_TAG_WIDTH + (1 + WORD_SIZE + `REQS_BITS + `UP(`WORD_SELECT_BITS)) * NUM_PORTS) `define WORD_WIDTH (8 * WORD_SIZE) diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index fdeb99d3..d71bd0e4 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -7,6 +7,8 @@ module VX_cache_core_req_bank_sel #( parameter WORD_SIZE = 4, // Number of banks parameter NUM_BANKS = 4, + // Number of ports per banks + parameter NUM_PORTS = 1, // Number of Word requests per cycle parameter NUM_REQS = 4, // core request tag size @@ -29,14 +31,14 @@ module VX_cache_core_req_bank_sel #( input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, - output wire [NUM_BANKS-1:0] per_bank_core_req_valid, + output wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid, output wire [NUM_BANKS-1:0] per_bank_core_req_rw, output wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr, - output wire [NUM_BANKS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel, - output wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen, - output wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data, + output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel, + output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen, + output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data, + output wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid, output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag, - output wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid, input wire [NUM_BANKS-1:0] per_bank_core_req_ready ); `STATIC_ASSERT (NUM_REQS >= NUM_BANKS, ("invalid number of banks")); @@ -44,7 +46,7 @@ module VX_cache_core_req_bank_sel #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) - wire [NUM_REQS-1:0][`LINE_ADDR_WIDTH-1:0] core_req_line_addr; + wire [NUM_REQS-1:0][`LINE_ADDR_WIDTH-1:0] core_req_line_addr; wire [NUM_REQS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel; wire [NUM_REQS-1:0][`UP(`BANK_SELECT_BITS)-1:0] core_req_bid; @@ -67,48 +69,138 @@ module VX_cache_core_req_bank_sel #( if (NUM_REQS > 1) begin - reg [NUM_BANKS-1:0] per_bank_core_req_valid_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; reg [NUM_BANKS-1:0] per_bank_core_req_rw_r; reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; - reg [NUM_BANKS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r; - reg [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; - reg [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; - reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; - reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; + reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; - reg [NUM_REQS-1:0] core_req_ready_r; + reg [NUM_REQS-1:0] core_req_ready_r; - always @(*) begin - per_bank_core_req_valid_r = 0; - per_bank_core_req_rw_r = 'x; - per_bank_core_req_addr_r = 'x; - per_bank_core_req_wsel_r = 'x; - per_bank_core_req_byteen_r= 'x; - per_bank_core_req_data_r = 'x; - per_bank_core_req_tag_r = 'x; - per_bank_core_req_tid_r = 'x; + if (NUM_PORTS > 1) begin - for (integer i = NUM_REQS-1; i >= 0; --i) begin - if (core_req_valid[i]) begin - per_bank_core_req_valid_r[core_req_bid[i]] = 1; - per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i]; - per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i]; - per_bank_core_req_wsel_r[core_req_bid[i]] = core_req_wsel[i]; - per_bank_core_req_byteen_r[core_req_bid[i]]= core_req_byteen[i]; - per_bank_core_req_data_r[core_req_bid[i]] = core_req_data[i]; - per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i]; - per_bank_core_req_tid_r[core_req_bid[i]] = `REQS_BITS'(i); + reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_line_addr_r; + wire [NUM_REQS-1:0] core_req_line_match; + + always @(*) begin + per_bank_line_addr_r = 'x; + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (core_req_valid[i]) begin + per_bank_line_addr_r[core_req_bid[i]] = core_req_line_addr[i]; + end end end - end + + for (genvar i = NUM_REQS-1; i >= 0; --i) begin + assign core_req_line_match[i] = (core_req_line_addr[i] == per_bank_line_addr_r[core_req_bid[i]]); + end - always @(*) begin - core_req_ready_r = 0; - for (integer j = 0; j < NUM_BANKS; ++j) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin - core_req_ready_r[i] = per_bank_core_req_ready[j]; - break; + if (NUM_PORTS < NUM_REQS) begin + + reg [NUM_BANKS-1:0][NUM_PORTS-1:0][NUM_REQS-1:0] req_select_table_r; + + always @(*) begin + per_bank_core_req_valid_r = 0; + per_bank_core_req_rw_r = 'x; + per_bank_core_req_addr_r = 'x; + per_bank_core_req_wsel_r = 'x; + per_bank_core_req_byteen_r= 'x; + per_bank_core_req_data_r = 'x; + per_bank_core_req_tag_r = 'x; + per_bank_core_req_tid_r = 'x; + req_select_table_r = 'x; + + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (core_req_valid[i]) begin + per_bank_core_req_valid_r[core_req_bid[i]][i % NUM_PORTS] = core_req_line_match[i]; + per_bank_core_req_wsel_r[core_req_bid[i]][i % NUM_PORTS] = core_req_wsel[i]; + per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i]; + per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i]; + per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i); + per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i]; + per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i]; + per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i]; + + req_select_table_r[core_req_bid[i]][i % NUM_PORTS] = (1 << i); + end + end + end + + always @(*) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]] + && core_req_line_match[i] + && req_select_table_r[core_req_bid[i]][i % NUM_PORTS][i]; + end + end + + end else begin + + always @(*) begin + per_bank_core_req_valid_r = 0; + per_bank_core_req_rw_r = 'x; + per_bank_core_req_addr_r = 'x; + per_bank_core_req_wsel_r = 'x; + per_bank_core_req_byteen_r= 'x; + per_bank_core_req_data_r = 'x; + per_bank_core_req_tag_r = 'x; + per_bank_core_req_tid_r = 'x; + core_req_ready_r = 'x; + + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (core_req_valid[i]) begin + per_bank_core_req_valid_r[core_req_bid[i]][i % NUM_PORTS] = core_req_line_match[i]; + per_bank_core_req_wsel_r[core_req_bid[i]][i % NUM_PORTS] = core_req_wsel[i]; + per_bank_core_req_byteen_r[core_req_bid[i]][i % NUM_PORTS] = core_req_byteen[i]; + per_bank_core_req_data_r[core_req_bid[i]][i % NUM_PORTS] = core_req_data[i]; + per_bank_core_req_tid_r[core_req_bid[i]][i % NUM_PORTS] = `REQS_BITS'(i); + per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i]; + per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i]; + per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i]; + core_req_ready_r[i] = per_bank_core_req_ready[core_req_bid[i]] + && core_req_line_match[i]; + end + end + end + end + + end else begin + + always @(*) begin + per_bank_core_req_valid_r = 0; + per_bank_core_req_rw_r = 'x; + per_bank_core_req_addr_r = 'x; + per_bank_core_req_wsel_r = 'x; + per_bank_core_req_byteen_r= 'x; + per_bank_core_req_data_r = 'x; + per_bank_core_req_tag_r = 'x; + per_bank_core_req_tid_r = 'x; + + for (integer i = NUM_REQS-1; i >= 0; --i) begin + if (core_req_valid[i]) begin + per_bank_core_req_valid_r[core_req_bid[i]] = 1; + per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i]; + per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i]; + per_bank_core_req_wsel_r[core_req_bid[i]] = core_req_wsel[i]; + per_bank_core_req_byteen_r[core_req_bid[i]]= core_req_byteen[i]; + per_bank_core_req_data_r[core_req_bid[i]] = core_req_data[i]; + per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i]; + per_bank_core_req_tid_r[core_req_bid[i]] = `REQS_BITS'(i); + end + end + end + + always @(*) begin + core_req_ready_r = 0; + for (integer j = 0; j < NUM_BANKS; ++j) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin + core_req_ready_r[i] = per_bank_core_req_ready[j]; + break; + end end end end @@ -128,15 +220,15 @@ module VX_cache_core_req_bank_sel #( `UNUSED_VAR (core_req_bid) - assign per_bank_core_req_valid = core_req_valid[0]; - assign per_bank_core_req_rw[0] = core_req_rw[0]; - assign per_bank_core_req_addr[0] = core_req_line_addr[0]; - assign per_bank_core_req_wsel[0] = core_req_wsel[0]; - assign per_bank_core_req_byteen[0] = core_req_byteen[0]; - assign per_bank_core_req_data[0] = core_req_data[0]; - assign per_bank_core_req_tag[0] = core_req_tag[0]; - assign per_bank_core_req_tid[0] = 0; - assign core_req_ready[0] = per_bank_core_req_ready; + assign per_bank_core_req_valid = core_req_valid; + assign per_bank_core_req_rw = core_req_rw; + assign per_bank_core_req_addr = core_req_line_addr; + assign per_bank_core_req_wsel = core_req_wsel; + assign per_bank_core_req_byteen = core_req_byteen; + assign per_bank_core_req_data = core_req_data; + assign per_bank_core_req_tag = core_req_tag; + assign per_bank_core_req_tid = 0; + assign core_req_ready = per_bank_core_req_ready; end `ifdef PERF_ENABLE diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 4796eb90..4bed779d 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -1,12 +1,14 @@ `include "VX_cache_config.vh" module VX_cache_core_rsp_merge #( - // Number of banks - parameter NUM_BANKS = 1, - // Size of a word in bytes - parameter WORD_SIZE = 1, // Number of Word requests per cycle parameter NUM_REQS = 1, + // Number of banks + parameter NUM_BANKS = 1, + // Number of ports per banks + parameter NUM_PORTS = 1, + // Size of a word in bytes + parameter WORD_SIZE = 1, // core request tag size parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag @@ -17,9 +19,10 @@ module VX_cache_core_rsp_merge #( // Per Bank WB input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, - input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, + input wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_rsp_pmask, + input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, + input wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, - input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, // Core Response @@ -38,27 +41,55 @@ module VX_cache_core_rsp_merge #( reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; wire core_rsp_ready_unqual; - - always @(*) begin - core_rsp_valid_unqual = 0; - core_rsp_tag_unqual = 'x; - core_rsp_data_unqual = 'x; - core_rsp_bank_select = 0; - for (integer i = 0; i < NUM_BANKS; i++) begin + always @(*) begin + core_rsp_tag_unqual = 'x; + for (integer i = NUM_BANKS-1; i >= 0; --i) begin if (per_bank_core_rsp_valid[i]) begin core_rsp_tag_unqual = per_bank_core_rsp_tag[i]; end end - - for (integer i = 0; i < NUM_BANKS; i++) begin - if (per_bank_core_rsp_valid[i] - && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin - core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; - core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; - core_rsp_bank_select[i] = core_rsp_ready_unqual; + end + + if (NUM_PORTS > 1) begin + + always @(*) begin + core_rsp_valid_unqual = 0; + core_rsp_data_unqual = 'x; + core_rsp_bank_select = 0; + + for (integer i = 0; i < NUM_BANKS; i++) begin + for (integer p = 0; p < NUM_PORTS; p++) begin + if (per_bank_core_rsp_valid[i] + && per_bank_core_rsp_pmask[i][p] + && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin + core_rsp_valid_unqual[per_bank_core_rsp_tid[i][p]] = 1; + core_rsp_data_unqual[per_bank_core_rsp_tid[i][p]] = per_bank_core_rsp_data[i][p]; + core_rsp_bank_select[i] = core_rsp_ready_unqual; + end + end end end + + end else begin + + `UNUSED_VAR (per_bank_core_rsp_pmask) + + always @(*) begin + core_rsp_valid_unqual = 0; + core_rsp_data_unqual = 'x; + core_rsp_bank_select = 0; + + for (integer i = 0; i < NUM_BANKS; i++) begin + if (per_bank_core_rsp_valid[i] + && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin + core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; + core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; + core_rsp_bank_select[i] = core_rsp_ready_unqual; + end + end + end + end wire core_rsp_valid_out; @@ -84,6 +115,8 @@ module VX_cache_core_rsp_merge #( end else begin + `UNUSED_VAR (per_bank_core_rsp_pmask) + reg [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; reg [NUM_REQS-1:0][NUM_BANKS-1:0] bank_select_table; @@ -106,7 +139,6 @@ module VX_cache_core_rsp_merge #( end always @(*) begin - core_rsp_bank_select = 0; for (integer i = 0; i < NUM_BANKS; i++) begin core_rsp_bank_select[i] = core_rsp_ready_unqual[per_bank_core_rsp_tid[i]] && bank_select_table[per_bank_core_rsp_tid[i]][i]; @@ -139,6 +171,7 @@ module VX_cache_core_rsp_merge #( `UNUSED_VAR (clk) `UNUSED_VAR (reset) + `UNUSED_VAR (per_bank_core_rsp_pmask) if (NUM_REQS > 1) begin @@ -150,13 +183,13 @@ module VX_cache_core_rsp_merge #( always @(*) begin core_rsp_valid_unqual = 0; - core_rsp_tag_unqual = per_bank_core_rsp_tag[0]; + core_rsp_tag_unqual = per_bank_core_rsp_tag; core_rsp_data_unqual = 'x; - core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid; - core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0]; + core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; + core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; end - assign per_bank_core_rsp_ready[0] = core_rsp_ready; + assign per_bank_core_rsp_ready = core_rsp_ready; end else begin @@ -164,12 +197,12 @@ module VX_cache_core_rsp_merge #( core_rsp_valid_unqual = 0; core_rsp_tag_unqual = 'x; core_rsp_data_unqual = 'x; - core_rsp_valid_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_valid; - core_rsp_tag_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_tag[0]; - core_rsp_data_unqual[per_bank_core_rsp_tid[0]] = per_bank_core_rsp_data[0]; + core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; + core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag; + core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; end - assign per_bank_core_rsp_ready[0] = core_rsp_ready[per_bank_core_rsp_tid[0]]; + assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid]; end @@ -181,9 +214,9 @@ module VX_cache_core_rsp_merge #( `UNUSED_VAR(per_bank_core_rsp_tid) assign core_rsp_valid = per_bank_core_rsp_valid; - assign core_rsp_tag = per_bank_core_rsp_tag[0]; - assign core_rsp_data = per_bank_core_rsp_data[0]; - assign per_bank_core_rsp_ready[0] = core_rsp_ready; + assign core_rsp_tag = per_bank_core_rsp_tag; + assign core_rsp_data = per_bank_core_rsp_data; + assign per_bank_core_rsp_ready = core_rsp_ready; end end diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index e5f5f7a5..881e5018 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -14,9 +14,7 @@ module VX_data_access #( // Enable cache writeable parameter WRITE_ENABLE = 1, // Enable write-through - parameter WRITE_THROUGH = 1, - // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter WRITE_THROUGH = 1 ) ( input wire clk, input wire reset, @@ -39,8 +37,7 @@ module VX_data_access #( // writing input wire writeen, input wire is_fill, - input wire [`UP(`WORD_SELECT_BITS)-1:0] wsel, - input wire [WORD_SIZE-1:0] byteen, + input wire [CACHE_LINE_SIZE-1:0] byteen, input wire [`CACHE_LINE_WIDTH-1:0] wrdata ); `UNUSED_VAR (reset) @@ -63,19 +60,8 @@ module VX_data_access #( .din(wrdata), .dout(rddata) ); - - wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] byteen_qual; - - if (`WORD_SELECT_BITS != 0) begin - for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin - assign byteen_qual[i] = (wsel == `WORD_SELECT_BITS'(i)) ? byteen : {WORD_SIZE{1'b0}}; - end - end else begin - `UNUSED_VAR (wsel) - assign byteen_qual = byteen; - end - assign byte_enable = is_fill ? {CACHE_LINE_SIZE{1'b1}} : byteen_qual; + assign byte_enable = is_fill ? {CACHE_LINE_SIZE{1'b1}} : byteen; `UNUSED_VAR (readen) @@ -85,7 +71,7 @@ module VX_data_access #( if (is_fill) begin $display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wrdata); end else begin - $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wsel, wrdata[`WORD_WIDTH-1:0]); + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wrdata); end end if (readen) begin diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 4f142d08..3cad5978 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -1,25 +1,27 @@ `include "VX_cache_config.vh" module VX_miss_resrv #( - parameter CACHE_ID = 0, - parameter BANK_ID = 0, + parameter CACHE_ID = 0, + parameter BANK_ID = 0, // Number of Word requests per cycle - parameter NUM_REQS = 1, + parameter NUM_REQS = 1, // Size of line inside a bank in bytes - parameter CACHE_LINE_SIZE = 1, + parameter CACHE_LINE_SIZE = 1, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, + // Number of ports per banks + parameter NUM_PORTS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 1, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 1, - parameter ALM_FULL = (MSHR_SIZE-1), + parameter MSHR_SIZE = 1, + parameter ALM_FULL = (MSHR_SIZE-1), // core request tag size - parameter CORE_TAG_WIDTH = 1, + parameter CORE_TAG_WIDTH = 1, // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0 + parameter CORE_TAG_ID_BITS = 0 ) ( input wire clk, input wire reset, diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index 3cb5843f..5b0871a6 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -74,6 +74,7 @@ module VX_shared_mem #( VX_cache_core_req_bank_sel #( .CACHE_LINE_SIZE (WORD_SIZE), .NUM_BANKS (NUM_BANKS), + .NUM_PORTS (1), .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 5a980742..22d177c6 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -11,8 +11,6 @@ module VX_tag_access #( parameter NUM_BANKS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, - // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0, // bank offset from beginning of index range parameter BANK_ADDR_OFFSET = 0 ) ( diff --git a/hw/syn/opae/vortex_afu.qsf b/hw/syn/opae/vortex_afu.qsf index d92ca308..438748ed 100644 --- a/hw/syn/opae/vortex_afu.qsf +++ b/hw/syn/opae/vortex_afu.qsf @@ -9,20 +9,20 @@ set_global_assignment -name VERILOG_MACRO SYNTHESIS set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON -set_global_assignment -name OPTIMIZATION_TECHNIQUE BALANCED -set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" -set_global_assignment -name FITTER_EFFORT "STANDARD FIT" -set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" -set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM -set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON -set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 -set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 -set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" -set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON -set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON -set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON -set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON -set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON -set_global_assignment -name POWER_USE_TA_VALUE 65 -set_global_assignment -name SEED 1 \ No newline at end of file +#set_global_assignment -name OPTIMIZATION_TECHNIQUE BALANCED +#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" +#set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 +#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 +#set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" +#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON +#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON +#set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON +#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON +#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON +#set_global_assignment -name POWER_USE_TA_VALUE 65 +#set_global_assignment -name SEED 1 \ No newline at end of file