From fc5621cd1dbb7162ca6151f13ce487d038c9e6a7 Mon Sep 17 00:00:00 2001
From: felsabbagh3
Date: Mon, 2 Mar 2020 23:08:54 -0800
Subject: [PATCH] Everything except bank internals

---
Review notes (text in this section is ignored when the patch is applied):

 * The patch text had lost its line breaks; it is re-broken here and the hunk
   sizes / diffstat are recomputed for the rewritten files. The "index" blob
   ids are the original ones and no longer describe the new contents.
 * NOTE(review): the placement of the blank context lines in the Makefile hunk
   is reconstructed from the hunk header (3 + 3 context lines) -- confirm
   against rtl/Makefile before applying.
 * RTL fixes: malformed packed ranges, duplicate instance name, undeclared
   nets (dram_fill_addr, per_bank_wb_valid), macro typos (dFQQ_SIZE,
   vx_log2), genvar instead of integer in generate loops, run-time conditions
   moved out of generate-if, bank ports connected, vx_clog2 parenthesised and
   given its missing 2 -> 1 arm, address-range macros corrected, inverted
   pop masks/polarities, undriven dram_req_size, and the generic queue's
   storage dimensions, pointer wrap and assignment kinds.
 * NOTE(review): per_bank_wb_tid is treated as a per-request valid mask
   because it is NUMBER_REQUESTS wide and driven by the bank's bank_wb_valid
   port -- confirm this matches the intended bank implementation.

 rtl/Makefile                              |   4 +-
 rtl/VX_cache/VX_cache.v                   | 153 ++++++++++++++++++++++
 rtl/VX_cache/VX_cache_bank.v              |  43 ++++++
 rtl/VX_cache/VX_cache_config.v            | 130 ++++++++++++++++++
 rtl/VX_cache/VX_cache_core_req_bank_sel.v |  26 ++++
 rtl/VX_cache/VX_cache_dfq_queue.v         |  76 +++++++++++
 rtl/VX_cache/VX_cache_dram_req_arb.v      |  68 ++++++++++
 rtl/VX_cache/VX_cache_wb_sel_merge.v      |  75 +++++++++++
 rtl/VX_generic_queue.v                    |  55 ++++++++
 9 files changed, 627 insertions(+), 3 deletions(-)
 create mode 100644 rtl/VX_cache/VX_cache.v
 create mode 100644 rtl/VX_cache/VX_cache_bank.v
 create mode 100644 rtl/VX_cache/VX_cache_config.v
 create mode 100644 rtl/VX_cache/VX_cache_core_req_bank_sel.v
 create mode 100644 rtl/VX_cache/VX_cache_dfq_queue.v
 create mode 100644 rtl/VX_cache/VX_cache_dram_req_arb.v
 create mode 100644 rtl/VX_cache/VX_cache_wb_sel_merge.v
 create mode 100644 rtl/VX_generic_queue.v

diff --git a/rtl/Makefile b/rtl/Makefile
index 29a658bc..980a3211 100644
--- a/rtl/Makefile
+++ b/rtl/Makefile
@@ -1,9 +1,7 @@
 all: RUNFILE
 
 
-# /rf2_256x128_wm1/
-BaseMEM=../models/memory/cln28hpm
-INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Icompat/ -Isimulate
+INCLUDE=-I. -Ishared_memory -Icache -IVX_cache -IVX_cache/interfaces -Iinterfaces/ -Ipipe_regs/ -Icompat/ -Isimulate
 
 FILE=Vortex.v
 
diff --git a/rtl/VX_cache/VX_cache.v b/rtl/VX_cache/VX_cache.v
new file mode 100644
index 00000000..a539b350
--- /dev/null
+++ b/rtl/VX_cache/VX_cache.v
@@ -0,0 +1,153 @@
+`include "VX_cache_config.v"
+
+// Top level of the multi-bank cache: steers core requests to banks, merges
+// per-bank core writebacks and arbitrates per-bank DRAM fill/writeback traffic.
+module VX_cache (
+    input wire clk,
+    input wire reset,
+
+    // Req Info
+    input wire [`NUMBER_REQUESTS-1:0]       core_req_valid,
+    input wire [`NUMBER_REQUESTS-1:0][31:0] core_req_addr,
+    input wire [`NUMBER_REQUESTS-1:0][31:0] core_req_writedata,
+    input wire [2:0]                        core_req_mem_read,
+    input wire [2:0]                        core_req_mem_write,
+
+    // Req meta
+    input wire [4:0]      core_req_rd,
+    input wire [1:0]      core_req_wb,
+    input wire [`NW_M1:0] core_req_warp_num,
+
+    // Core Writeback
+    input  wire                              core_no_wb_slot,
+    output wire [`NUMBER_REQUESTS-1:0]       core_wb_valid,
+    output wire [4:0]                        core_wb_req_rd,
+    output wire [1:0]                        core_wb_req_wb,
+    output wire [`NW_M1:0]                   core_wb_warp_num,
+    output wire [`NUMBER_REQUESTS-1:0][31:0] core_wb_readdata,
+
+    // Dram Fill Response
+    input wire                             dram_fill_rsp,
+    input wire [31:0]                      dram_fill_rsp_addr,
+    input wire [`BANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data,
+
+    // Dram request
+    output wire                             dram_req,
+    output wire                             dram_req_write,
+    output wire                             dram_req_read,
+    output wire [31:0]                      dram_req_addr,
+    output wire [31:0]                      dram_req_size,
+    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data
+);
+
+    // Per-bank core request valids and core writeback bundles
+    wire [`NUMBER_BANKS-1:0][`NUMBER_REQUESTS-1:0] per_bank_valids;
+    wire [`NUMBER_BANKS-1:0]                       per_bank_wb_pop;
+    wire [`NUMBER_BANKS-1:0][`NUMBER_REQUESTS-1:0] per_bank_wb_tid;
+    wire [`NUMBER_BANKS-1:0][4:0]                  per_bank_wb_rd;
+    wire [`NUMBER_BANKS-1:0][1:0]                  per_bank_wb_wb;
+    wire [`NUMBER_BANKS-1:0][`NW_M1:0]             per_bank_wb_warp_num;
+    wire [`NUMBER_BANKS-1:0][31:0]                 per_bank_wb_data;
+
+    // Per-bank DRAM fill requests
+    wire                           dfqq_full;
+    wire [`NUMBER_BANKS-1:0]       per_bank_dram_fill_req;
+    wire [`NUMBER_BANKS-1:0][31:0] per_bank_dram_fill_req_addr;
+
+    // Per-bank DRAM writeback requests
+    wire [`NUMBER_BANKS-1:0]                            per_bank_dram_wb_queue_pop;
+    wire [`NUMBER_BANKS-1:0]                            per_bank_dram_wb_req;
+    wire [`NUMBER_BANKS-1:0][31:0]                      per_bank_dram_wb_req_addr;
+    wire [`NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_dram_wb_req_data;
+
+    VX_cache_dram_req_arb VX_cache_dram_req_arb(
+        .clk                        (clk),
+        .reset                      (reset),
+        .dfqq_full                  (dfqq_full),
+        .per_bank_dram_fill_req     (per_bank_dram_fill_req),
+        .per_bank_dram_fill_req_addr(per_bank_dram_fill_req_addr),
+        .per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop),
+        .per_bank_dram_wb_req       (per_bank_dram_wb_req),
+        .per_bank_dram_wb_req_addr  (per_bank_dram_wb_req_addr),
+        .per_bank_dram_wb_req_data  (per_bank_dram_wb_req_data),
+        .dram_req                   (dram_req),
+        .dram_req_write             (dram_req_write),
+        .dram_req_read              (dram_req_read),
+        .dram_req_addr              (dram_req_addr),
+        .dram_req_size              (dram_req_size),
+        .dram_req_data              (dram_req_data)
+    );
+
+    VX_cache_core_req_bank_sel VX_cache_core_req_bank_sel(
+        .core_req_valid (core_req_valid),
+        .core_req_addr  (core_req_addr),
+        .per_bank_valids(per_bank_valids)
+    );
+
+    // Instance names must be unique within the module
+    VX_cache_wb_sel_merge VX_cache_wb_sel_merge(
+        .per_bank_wb_tid     (per_bank_wb_tid),
+        .per_bank_wb_rd      (per_bank_wb_rd),
+        .per_bank_wb_wb      (per_bank_wb_wb),
+        .per_bank_wb_warp_num(per_bank_wb_warp_num),
+        .per_bank_wb_data    (per_bank_wb_data),
+        .per_bank_wb_pop     (per_bank_wb_pop),
+
+        .core_no_wb_slot     (core_no_wb_slot),
+        .core_wb_valid       (core_wb_valid),
+        .core_wb_req_rd      (core_wb_req_rd),
+        .core_wb_req_wb      (core_wb_req_wb),
+        .core_wb_warp_num    (core_wb_warp_num),
+        .core_wb_readdata    (core_wb_readdata)
+    );
+
+    genvar curr_bank;
+    generate
+        for (curr_bank = 0; curr_bank < `NUMBER_BANKS; curr_bank = curr_bank + 1) begin : banks
+            // A fill response reaches this bank only when a response is present
+            // and (with more than one bank) its address selects this bank.
+            wire curr_bank_dram_fill_rsp = dram_fill_rsp
+                && ((`NUMBER_BANKS == 1) || (dram_fill_rsp_addr[`BANK_SELECT_ADDR_RNG] == curr_bank));
+
+            VX_cache_bank bank (
+                .clk                     (clk),
+                .reset                   (reset),
+
+                // Core request: every bank sees the whole request, qualified by its own valids
+                .bank_valids             (per_bank_valids[curr_bank]),
+                .bank_addr               (core_req_addr),
+                .bank_writedata          (core_req_writedata),
+                .bank_rd                 (core_req_rd),
+                .bank_wb                 (core_req_wb),
+                .bank_warp_num           (core_req_warp_num),
+                .bank_mem_read           (core_req_mem_read),
+                .bank_mem_write          (core_req_mem_write),
+
+                // Core writeback
+                .bank_wb_pop             (per_bank_wb_pop[curr_bank]),
+                .bank_wb_valid           (per_bank_wb_tid[curr_bank]),
+                .bank_wb_rd              (per_bank_wb_rd[curr_bank]),
+                .bank_wb_wb              (per_bank_wb_wb[curr_bank]),
+                .bank_wb_warp_num        (per_bank_wb_warp_num[curr_bank]),
+                .bank_wb_data            (per_bank_wb_data[curr_bank]),
+
+                // Dram fill request
+                .dram_fill_req           (per_bank_dram_fill_req[curr_bank]),
+                .dram_fill_req_addr      (per_bank_dram_fill_req_addr[curr_bank]),
+                .dram_fill_req_queue_full(dfqq_full),
+
+                // Dram fill response
+                .dram_fill_rsp           (curr_bank_dram_fill_rsp),
+                .dram_fill_addr          (dram_fill_rsp_addr),
+                .dram_fill_rsp_data      (dram_fill_rsp_data),
+
+                // Dram writeback request
+                .dram_wb_queue_pop       (per_bank_dram_wb_queue_pop[curr_bank]),
+                .dram_wb_req             (per_bank_dram_wb_req[curr_bank]),
+                .dram_wb_req_addr        (per_bank_dram_wb_req_addr[curr_bank]),
+                .dram_wb_req_data        (per_bank_dram_wb_req_data[curr_bank])
+            );
+        end
+    endgenerate
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_cache/VX_cache_bank.v b/rtl/VX_cache/VX_cache_bank.v
new file mode 100644
index 00000000..1a4b4151
--- /dev/null
+++ b/rtl/VX_cache/VX_cache_bank.v
@@ -0,0 +1,43 @@
+`include "VX_cache_config.v"
+
+// Single cache bank. Port-level skeleton only: the internals are not implemented yet.
+module VX_cache_bank (
+    input wire clk,
+    input wire reset,
+
+    // Input Core Request
+    input wire [`NUMBER_REQUESTS-1:0]       bank_valids,
+    input wire [`NUMBER_REQUESTS-1:0][31:0] bank_addr,
+    input wire [`NUMBER_REQUESTS-1:0][31:0] bank_writedata,
+    input wire [4:0]                        bank_rd,
+    input wire [1:0]                        bank_wb,
+    input wire [`NW_M1:0]                   bank_warp_num,
+    input wire [2:0]                        bank_mem_read,
+    input wire [2:0]                        bank_mem_write,
+
+    // Output Core WB
+    input  wire                        bank_wb_pop,
+    output wire [`NUMBER_REQUESTS-1:0] bank_wb_valid,
+    output wire [4:0]                  bank_wb_rd,
+    output wire [1:0]                  bank_wb_wb,
+    output wire [`NW_M1:0]             bank_wb_warp_num,
+    output wire [31:0]                 bank_wb_data,
+
+    // Dram Fill Requests
+    output wire        dram_fill_req,
+    output wire [31:0] dram_fill_req_addr,
+    input  wire        dram_fill_req_queue_full,
+
+    // Dram Fill Response
+    input wire                             dram_fill_rsp,
+    input wire [31:0]                      dram_fill_addr,
+    input wire [`BANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data,
+
+    // Dram WB Requests
+    input  wire                             dram_wb_queue_pop,
+    output wire                             dram_wb_req,
+    output wire [31:0]                      dram_wb_req_addr,
+    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_wb_req_data
+);
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_cache/VX_cache_config.v b/rtl/VX_cache/VX_cache_config.v
new file mode 100644
index 00000000..f8bd3634
--- /dev/null
+++ b/rtl/VX_cache/VX_cache_config.v
@@ -0,0 +1,130 @@
+`ifndef VX_CACHE_CONFIG
+`define VX_CACHE_CONFIG
+
+`include "../VX_define.h"
+
+// ========================================= Configurable Knobs =========================================
+// General Cache Knobs
+    // Size of cache in bytes
+    `define CACHE_SIZE_BYTES 1024
+    // Size of line inside a bank in bytes
+    `define BANK_LINE_SIZE_BYTES 16
+    // Number of banks
+    `define NUMBER_BANKS 8
+    // Size of a word in bytes
+    `define WORD_SIZE_BYTES 4
+    // Number of Word requests per cycle
+    `define NUMBER_REQUESTS `NT
+    // Number of cycles to complete stage 2 (read from memory)
+    `define STAGE_2_CYCLES 1
+
+// Queues feeding into banks Knobs
+    // Core Request Queue Size
+    `define REQQ_SIZE (`NT*`NW)
+    // Miss Reserv Queue Knob
+    `define MRVQ_SIZE `REQQ_SIZE
+    // Dram Fill Rsp Queue Size
+    `define DFPQ_SIZE 2
+
+// Queues for writebacks Knobs
+    // Core Writeback Queue Size
+    `define CWBQ_SIZE `REQQ_SIZE
+    // Dram Writeback Queue Size
+    `define DWBQ_SIZE 2
+    // Dram Fill Req Queue Size
+    `define DFQQ_SIZE `REQQ_SIZE
+
+// Dram knobs
+    `define SIMULATED_DRAM_LATENCY_CYCLES 50
+
+// ========================================= Configurable Knobs =========================================
+
+// One arm of the vx_clog2 chain: selects x when value is exactly 2**x.
+`define vx_clog2_h(value, x) ((value) == (1 << (x))) ? (x)
+
+// log2 of an exact power of two; any other value falls through to 0.
+// The whole chain is parenthesised so that an expression such as
+// vx_clog2(N)-1 subtracts from the result, not from the fallback arm only.
+// NOTE(review): value == 1 maps to 1, not 0 -- presumably so that index
+// vectors are never zero-width; confirm this is intended.
+`define vx_clog2(value) ( ((value) == 0 ) ? 0 : \
+                          ((value) == 1 ) ? 1 : \
+                          `vx_clog2_h(value, 1 ) : \
+                          `vx_clog2_h(value, 2 ) : \
+                          `vx_clog2_h(value, 3 ) : \
+                          `vx_clog2_h(value, 4 ) : \
+                          `vx_clog2_h(value, 5 ) : \
+                          `vx_clog2_h(value, 6 ) : \
+                          `vx_clog2_h(value, 7 ) : \
+                          `vx_clog2_h(value, 8 ) : \
+                          `vx_clog2_h(value, 9 ) : \
+                          `vx_clog2_h(value, 10) : \
+                          `vx_clog2_h(value, 11) : \
+                          `vx_clog2_h(value, 12) : \
+                          `vx_clog2_h(value, 13) : \
+                          `vx_clog2_h(value, 14) : \
+                          `vx_clog2_h(value, 15) : \
+                          `vx_clog2_h(value, 16) : \
+                          `vx_clog2_h(value, 17) : \
+                          `vx_clog2_h(value, 18) : \
+                          `vx_clog2_h(value, 19) : \
+                          `vx_clog2_h(value, 20) : \
+                          `vx_clog2_h(value, 21) : \
+                          `vx_clog2_h(value, 22) : \
+                          `vx_clog2_h(value, 23) : \
+                          `vx_clog2_h(value, 24) : \
+                          `vx_clog2_h(value, 25) : \
+                          `vx_clog2_h(value, 26) : \
+                          `vx_clog2_h(value, 27) : \
+                          `vx_clog2_h(value, 28) : \
+                          `vx_clog2_h(value, 29) : \
+                          `vx_clog2_h(value, 30) : \
+                          `vx_clog2_h(value, 31) : \
+                          0 )
+
+`define BANK_SIZE_BYTES (`CACHE_SIZE_BYTES/`NUMBER_BANKS)
+
+`define BANK_LINE_COUNT (`BANK_SIZE_BYTES/`BANK_LINE_SIZE_BYTES)
+`define BANK_LINE_SIZE_WORDS (`BANK_LINE_SIZE_BYTES/`WORD_SIZE_BYTES)
+`define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0
+
+// Offset is fixed: two address bits, written MSB:LSB like every other range
+`define OFFSET_SIZE_END 1
+`define OFFSET_ADDR_START 0
+`define OFFSET_ADDR_END 1
+`define OFFSET_ADDR_RNG `OFFSET_ADDR_END:`OFFSET_ADDR_START
+`define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0
+
+// Word select: the NUM_BITS address bits directly above the offset
+`define WORD_SELECT_NUM_BITS `vx_clog2(`BANK_LINE_SIZE_WORDS)
+`define WORD_SELECT_SIZE_END `WORD_SELECT_NUM_BITS
+`define WORD_SELECT_ADDR_START (1+`OFFSET_ADDR_END)
+`define WORD_SELECT_ADDR_END (`WORD_SELECT_SIZE_END+`OFFSET_ADDR_END)
+`define WORD_SELECT_ADDR_RNG `WORD_SELECT_ADDR_END:`WORD_SELECT_ADDR_START
+`define WORD_SELECT_SIZE_RNG `WORD_SELECT_SIZE_END-1:0
+
+// Bank select: the NUM_BITS address bits directly above the word select
+`define BANK_SELECT_NUM_BITS `vx_clog2(`NUMBER_BANKS)
+`define BANK_SELECT_SIZE_END `BANK_SELECT_NUM_BITS
+`define BANK_SELECT_ADDR_START (1+`WORD_SELECT_ADDR_END)
+`define BANK_SELECT_ADDR_END (`BANK_SELECT_SIZE_END+`WORD_SELECT_ADDR_END)
+`define BANK_SELECT_ADDR_RNG `BANK_SELECT_ADDR_END:`BANK_SELECT_ADDR_START
+`define BANK_SELECT_SIZE_RNG `BANK_SELECT_SIZE_END-1:0
+
+// Line select: the NUM_BITS address bits directly above the bank select
+`define LINE_SELECT_NUM_BITS `vx_clog2(`BANK_LINE_COUNT)
+`define LINE_SELECT_SIZE_END `LINE_SELECT_NUM_BITS
+`define LINE_SELECT_ADDR_START (1+`BANK_SELECT_ADDR_END)
+`define LINE_SELECT_ADDR_END (`LINE_SELECT_SIZE_END+`BANK_SELECT_ADDR_END)
+`define LINE_SELECT_ADDR_RNG `LINE_SELECT_ADDR_END:`LINE_SELECT_ADDR_START
+`define LINE_SELECT_SIZE_RNG `LINE_SELECT_SIZE_END-1:0
+
+// Tag: every remaining address bit above the line select, up to bit 31
+`define TAG_SELECT_NUM_BITS (32-(`LINE_SELECT_ADDR_END+1))
+`define TAG_SELECT_SIZE_END `TAG_SELECT_NUM_BITS
+`define TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END)
+`define TAG_SELECT_ADDR_END (`TAG_SELECT_SIZE_END+`LINE_SELECT_ADDR_END)
+`define TAG_SELECT_ADDR_RNG `TAG_SELECT_ADDR_END:`TAG_SELECT_ADDR_START
+`define TAG_SELECT_SIZE_RNG `TAG_SELECT_SIZE_END-1:0
+
+`endif
diff --git a/rtl/VX_cache/VX_cache_core_req_bank_sel.v b/rtl/VX_cache/VX_cache_core_req_bank_sel.v
new file mode 100644
index 00000000..85a324d0
--- /dev/null
+++ b/rtl/VX_cache/VX_cache_core_req_bank_sel.v
@@ -0,0 +1,26 @@
+`include "VX_cache_config.v"
+
+// Builds, for every bank, the mask of core requests that target it.
+// Purely combinational.
+module VX_cache_core_req_bank_sel (
+    input  wire [`NUMBER_REQUESTS-1:0]                    core_req_valid,
+    input  wire [`NUMBER_REQUESTS-1:0][31:0]              core_req_addr,
+    output reg  [`NUMBER_BANKS-1:0][`NUMBER_REQUESTS-1:0] per_bank_valids
+);
+
+    integer curr_req;
+    always @(*) begin
+        // Default every bit first so that no latch is inferred
+        per_bank_valids = 0;
+        for (curr_req = 0; curr_req < `NUMBER_REQUESTS; curr_req = curr_req + 1) begin
+            if (`NUMBER_BANKS == 1) begin
+                // If there is only one bank, then only map requests to that bank
+                per_bank_valids[0][curr_req] = core_req_valid[curr_req];
+            end else begin
+                // The bank is chosen by the bank-select bits of this request's own address
+                per_bank_valids[core_req_addr[curr_req][`BANK_SELECT_ADDR_RNG]][curr_req] = core_req_valid[curr_req];
+            end
+        end
+    end
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_cache/VX_cache_dfq_queue.v b/rtl/VX_cache/VX_cache_dfq_queue.v
new file mode 100644
index 00000000..e0af7a2c
--- /dev/null
+++ b/rtl/VX_cache/VX_cache_dfq_queue.v
@@ -0,0 +1,76 @@
+`include "VX_cache_config.v"
+
+// Queue of per-bank DRAM fill request vectors. One request is handed out per
+// pop; the not-yet-served requests of the current entry are kept in the
+// use_* registers until they have all been drained.
+module VX_cache_dfq_queue (
+    input  wire                           clk,
+    input  wire                           reset,
+    input  wire                           dfqq_push,
+    input  wire [`NUMBER_BANKS-1:0]       per_bank_dram_fill_req,
+    input  wire [`NUMBER_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
+
+    input  wire                           dfqq_pop,
+    output wire                           dfqq_req,
+    output wire [31:0]                    dfqq_req_addr,
+    output wire                           dfqq_empty,
+    output wire                           dfqq_full
+);
+
+    // Entry at the head of the queue
+    wire [`NUMBER_BANKS-1:0]       out_per_bank_dram_fill_req;
+    wire [`NUMBER_BANKS-1:0][31:0] out_per_bank_dram_fill_req_addr;
+
+    // Requests of the entry currently being served that are still pending
+    reg [`NUMBER_BANKS-1:0]       use_per_bank_dram_fill_req;
+    reg [`NUMBER_BANKS-1:0][31:0] use_per_bank_dram_fill_req_addr;
+
+    wire use_empty = !(|use_per_bank_dram_fill_req);
+    wire out_empty = !(|out_per_bank_dram_fill_req);
+
+    wire push_qual = dfqq_push && !dfqq_full;
+    wire pop_qual  = dfqq_pop && use_empty && !out_empty && !dfqq_empty;
+
+    VX_generic_queue #(.DATAW(`NUMBER_BANKS * (1+32)), .SIZE(`DFQQ_SIZE)) dfqq_queue(
+        .clk     (clk),
+        .reset   (reset),
+        .push    (push_qual),
+        .in_data ({per_bank_dram_fill_req, per_bank_dram_fill_req_addr}),
+        .pop     (pop_qual),
+        .out_data({out_per_bank_dram_fill_req, out_per_bank_dram_fill_req_addr}),
+        .empty   (dfqq_empty),
+        .full    (dfqq_full)
+    );
+
+    // Serve from the pending set first, then from the queue head. The head is
+    // ignored while the queue is empty because out_data is then stale.
+    wire [`NUMBER_BANKS-1:0] qual_bank_dram_fill_req =
+        !use_empty ? use_per_bank_dram_fill_req : (dfqq_empty ? {`NUMBER_BANKS{1'b0}} : out_per_bank_dram_fill_req);
+    wire [`NUMBER_BANKS-1:0][31:0] qual_bank_dram_fill_req_addr =
+        !use_empty ? use_per_bank_dram_fill_req_addr : out_per_bank_dram_fill_req_addr;
+
+    wire [(`vx_clog2(`NUMBER_BANKS))-1:0] qual_request_index;
+    wire                                  qual_has_request;
+    VX_generic_priority_encoder #(.N(`NUMBER_BANKS)) VX_sel_bank(
+        .valids(qual_bank_dram_fill_req),
+        .index (qual_request_index),
+        .found (qual_has_request)
+    );
+
+    assign dfqq_req      = qual_bank_dram_fill_req     [qual_request_index];
+    assign dfqq_req_addr = qual_bank_dram_fill_req_addr[qual_request_index];
+
+    // Clear the request that is being handed out this cycle
+    wire [`NUMBER_BANKS-1:0] updated_bank_dram_fill_req = qual_bank_dram_fill_req & ~(1 << qual_request_index);
+
+    always @(posedge clk or posedge reset) begin
+        if (reset) begin
+            use_per_bank_dram_fill_req      <= 0;
+            use_per_bank_dram_fill_req_addr <= 0;
+        end else if (dfqq_pop && qual_has_request) begin
+            use_per_bank_dram_fill_req      <= updated_bank_dram_fill_req;
+            use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr;
+        end
+    end
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_cache/VX_cache_dram_req_arb.v b/rtl/VX_cache/VX_cache_dram_req_arb.v
new file mode 100644
index 00000000..eebb5b3e
--- /dev/null
+++ b/rtl/VX_cache/VX_cache_dram_req_arb.v
@@ -0,0 +1,68 @@
+`include "VX_cache_config.v"
+
+// Arbitrates DRAM traffic: a pending bank writeback is issued in preference
+// to a queued fill request, and one request is presented at a time.
+module VX_cache_dram_req_arb (
+    input wire clk,
+    input wire reset,
+
+    // Fill Request
+    output wire                           dfqq_full,
+    input  wire [`NUMBER_BANKS-1:0]       per_bank_dram_fill_req,
+    input  wire [`NUMBER_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
+
+    // Writeback Request
+    output wire [`NUMBER_BANKS-1:0]                            per_bank_dram_wb_queue_pop,
+    input  wire [`NUMBER_BANKS-1:0]                            per_bank_dram_wb_req,
+    input  wire [`NUMBER_BANKS-1:0][31:0]                      per_bank_dram_wb_req_addr,
+    input  wire [`NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_dram_wb_req_data,
+
+    // real Dram request
+    output wire                             dram_req,
+    output wire                             dram_req_write,
+    output wire                             dram_req_read,
+    output wire [31:0]                      dram_req_addr,
+    output wire [31:0]                      dram_req_size,
+    output wire [`BANK_LINE_SIZE_RNG][31:0] dram_req_data
+);
+
+    // Pick one bank that has a writeback pending (declared before first use)
+    wire                                  dwb_valid;
+    wire [(`vx_clog2(`NUMBER_BANKS))-1:0] dwb_bank;
+    VX_generic_priority_encoder #(.N(`NUMBER_BANKS)) VX_sel_dwb(
+        .valids(per_bank_dram_wb_req),
+        .index (dwb_bank),
+        .found (dwb_valid)
+    );
+
+    wire        dfqq_req;
+    wire [31:0] dfqq_req_addr;
+    wire        dfqq_empty;
+    wire        dfqq_pop  = !dwb_valid && dfqq_req;     // If no dwb, and dfqq has valids, then pop
+    wire        dfqq_push = (|per_bank_dram_fill_req);  // Queue a new entry whenever any bank requests a fill
+    VX_cache_dfq_queue VX_cache_dfq_queue(
+        .clk                        (clk),
+        .reset                      (reset),
+        .dfqq_push                  (dfqq_push),
+        .per_bank_dram_fill_req     (per_bank_dram_fill_req),
+        .per_bank_dram_fill_req_addr(per_bank_dram_fill_req_addr),
+        .dfqq_pop                   (dfqq_pop),
+        .dfqq_req                   (dfqq_req),
+        .dfqq_req_addr              (dfqq_req_addr),
+        .dfqq_empty                 (dfqq_empty),
+        .dfqq_full                  (dfqq_full)
+    );
+
+    // Pop only the writeback queue of the bank that was selected
+    assign per_bank_dram_wb_queue_pop = per_bank_dram_wb_req & (1 << dwb_bank);
+
+    assign dram_req       = dwb_valid || dfqq_req;
+    assign dram_req_write = dwb_valid;
+    assign dram_req_read  = dfqq_req && !dwb_valid;
+    assign dram_req_addr  = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr;
+    assign dram_req_data  = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0;
+    // NOTE(review): dram_req_size was left undriven; one bank line in bytes is
+    // assumed here -- confirm the unit the DRAM side expects.
+    assign dram_req_size  = `BANK_LINE_SIZE_BYTES;
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_cache/VX_cache_wb_sel_merge.v b/rtl/VX_cache/VX_cache_wb_sel_merge.v
new file mode 100644
index 00000000..83f47e82
--- /dev/null
+++ b/rtl/VX_cache/VX_cache_wb_sel_merge.v
@@ -0,0 +1,75 @@
+`include "VX_cache_config.v"
+
+// Merges per-bank core writebacks into one core writeback: one bank is
+// selected, and every other bank whose writeback carries the same rd and
+// warp number is merged into the same response.
+module VX_cache_wb_sel_merge (
+    // Per Bank WB
+    input  wire [`NUMBER_BANKS-1:0][`NUMBER_REQUESTS-1:0] per_bank_wb_tid,
+    input  wire [`NUMBER_BANKS-1:0][4:0]                  per_bank_wb_rd,
+    input  wire [`NUMBER_BANKS-1:0][1:0]                  per_bank_wb_wb,
+    input  wire [`NUMBER_BANKS-1:0][`NW_M1:0]             per_bank_wb_warp_num,
+    input  wire [`NUMBER_BANKS-1:0][31:0]                 per_bank_wb_data,
+    output wire [`NUMBER_BANKS-1:0]                       per_bank_wb_pop,
+
+    // Core Writeback
+    input  wire                              core_no_wb_slot,
+    output reg  [`NUMBER_REQUESTS-1:0]       core_wb_valid,
+    output reg  [`NUMBER_REQUESTS-1:0][31:0] core_wb_readdata,
+    output wire [4:0]                        core_wb_req_rd,
+    output wire [1:0]                        core_wb_req_wb,
+    output wire [`NW_M1:0]                   core_wb_warp_num
+);
+
+    // NOTE(review): per_bank_wb_tid is NUMBER_REQUESTS wide and is used here as
+    // a per-request valid mask (one bit per request served by the bank's
+    // writeback) -- confirm against the bank implementation.
+    wire [`NUMBER_BANKS-1:0] bank_wants_wb;
+    genvar curr_bank;
+    generate
+        for (curr_bank = 0; curr_bank < `NUMBER_BANKS; curr_bank = curr_bank + 1) begin : wants_wb
+            assign bank_wants_wb[curr_bank] = (|per_bank_wb_tid[curr_bank]);
+        end
+    endgenerate
+
+    wire [(`vx_clog2(`NUMBER_BANKS))-1:0] main_bank_index;
+    wire                                  found_bank;
+
+    VX_generic_priority_encoder #(.N(`NUMBER_BANKS)) VX_sel_bank(
+        .valids(bank_wants_wb),
+        .index (main_bank_index),
+        .found (found_bank)
+    );
+
+    assign core_wb_req_rd   = per_bank_wb_rd      [main_bank_index];
+    assign core_wb_req_wb   = per_bank_wb_wb      [main_bank_index];
+    assign core_wb_warp_num = per_bank_wb_warp_num[main_bank_index];
+
+    // The comparison depends on run-time signals, so it lives in a
+    // combinational block rather than in a generate-if.
+    reg [`NUMBER_BANKS-1:0] per_bank_wb_pop_unqual;
+    integer this_bank;
+    integer this_req;
+    always @(*) begin
+        core_wb_valid          = 0;
+        core_wb_readdata       = 0;
+        per_bank_wb_pop_unqual = 0;
+        for (this_bank = 0; this_bank < `NUMBER_BANKS; this_bank = this_bank + 1) begin
+            if (found_bank && bank_wants_wb[this_bank]
+                && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index])
+                && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
+                per_bank_wb_pop_unqual[this_bank] = 1'b1;
+                for (this_req = 0; this_req < `NUMBER_REQUESTS; this_req = this_req + 1) begin
+                    if (per_bank_wb_tid[this_bank][this_req]) begin
+                        core_wb_valid[this_req]    = 1'b1;
+                        core_wb_readdata[this_req] = per_bank_wb_data[this_bank];
+                    end
+                end
+            end
+        end
+    end
+
+    // Banks are popped only when the core has a writeback slot available
+    assign per_bank_wb_pop = per_bank_wb_pop_unqual & {`NUMBER_BANKS{!core_no_wb_slot}};
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_generic_queue.v b/rtl/VX_generic_queue.v
new file mode 100644
index 00000000..ca383fae
--- /dev/null
+++ b/rtl/VX_generic_queue.v
@@ -0,0 +1,55 @@
+// Generic FIFO of SIZE slots, each DATAW bits wide. One slot is always kept
+// free to tell full from empty, so at most SIZE-1 entries are held.
+module VX_generic_queue
+    #(
+        parameter DATAW = 4,
+        parameter SIZE  = 16
+    )
+    (
+    input  wire             clk,
+    input  wire             reset,
+    input  wire             push,
+    input  wire [DATAW-1:0] in_data,
+
+    input  wire             pop,
+    output wire [DATAW-1:0] out_data,
+    output wire             empty,
+    output wire             full
+);
+
+    localparam PTRW = $clog2(SIZE);
+
+    // SIZE words of DATAW bits each
+    reg [DATAW-1:0] data [SIZE-1:0];
+    reg [PTRW-1:0]  head;
+    reg [PTRW-1:0]  tail;
+
+    // Pointers wrap at SIZE explicitly so a non power-of-two SIZE stays in range
+    wire [PTRW-1:0] head_next = (head == SIZE-1) ? {PTRW{1'b0}} : head + 1'b1;
+    wire [PTRW-1:0] tail_next = (tail == SIZE-1) ? {PTRW{1'b0}} : tail + 1'b1;
+
+    assign empty = (head == tail);
+    assign full  = (tail_next == head);
+
+    integer i;
+    always @(posedge clk or posedge reset) begin
+        if (reset) begin
+            head <= 0;
+            tail <= 0;
+            for (i = 0; i < SIZE; i = i + 1) data[i] <= {DATAW{1'b0}};
+        end else begin
+            if (push && !full) begin
+                data[tail] <= in_data;
+                tail       <= tail_next;
+            end
+
+            // Popping an empty queue must not move the read pointer
+            if (pop && !empty) begin
+                head <= head_next;
+            end
+        end
+    end
+
+    assign out_data = data[head];
+
+endmodule
\ No newline at end of file