cache refactoring (fixed redundant fill requests, merged fill and writeback queues), optimized priority encoder, fixed csr cycles count
This commit is contained in:
@@ -9,6 +9,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
@@ -58,7 +59,7 @@ VL_FLAGS += verilator.vlt
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
|
||||
@@ -31,9 +31,9 @@ opae_sim::opae_sim() {
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedFstC();
|
||||
trace_ = new VerilatedVcdC();
|
||||
vortex_afu_->trace(trace_, 99);
|
||||
trace_->open("trace.fst");
|
||||
trace_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
this->reset();
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_fst_c.h>
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
@@ -88,6 +88,6 @@ private:
|
||||
RAM ram_;
|
||||
Vvortex_afu_shim *vortex_afu_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedFstC *trace_;
|
||||
VerilatedVcdC *trace_;
|
||||
#endif
|
||||
};
|
||||
@@ -9,6 +9,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
@@ -55,7 +56,7 @@ VL_FLAGS += verilator.vlt
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace-fst --trace-threads 1 $(DBG_FLAGS)
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
|
||||
@@ -177,7 +177,7 @@ int main(int argc, char *argv[]) {
|
||||
size_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << std::hex << buf_size << std::dec << " bytes" << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload kernel" << std::endl;
|
||||
|
||||
@@ -83,6 +83,8 @@ tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd
|
||||
tar -zcvf run.log.tar.gz build_ase_1c/work/run.log
|
||||
tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd
|
||||
tar -cvjf vx_scope.vcd.tar.bz2 vx_scope.vcd
|
||||
tar -cvjf trace.fst.tar.bz2 trace.fst run.log
|
||||
tar -cvjf trace.vcd.tar.bz2 trace.vcd run.log
|
||||
|
||||
# decompress VCD trace
|
||||
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
|
||||
@@ -114,3 +116,6 @@ make -C ../../rtlsim clean && reset && make -C ../../rtlsim
|
||||
|
||||
# split tar into multiple parts
|
||||
split -b 50M home.tar.bz2 "home.tar.bz2.part"
|
||||
|
||||
|
||||
cat run.log | grep -c "cache[0-9]*:[0-9]* dram_req"
|
||||
@@ -10,6 +10,7 @@
|
||||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_CACHE_DATA
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
|
||||
@@ -1105,7 +1105,7 @@ wire scope_changed = `SCOPE_TRIGGER;
|
||||
VX_scope #(
|
||||
.DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})),
|
||||
.BUSW (64),
|
||||
.SIZE (4096),
|
||||
.SIZE (`SCOPE_SIZE),
|
||||
.UPDW ($bits({`SCOPE_UPDATE_LIST}))
|
||||
) scope (
|
||||
.clk (clk),
|
||||
|
||||
@@ -211,34 +211,34 @@ module VX_cluster #(
|
||||
.reset (reset),
|
||||
|
||||
// input requests
|
||||
.in_io_req_valid (per_core_io_req_valid),
|
||||
.in_io_req_rw (per_core_io_req_rw),
|
||||
.in_io_req_byteen (per_core_io_req_byteen),
|
||||
.in_io_req_addr (per_core_io_req_addr),
|
||||
.in_io_req_data (per_core_io_req_data),
|
||||
.in_io_req_tag (per_core_io_req_tag),
|
||||
.in_io_req_ready (per_core_io_req_ready),
|
||||
.io_req_valid_in (per_core_io_req_valid),
|
||||
.io_req_rw_in (per_core_io_req_rw),
|
||||
.io_req_byteen_in (per_core_io_req_byteen),
|
||||
.io_req_addr_in (per_core_io_req_addr),
|
||||
.io_req_data_in (per_core_io_req_data),
|
||||
.io_req_tag_in (per_core_io_req_tag),
|
||||
.io_req_ready_in (per_core_io_req_ready),
|
||||
|
||||
// input responses
|
||||
.in_io_rsp_valid (per_core_io_rsp_valid),
|
||||
.in_io_rsp_data (per_core_io_rsp_data),
|
||||
.in_io_rsp_tag (per_core_io_rsp_tag),
|
||||
.in_io_rsp_ready (per_core_io_rsp_ready),
|
||||
.io_rsp_valid_in (per_core_io_rsp_valid),
|
||||
.io_rsp_data_in (per_core_io_rsp_data),
|
||||
.io_rsp_tag_in (per_core_io_rsp_tag),
|
||||
.io_rsp_ready_in (per_core_io_rsp_ready),
|
||||
|
||||
// output request
|
||||
.out_io_req_valid (io_req_valid),
|
||||
.out_io_req_rw (io_req_rw),
|
||||
.out_io_req_byteen (io_req_byteen),
|
||||
.out_io_req_addr (io_req_addr),
|
||||
.out_io_req_data (io_req_data),
|
||||
.out_io_req_tag (io_req_tag),
|
||||
.out_io_req_ready (io_req_ready),
|
||||
.io_req_valid_out (io_req_valid),
|
||||
.io_req_rw_out (io_req_rw),
|
||||
.io_req_byteen_out (io_req_byteen),
|
||||
.io_req_addr_out (io_req_addr),
|
||||
.io_req_data_out (io_req_data),
|
||||
.io_req_tag_out (io_req_tag),
|
||||
.io_req_ready_out (io_req_ready),
|
||||
|
||||
// output response
|
||||
.out_io_rsp_valid (io_rsp_valid),
|
||||
.out_io_rsp_tag (io_rsp_tag),
|
||||
.out_io_rsp_data (io_rsp_data),
|
||||
.out_io_rsp_ready (io_rsp_ready)
|
||||
.io_rsp_valid_out (io_rsp_valid),
|
||||
.io_rsp_tag_out (io_rsp_tag),
|
||||
.io_rsp_data_out (io_rsp_data),
|
||||
.io_rsp_ready_out (io_rsp_ready)
|
||||
);
|
||||
|
||||
VX_csr_io_arb #(
|
||||
@@ -250,28 +250,28 @@ module VX_cluster #(
|
||||
.request_id (csr_io_req_coreid),
|
||||
|
||||
// input requests
|
||||
.in_csr_io_req_valid (csr_io_req_valid),
|
||||
.in_csr_io_req_addr (csr_io_req_addr),
|
||||
.in_csr_io_req_rw (csr_io_req_rw),
|
||||
.in_csr_io_req_data (csr_io_req_data),
|
||||
.in_csr_io_req_ready (csr_io_req_ready),
|
||||
.csr_io_req_valid_in (csr_io_req_valid),
|
||||
.csr_io_req_addr_in (csr_io_req_addr),
|
||||
.csr_io_req_rw_in (csr_io_req_rw),
|
||||
.csr_io_req_data_in (csr_io_req_data),
|
||||
.csr_io_req_ready_in (csr_io_req_ready),
|
||||
|
||||
// input responses
|
||||
.in_csr_io_rsp_valid (per_core_csr_io_rsp_valid),
|
||||
.in_csr_io_rsp_data (per_core_csr_io_rsp_data),
|
||||
.in_csr_io_rsp_ready (per_core_csr_io_rsp_ready),
|
||||
.csr_io_rsp_valid_in (per_core_csr_io_rsp_valid),
|
||||
.csr_io_rsp_data_in (per_core_csr_io_rsp_data),
|
||||
.csr_io_rsp_ready_in (per_core_csr_io_rsp_ready),
|
||||
|
||||
// output request
|
||||
.out_csr_io_req_valid (per_core_csr_io_req_valid),
|
||||
.out_csr_io_req_addr (per_core_csr_io_req_addr),
|
||||
.out_csr_io_req_rw (per_core_csr_io_req_rw),
|
||||
.out_csr_io_req_data (per_core_csr_io_req_data),
|
||||
.out_csr_io_req_ready (per_core_csr_io_req_ready),
|
||||
.csr_io_req_valid_out (per_core_csr_io_req_valid),
|
||||
.csr_io_req_addr_out (per_core_csr_io_req_addr),
|
||||
.csr_io_req_rw_out (per_core_csr_io_req_rw),
|
||||
.csr_io_req_data_out (per_core_csr_io_req_data),
|
||||
.csr_io_req_ready_out (per_core_csr_io_req_ready),
|
||||
|
||||
// output response
|
||||
.out_csr_io_rsp_valid (csr_io_rsp_valid),
|
||||
.out_csr_io_rsp_data (csr_io_rsp_data),
|
||||
.out_csr_io_rsp_ready (csr_io_rsp_ready)
|
||||
.csr_io_rsp_valid_out (csr_io_rsp_valid),
|
||||
.csr_io_rsp_data_out (csr_io_rsp_data),
|
||||
.csr_io_rsp_ready_out (csr_io_rsp_ready)
|
||||
);
|
||||
|
||||
assign busy = (| per_core_busy);
|
||||
@@ -281,72 +281,72 @@ module VX_cluster #(
|
||||
|
||||
// L2 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0] l2_core_req_rw;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] l2_core_req_byteen;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_core_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data;
|
||||
wire l2_core_req_ready;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_req_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_req_rw;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_req_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data;
|
||||
wire core_dram_req_ready;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
|
||||
wire l2_core_rsp_ready;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_rsp_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
|
||||
wire core_dram_rsp_ready;
|
||||
|
||||
wire[`NUM_CORES-1:0] l2_snp_fwdout_valid;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr;
|
||||
wire[`NUM_CORES-1:0] l2_snp_fwdout_invalidate;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag;
|
||||
wire[`NUM_CORES-1:0] l2_snp_fwdout_ready;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdout_valid;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdout_invalidate;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdout_ready;
|
||||
|
||||
wire[`NUM_CORES-1:0] l2_snp_fwdin_valid;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdin_valid;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
|
||||
|
||||
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
assign l2_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign l2_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
|
||||
assign l2_core_req_rw [i] = per_core_D_dram_req_rw[(i/2)];
|
||||
assign l2_core_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)];
|
||||
assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)];
|
||||
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)];
|
||||
|
||||
assign l2_core_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)];
|
||||
assign l2_core_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)];
|
||||
assign core_dram_req_byteen [i] = per_core_D_dram_req_byteen[(i/2)];
|
||||
assign core_dram_req_byteen [i+1] = per_core_I_dram_req_byteen[(i/2)];
|
||||
|
||||
assign l2_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
|
||||
assign l2_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
|
||||
assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
|
||||
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
|
||||
|
||||
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
|
||||
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
|
||||
assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready;
|
||||
assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready;
|
||||
assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready;
|
||||
assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready;
|
||||
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i] && l2_core_rsp_ready;
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1] && l2_core_rsp_ready;
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i] && core_dram_rsp_ready;
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1] && core_dram_rsp_ready;
|
||||
|
||||
assign per_core_D_dram_rsp_data [(i/2)] = l2_core_rsp_data[i];
|
||||
assign per_core_I_dram_rsp_data [(i/2)] = l2_core_rsp_data[i+1];
|
||||
assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i];
|
||||
assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i];
|
||||
assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1];
|
||||
assign per_core_D_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i];
|
||||
assign per_core_I_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i+1];
|
||||
|
||||
assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)];
|
||||
assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)];
|
||||
assign per_core_snp_req_invalidate [(i/2)] = l2_snp_fwdout_invalidate [(i/2)];
|
||||
assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)];
|
||||
assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
|
||||
assign per_core_snp_req_valid [(i/2)] = core_snp_fwdout_valid [(i/2)];
|
||||
assign per_core_snp_req_addr [(i/2)] = core_snp_fwdout_addr [(i/2)];
|
||||
assign per_core_snp_req_invalidate [(i/2)] = core_snp_fwdout_invalidate [(i/2)];
|
||||
assign per_core_snp_req_tag [(i/2)] = core_snp_fwdout_tag [(i/2)];
|
||||
assign core_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
|
||||
|
||||
assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
|
||||
assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
|
||||
assign per_core_snp_rsp_ready [(i/2)] = l2_snp_fwdin_ready [(i/2)];
|
||||
assign core_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
|
||||
assign core_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
|
||||
assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)];
|
||||
end
|
||||
|
||||
assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
|
||||
assign core_dram_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L2CACHE_ID),
|
||||
@@ -357,11 +357,10 @@ module VX_cluster #(
|
||||
.NUM_REQUESTS (`L2NUM_REQUESTS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.MRVQ_SIZE (`L2MRVQ_SIZE),
|
||||
.DFPQ_SIZE (`L2DFPQ_SIZE),
|
||||
.DRPQ_SIZE (`L2DRPQ_SIZE),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L2CWBQ_SIZE),
|
||||
.DWBQ_SIZE (`L2DWBQ_SIZE),
|
||||
.DFQQ_SIZE (`L2DFQQ_SIZE),
|
||||
.DREQ_SIZE (`L2DREQ_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING (1),
|
||||
@@ -378,19 +377,19 @@ module VX_cluster #(
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (l2_core_req_valid),
|
||||
.core_req_rw (l2_core_req_rw),
|
||||
.core_req_byteen (l2_core_req_byteen),
|
||||
.core_req_addr (l2_core_req_addr),
|
||||
.core_req_data (l2_core_req_data),
|
||||
.core_req_tag (l2_core_req_tag),
|
||||
.core_req_ready (l2_core_req_ready),
|
||||
.core_req_valid (core_dram_req_valid),
|
||||
.core_req_rw (core_dram_req_rw),
|
||||
.core_req_byteen (core_dram_req_byteen),
|
||||
.core_req_addr (core_dram_req_addr),
|
||||
.core_req_data (core_dram_req_data),
|
||||
.core_req_tag (core_dram_req_tag),
|
||||
.core_req_ready (core_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (l2_core_rsp_valid),
|
||||
.core_rsp_data (l2_core_rsp_data),
|
||||
.core_rsp_tag (l2_core_rsp_tag),
|
||||
.core_rsp_ready (l2_core_rsp_ready),
|
||||
.core_rsp_valid (core_dram_rsp_valid),
|
||||
.core_rsp_data (core_dram_rsp_data),
|
||||
.core_rsp_tag (core_dram_rsp_tag),
|
||||
.core_rsp_ready (core_dram_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_valid (dram_req_valid),
|
||||
@@ -420,86 +419,86 @@ module VX_cluster #(
|
||||
.snp_rsp_ready (snp_rsp_ready),
|
||||
|
||||
// Snoop forwarding out
|
||||
.snp_fwdout_valid (l2_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (l2_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(l2_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (l2_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (l2_snp_fwdout_ready),
|
||||
.snp_fwdout_valid (core_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (core_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (core_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (core_snp_fwdout_ready),
|
||||
|
||||
// Snoop forwarding in
|
||||
.snp_fwdin_valid (l2_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (l2_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (l2_snp_fwdin_ready)
|
||||
.snp_fwdin_valid (core_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (core_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (core_snp_fwdin_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] arb_dram_req_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0] arb_dram_req_rw;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] arb_dram_req_byteen;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_dram_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_dram_req_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_dram_req_data;
|
||||
wire[`L2NUM_REQUESTS-1:0] arb_dram_req_ready;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_req_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_req_rw;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] core_dram_req_byteen;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] core_dram_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_req_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_req_data;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_req_ready;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] arb_dram_rsp_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_dram_rsp_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_dram_rsp_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0] arb_dram_rsp_ready;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_rsp_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] core_dram_rsp_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] core_dram_rsp_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0] core_dram_rsp_ready;
|
||||
|
||||
wire[`NUM_CORES-1:0] arb_snp_fwdout_valid;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr;
|
||||
wire[`NUM_CORES-1:0] arb_snp_fwdout_invalidate;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag;
|
||||
wire[`NUM_CORES-1:0] arb_snp_fwdout_ready;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdout_valid;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdout_invalidate;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdout_ready;
|
||||
|
||||
wire[`NUM_CORES-1:0] arb_snp_fwdin_valid;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdin_valid;
|
||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdin_tag;
|
||||
wire[`NUM_CORES-1:0] core_snp_fwdin_ready;
|
||||
|
||||
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
assign core_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||
assign core_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||
|
||||
assign arb_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)];
|
||||
assign arb_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)];
|
||||
assign core_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)];
|
||||
assign core_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)];
|
||||
|
||||
assign arb_dram_req_byteen[i] = per_core_D_dram_req_byteen[(i/2)];
|
||||
assign arb_dram_req_byteen[i+1] = per_core_I_dram_req_byteen[(i/2)];
|
||||
assign core_dram_req_byteen[i] = per_core_D_dram_req_byteen[(i/2)];
|
||||
assign core_dram_req_byteen[i+1] = per_core_I_dram_req_byteen[(i/2)];
|
||||
|
||||
assign arb_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
|
||||
assign arb_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
|
||||
assign core_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
|
||||
assign core_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
|
||||
|
||||
assign arb_dram_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign arb_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
assign core_dram_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign core_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
|
||||
assign arb_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign arb_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
assign core_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign core_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
|
||||
assign per_core_D_dram_req_ready [(i/2)] = arb_dram_req_ready[i];
|
||||
assign per_core_I_dram_req_ready [(i/2)] = arb_dram_req_ready[i+1];
|
||||
assign per_core_D_dram_req_ready [(i/2)] = core_dram_req_ready[i];
|
||||
assign per_core_I_dram_req_ready [(i/2)] = core_dram_req_ready[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = arb_dram_rsp_valid[i];
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = arb_dram_rsp_valid[i+1];
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i];
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = core_dram_rsp_valid[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_data [(i/2)] = arb_dram_rsp_data[i];
|
||||
assign per_core_I_dram_rsp_data [(i/2)] = arb_dram_rsp_data[i+1];
|
||||
assign per_core_D_dram_rsp_data [(i/2)] = core_dram_rsp_data[i];
|
||||
assign per_core_I_dram_rsp_data [(i/2)] = core_dram_rsp_data[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_tag [(i/2)] = arb_dram_rsp_tag[i];
|
||||
assign per_core_I_dram_rsp_tag [(i/2)] = arb_dram_rsp_tag[i+1];
|
||||
assign per_core_D_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i];
|
||||
assign per_core_I_dram_rsp_tag [(i/2)] = core_dram_rsp_tag[i+1];
|
||||
|
||||
assign arb_dram_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
|
||||
assign arb_dram_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
|
||||
assign core_dram_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
|
||||
assign core_dram_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
|
||||
|
||||
assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)];
|
||||
assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)];
|
||||
assign per_core_snp_req_invalidate [(i/2)] = arb_snp_fwdout_invalidate [(i/2)];
|
||||
assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)];
|
||||
assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
|
||||
assign per_core_snp_req_valid [(i/2)] = core_snp_fwdout_valid [(i/2)];
|
||||
assign per_core_snp_req_addr [(i/2)] = core_snp_fwdout_addr [(i/2)];
|
||||
assign per_core_snp_req_invalidate [(i/2)] = core_snp_fwdout_invalidate [(i/2)];
|
||||
assign per_core_snp_req_tag [(i/2)] = core_snp_fwdout_tag [(i/2)];
|
||||
assign core_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
|
||||
|
||||
assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
|
||||
assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
|
||||
assign per_core_snp_rsp_ready [(i/2)] = arb_snp_fwdin_ready [(i/2)];
|
||||
assign core_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
|
||||
assign core_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
|
||||
assign per_core_snp_rsp_ready [(i/2)] = core_snp_fwdin_ready [(i/2)];
|
||||
end
|
||||
|
||||
if (`NUM_CORES > 1) begin
|
||||
@@ -525,26 +524,26 @@ module VX_cluster #(
|
||||
.snp_rsp_tag (snp_rsp_tag),
|
||||
.snp_rsp_ready (snp_rsp_ready),
|
||||
|
||||
.snp_fwdout_valid (arb_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (arb_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(arb_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (arb_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (arb_snp_fwdout_ready),
|
||||
.snp_fwdout_valid (core_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (core_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(core_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (core_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (core_snp_fwdout_ready),
|
||||
|
||||
.snp_fwdin_valid (arb_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (arb_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (arb_snp_fwdin_ready)
|
||||
.snp_fwdin_valid (core_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (core_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (core_snp_fwdin_ready)
|
||||
);
|
||||
end else begin
|
||||
assign arb_snp_fwdout_valid = snp_req_valid;
|
||||
assign arb_snp_fwdout_addr = snp_req_addr;
|
||||
assign arb_snp_fwdout_invalidate = snp_req_invalidate;
|
||||
assign arb_snp_fwdout_tag = snp_req_tag;
|
||||
assign snp_req_ready = arb_snp_fwdout_ready;
|
||||
assign core_snp_fwdout_valid = snp_req_valid;
|
||||
assign core_snp_fwdout_addr = snp_req_addr;
|
||||
assign core_snp_fwdout_invalidate = snp_req_invalidate;
|
||||
assign core_snp_fwdout_tag = snp_req_tag;
|
||||
assign snp_req_ready = core_snp_fwdout_ready;
|
||||
|
||||
assign snp_rsp_valid = arb_snp_fwdin_valid;
|
||||
assign snp_rsp_tag = arb_snp_fwdin_tag;
|
||||
assign arb_snp_fwdin_ready = snp_rsp_ready;
|
||||
assign snp_rsp_valid = core_snp_fwdin_valid;
|
||||
assign snp_rsp_tag = core_snp_fwdin_tag;
|
||||
assign core_snp_fwdin_ready = snp_rsp_ready;
|
||||
end
|
||||
|
||||
VX_mem_arb #(
|
||||
@@ -557,34 +556,34 @@ module VX_cluster #(
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.in_mem_req_valid (arb_dram_req_valid),
|
||||
.in_mem_req_rw (arb_dram_req_rw),
|
||||
.in_mem_req_byteen (arb_dram_req_byteen),
|
||||
.in_mem_req_addr (arb_dram_req_addr),
|
||||
.in_mem_req_data (arb_dram_req_data),
|
||||
.in_mem_req_tag (arb_dram_req_tag),
|
||||
.in_mem_req_ready (arb_dram_req_ready),
|
||||
.mem_req_valid_in (core_dram_req_valid),
|
||||
.mem_req_rw_in (core_dram_req_rw),
|
||||
.mem_req_byteen_in (core_dram_req_byteen),
|
||||
.mem_req_addr_in (core_dram_req_addr),
|
||||
.mem_req_data_in (core_dram_req_data),
|
||||
.mem_req_tag_in (core_dram_req_tag),
|
||||
.mem_req_ready_in (core_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
.in_mem_rsp_valid (arb_dram_rsp_valid),
|
||||
.in_mem_rsp_data (arb_dram_rsp_data),
|
||||
.in_mem_rsp_tag (arb_dram_rsp_tag),
|
||||
.in_mem_rsp_ready (arb_dram_rsp_ready),
|
||||
.mem_rsp_valid_in (core_dram_rsp_valid),
|
||||
.mem_rsp_data_in (core_dram_rsp_data),
|
||||
.mem_rsp_tag_in (core_dram_rsp_tag),
|
||||
.mem_rsp_ready_in (core_dram_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.out_mem_req_valid (dram_req_valid),
|
||||
.out_mem_req_rw (dram_req_rw),
|
||||
.out_mem_req_byteen (dram_req_byteen),
|
||||
.out_mem_req_addr (dram_req_addr),
|
||||
.out_mem_req_data (dram_req_data),
|
||||
.out_mem_req_tag (dram_req_tag),
|
||||
.out_mem_req_ready (dram_req_ready),
|
||||
.mem_req_valid_out (dram_req_valid),
|
||||
.mem_req_rw_out (dram_req_rw),
|
||||
.mem_req_byteen_out (dram_req_byteen),
|
||||
.mem_req_addr_out (dram_req_addr),
|
||||
.mem_req_data_out (dram_req_data),
|
||||
.mem_req_tag_out (dram_req_tag),
|
||||
.mem_req_ready_out (dram_req_ready),
|
||||
|
||||
// DRAM response
|
||||
.out_mem_rsp_valid (dram_rsp_valid),
|
||||
.out_mem_rsp_tag (dram_rsp_tag),
|
||||
.out_mem_rsp_data (dram_rsp_data),
|
||||
.out_mem_rsp_ready (dram_rsp_ready)
|
||||
.mem_rsp_valid_out (dram_rsp_valid),
|
||||
.mem_rsp_tag_out (dram_rsp_tag),
|
||||
.mem_rsp_data_out (dram_rsp_data),
|
||||
.mem_rsp_ready_out (dram_rsp_ready)
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
@@ -213,29 +213,24 @@
|
||||
`define DMRVQ_SIZE `MAX(`NUM_WARPS*`NUM_THREADS, 8)
|
||||
`endif
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef DDFPQ_SIZE
|
||||
`define DDFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef DSNRQ_SIZE
|
||||
`define DSNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
`ifndef DCWBQ_SIZE
|
||||
`define DCWBQ_SIZE `DCREQ_SIZE
|
||||
`endif
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef DDWBQ_SIZE
|
||||
`define DDWBQ_SIZE 4
|
||||
// DRAM Request Queue Size
|
||||
`ifndef DDREQ_SIZE
|
||||
`define DDREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
`ifndef DDFQQ_SIZE
|
||||
`define DDFQQ_SIZE `DCREQ_SIZE
|
||||
// DRAM Response Queue Size
|
||||
`ifndef DDRPQ_SIZE
|
||||
`define DDRPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef DSNRQ_SIZE
|
||||
`define DSNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Icache Configurable Knobs ==================================================
|
||||
@@ -270,24 +265,19 @@
|
||||
`define IMRVQ_SIZE `MAX(`ICREQ_SIZE, 8)
|
||||
`endif
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef IDFPQ_SIZE
|
||||
`define IDFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
`ifndef ICWBQ_SIZE
|
||||
`define ICWBQ_SIZE `ICREQ_SIZE
|
||||
`endif
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef IDWBQ_SIZE
|
||||
`define IDWBQ_SIZE 8
|
||||
// DRAM Request Queue Size
|
||||
`ifndef IDREQ_SIZE
|
||||
`define IDREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
`ifndef IDFQQ_SIZE
|
||||
`define IDFQQ_SIZE `ICREQ_SIZE
|
||||
// DRAM Response Queue Size
|
||||
`ifndef IDRPQ_SIZE
|
||||
`define IDRPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs ======================================================
|
||||
@@ -354,29 +344,24 @@
|
||||
`define L2MRVQ_SIZE `MAX(`L2CREQ_SIZE, 8)
|
||||
`endif
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef L2DFPQ_SIZE
|
||||
`define L2DFPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef L2SNRQ_SIZE
|
||||
`define L2SNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
`ifndef L2CWBQ_SIZE
|
||||
`define L2CWBQ_SIZE `L2CREQ_SIZE
|
||||
`endif
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef L2DWBQ_SIZE
|
||||
`define L2DWBQ_SIZE 8
|
||||
// DRAM Request Queue Size
|
||||
`ifndef L2DREQ_SIZE
|
||||
`define L2DREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
`ifndef L2DFQQ_SIZE
|
||||
`define L2DFQQ_SIZE `L2CREQ_SIZE
|
||||
// DRAM Response Queue Size
|
||||
`ifndef L2DRPQ_SIZE
|
||||
`define L2DRPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef L2SNRQ_SIZE
|
||||
`define L2SNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs =================================================
|
||||
@@ -411,9 +396,19 @@
|
||||
`define L3MRVQ_SIZE `MAX(`L3CREQ_SIZE, 8)
|
||||
`endif
|
||||
|
||||
// Dram Fill Rsp Queue Size
|
||||
`ifndef L3DFPQ_SIZE
|
||||
`define L3DFPQ_SIZE 8
|
||||
// Core Writeback Queue Size
|
||||
`ifndef L3CWBQ_SIZE
|
||||
`define L3CWBQ_SIZE `L3CREQ_SIZE
|
||||
`endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
`ifndef L3DREQ_SIZE
|
||||
`define L3DREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef L3DRPQ_SIZE
|
||||
`define L3DRPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
@@ -421,19 +416,4 @@
|
||||
`define L3SNRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Core Writeback Queue Size
|
||||
`ifndef L3CWBQ_SIZE
|
||||
`define L3CWBQ_SIZE `L3CREQ_SIZE
|
||||
`endif
|
||||
|
||||
// Dram Writeback Queue Size
|
||||
`ifndef L3DWBQ_SIZE
|
||||
`define L3DWBQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dram Fill Req Queue Size
|
||||
`ifndef L3DFQQ_SIZE
|
||||
`define L3DFQQ_SIZE `L3CREQ_SIZE
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
@@ -260,7 +260,7 @@ module VX_core #(
|
||||
.core_dcache_req_if (arb_dcache_req_if),
|
||||
.core_dcache_rsp_if (arb_dcache_rsp_if),
|
||||
|
||||
// Dram <-> Dcache
|
||||
// DRAM <-> Dcache
|
||||
.dcache_dram_req_if (dcache_dram_req_if),
|
||||
.dcache_dram_rsp_if (dcache_dram_rsp_if),
|
||||
.dcache_snp_req_if (dcache_snp_req_if),
|
||||
@@ -270,7 +270,7 @@ module VX_core #(
|
||||
.core_icache_req_if (core_icache_req_if),
|
||||
.core_icache_rsp_if (core_icache_rsp_if),
|
||||
|
||||
// Dram <-> Icache
|
||||
// DRAM <-> Icache
|
||||
.icache_dram_req_if (icache_dram_req_if),
|
||||
.icache_dram_rsp_if (icache_dram_rsp_if)
|
||||
);
|
||||
|
||||
@@ -17,7 +17,9 @@ module VX_csr_data #(
|
||||
input wire write_enable,
|
||||
input wire[`CSR_ADDR_BITS-1:0] write_addr,
|
||||
input wire[`NW_BITS-1:0] write_wid,
|
||||
input wire[`CSR_WIDTH-1:0] write_data
|
||||
input wire[`CSR_WIDTH-1:0] write_data,
|
||||
|
||||
input wire busy
|
||||
);
|
||||
reg [`CSR_WIDTH-1:0] csr_satp;
|
||||
reg [`CSR_WIDTH-1:0] csr_mstatus;
|
||||
@@ -86,7 +88,9 @@ module VX_csr_data #(
|
||||
csr_cycle <= 0;
|
||||
csr_instret <= 0;
|
||||
end else begin
|
||||
// Advance the cycle counter only on clock edges where the `busy` input is
// asserted; when `busy` is low csr_cycle keeps its previous value.
if (busy) begin
|
||||
csr_cycle <= csr_cycle + 1;
|
||||
end
|
||||
// Accumulate retired instructions: when the commit interface flags a valid
// commit, add its num_commits (cast to 64 bits) to csr_instret.
if (cmt_to_csr_if.valid) begin
|
||||
csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits);
|
||||
end
|
||||
|
||||
@@ -10,28 +10,28 @@ module VX_csr_io_arb #(
|
||||
input wire [REQS_BITS-1:0] request_id,
|
||||
|
||||
// input requests
|
||||
input wire in_csr_io_req_valid,
|
||||
input wire [11:0] in_csr_io_req_addr,
|
||||
input wire in_csr_io_req_rw,
|
||||
input wire [31:0] in_csr_io_req_data,
|
||||
output wire in_csr_io_req_ready,
|
||||
input wire csr_io_req_valid_in,
|
||||
input wire [11:0] csr_io_req_addr_in,
|
||||
input wire csr_io_req_rw_in,
|
||||
input wire [31:0] csr_io_req_data_in,
|
||||
output wire csr_io_req_ready_in,
|
||||
|
||||
// input response
|
||||
input wire [NUM_REQUESTS-1:0] in_csr_io_rsp_valid,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] in_csr_io_rsp_data,
|
||||
output wire [NUM_REQUESTS-1:0] in_csr_io_rsp_ready,
|
||||
input wire [NUM_REQUESTS-1:0] csr_io_rsp_valid_in,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] csr_io_rsp_data_in,
|
||||
output wire [NUM_REQUESTS-1:0] csr_io_rsp_ready_in,
|
||||
|
||||
// output request
|
||||
output wire [NUM_REQUESTS-1:0] out_csr_io_req_valid,
|
||||
output wire [NUM_REQUESTS-1:0][11:0] out_csr_io_req_addr,
|
||||
output wire [NUM_REQUESTS-1:0] out_csr_io_req_rw,
|
||||
output wire [NUM_REQUESTS-1:0][31:0] out_csr_io_req_data,
|
||||
input wire [NUM_REQUESTS-1:0] out_csr_io_req_ready,
|
||||
output wire [NUM_REQUESTS-1:0] csr_io_req_valid_out,
|
||||
output wire [NUM_REQUESTS-1:0][11:0] csr_io_req_addr_out,
|
||||
output wire [NUM_REQUESTS-1:0] csr_io_req_rw_out,
|
||||
output wire [NUM_REQUESTS-1:0][31:0] csr_io_req_data_out,
|
||||
input wire [NUM_REQUESTS-1:0] csr_io_req_ready_out,
|
||||
|
||||
// output response
|
||||
output wire out_csr_io_rsp_valid,
|
||||
output wire [31:0] out_csr_io_rsp_data,
|
||||
input wire out_csr_io_rsp_ready
|
||||
output wire csr_io_rsp_valid_out,
|
||||
output wire [31:0] csr_io_rsp_data_out,
|
||||
input wire csr_io_rsp_ready_out
|
||||
);
|
||||
if (NUM_REQUESTS == 1) begin
|
||||
|
||||
@@ -39,26 +39,26 @@ module VX_csr_io_arb #(
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (request_id)
|
||||
|
||||
assign out_csr_io_req_valid = in_csr_io_req_valid;
|
||||
assign out_csr_io_req_rw = in_csr_io_req_rw;
|
||||
assign out_csr_io_req_addr = in_csr_io_req_addr;
|
||||
assign out_csr_io_req_data = in_csr_io_req_data;
|
||||
assign in_csr_io_req_ready = out_csr_io_req_ready;
|
||||
assign csr_io_req_valid_out = csr_io_req_valid_in;
|
||||
assign csr_io_req_rw_out = csr_io_req_rw_in;
|
||||
assign csr_io_req_addr_out = csr_io_req_addr_in;
|
||||
assign csr_io_req_data_out = csr_io_req_data_in;
|
||||
assign csr_io_req_ready_in = csr_io_req_ready_out;
|
||||
|
||||
assign out_csr_io_rsp_valid = in_csr_io_rsp_valid;
|
||||
assign out_csr_io_rsp_data = in_csr_io_rsp_data;
|
||||
assign in_csr_io_rsp_ready = out_csr_io_rsp_ready;
|
||||
assign csr_io_rsp_valid_out = csr_io_rsp_valid_in;
|
||||
assign csr_io_rsp_data_out = csr_io_rsp_data_in;
|
||||
assign csr_io_rsp_ready_in = csr_io_rsp_ready_out;
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i));
|
||||
assign out_csr_io_req_rw[i] = in_csr_io_req_rw;
|
||||
assign out_csr_io_req_addr[i] = in_csr_io_req_addr;
|
||||
assign out_csr_io_req_data[i] = in_csr_io_req_data;
|
||||
assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i));
|
||||
assign csr_io_req_rw_out[i] = csr_io_req_rw_in;
|
||||
assign csr_io_req_addr_out[i] = csr_io_req_addr_in;
|
||||
assign csr_io_req_data_out[i] = csr_io_req_data_in;
|
||||
end
|
||||
|
||||
assign in_csr_io_req_ready = out_csr_io_req_ready[request_id];
|
||||
assign csr_io_req_ready_in = csr_io_req_ready_out[request_id];
|
||||
|
||||
reg [REQS_BITS-1:0] bus_rsp_sel;
|
||||
|
||||
@@ -67,17 +67,17 @@ module VX_csr_io_arb #(
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (in_csr_io_rsp_valid),
|
||||
.requests (csr_io_rsp_valid_in),
|
||||
.grant_index (bus_rsp_sel),
|
||||
`UNUSED_PIN (grant_valid),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel];
|
||||
assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel];
|
||||
assign csr_io_rsp_valid_out = csr_io_rsp_valid_in [bus_rsp_sel];
|
||||
assign csr_io_rsp_data_out = csr_io_rsp_data_in [bus_rsp_sel];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i));
|
||||
assign csr_io_rsp_ready_in[i] = csr_io_rsp_ready_out && (bus_rsp_sel == `REQS_BITS'(i));
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
@@ -13,7 +13,9 @@ module VX_csr_unit #(
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_exu_to_cmt_if csr_commit_if
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
|
||||
input wire busy
|
||||
);
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_exu_to_cmt_if csr_pipe_rsp_if();
|
||||
@@ -53,7 +55,8 @@ module VX_csr_unit #(
|
||||
.write_enable (csr_we_s1),
|
||||
.write_addr (csr_addr_s1),
|
||||
.write_wid (csr_pipe_rsp_if.wid),
|
||||
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0])
|
||||
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
||||
|
||||
@@ -38,6 +38,7 @@ module VX_execute #(
|
||||
VX_fpu_to_cmt_if fpu_commit_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if,
|
||||
|
||||
input wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
|
||||
@@ -73,7 +74,8 @@ module VX_execute #(
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if)
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
@@ -73,22 +73,24 @@ module VX_gpu_unit #(
|
||||
|
||||
// output
|
||||
|
||||
assign warp_ctl_if.valid = gpu_req_if.valid && gpu_commit_if.ready;
|
||||
assign warp_ctl_if.wid = gpu_commit_if.wid;
|
||||
assign warp_ctl_if.tmc = tmc;
|
||||
assign warp_ctl_if.wspawn = wspawn;
|
||||
assign warp_ctl_if.split = split;
|
||||
assign warp_ctl_if.barrier = barrier;
|
||||
// Stall when the commit interface holds a valid entry that the downstream
// side is not ready to accept.
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
|
||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||
assign gpu_commit_if.wid = gpu_req_if.wid;
|
||||
assign gpu_commit_if.tmask = gpu_req_if.tmask;
|
||||
assign gpu_commit_if.PC = gpu_req_if.PC;
|
||||
assign gpu_commit_if.rd = gpu_req_if.rd;
|
||||
assign gpu_commit_if.wb = gpu_req_if.wb;
|
||||
// Output register of the GPU unit: captures the request's commit fields
// (valid, wid, tmask, PC, rd, wb) together with the tmc / wspawn / split /
// barrier values, and drives gpu_commit_if and warp_ctl_if from its output.
// N is written as the sum of the widths of the concatenated fields, in the
// same order as the .in / .out concatenations below.
// NOTE(review): the instance is named csr_reg although this hunk belongs to
// VX_gpu_unit -- looks like a copy-paste name; confirm before renaming.
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// presumably holds the stored value while stall is high -- confirm against VX_generic_register
.stall (stall),
|
||||
// flush is tied low: this register is never flushed from here
.flush (1'b0),
|
||||
.in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
assign warp_ctl_if.wid = gpu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
||||
assign gpu_req_if.ready = ~stall;
|
||||
|
||||
`SCOPE_ASSIGN (gpu_req_fire, gpu_req_if.valid && gpu_req_if.ready);
|
||||
`SCOPE_ASSIGN (gpu_req_wid, gpu_req_if.wid);
|
||||
|
||||
@@ -14,52 +14,52 @@ module VX_io_arb #(
|
||||
input wire reset,
|
||||
|
||||
// input requests
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] in_io_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0] in_io_req_rw,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] in_io_req_byteen,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] in_io_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] in_io_req_data,
|
||||
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_io_req_tag,
|
||||
output wire [NUM_REQUESTS-1:0] in_io_req_ready,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in,
|
||||
input wire [NUM_REQUESTS-1:0] io_req_rw_in,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in,
|
||||
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in,
|
||||
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in,
|
||||
output wire [NUM_REQUESTS-1:0] io_req_ready_in,
|
||||
|
||||
// input response
|
||||
output wire [NUM_REQUESTS-1:0] in_io_rsp_valid,
|
||||
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_io_rsp_data,
|
||||
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_io_rsp_tag,
|
||||
input wire [NUM_REQUESTS-1:0] in_io_rsp_ready,
|
||||
output wire [NUM_REQUESTS-1:0] io_rsp_valid_in,
|
||||
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in,
|
||||
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in,
|
||||
input wire [NUM_REQUESTS-1:0] io_rsp_ready_in,
|
||||
|
||||
// output request
|
||||
output wire [`NUM_THREADS-1:0] out_io_req_valid,
|
||||
output wire out_io_req_rw,
|
||||
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] out_io_req_byteen,
|
||||
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] out_io_req_addr,
|
||||
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] out_io_req_data,
|
||||
output wire [TAG_OUT_WIDTH-1:0] out_io_req_tag,
|
||||
input wire out_io_req_ready,
|
||||
output wire [`NUM_THREADS-1:0] io_req_valid_out,
|
||||
output wire io_req_rw_out,
|
||||
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out,
|
||||
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out,
|
||||
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out,
|
||||
output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out,
|
||||
input wire io_req_ready_out,
|
||||
|
||||
// output response
|
||||
input wire out_io_rsp_valid,
|
||||
input wire [WORD_WIDTH-1:0] out_io_rsp_data,
|
||||
input wire [TAG_OUT_WIDTH-1:0] out_io_rsp_tag,
|
||||
output wire out_io_rsp_ready
|
||||
input wire io_rsp_valid_out,
|
||||
input wire [WORD_WIDTH-1:0] io_rsp_data_out,
|
||||
input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out,
|
||||
output wire io_rsp_ready_out
|
||||
);
|
||||
if (NUM_REQUESTS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign out_io_req_valid = in_io_req_valid;
|
||||
assign out_io_req_rw = in_io_req_rw;
|
||||
assign out_io_req_byteen = in_io_req_byteen;
|
||||
assign out_io_req_addr = in_io_req_addr;
|
||||
assign out_io_req_data = in_io_req_data;
|
||||
assign out_io_req_tag = in_io_req_tag;
|
||||
assign in_io_req_ready = out_io_req_ready;
|
||||
assign io_req_valid_out = io_req_valid_in;
|
||||
assign io_req_rw_out = io_req_rw_in;
|
||||
assign io_req_byteen_out = io_req_byteen_in;
|
||||
assign io_req_addr_out = io_req_addr_in;
|
||||
assign io_req_data_out = io_req_data_in;
|
||||
assign io_req_tag_out = io_req_tag_in;
|
||||
assign io_req_ready_in = io_req_ready_out;
|
||||
|
||||
assign in_io_rsp_valid = out_io_rsp_valid;
|
||||
assign in_io_rsp_data = out_io_rsp_data;
|
||||
assign in_io_rsp_tag = out_io_rsp_tag;
|
||||
assign out_io_rsp_ready = in_io_rsp_ready;
|
||||
assign io_rsp_valid_in = io_rsp_valid_out;
|
||||
assign io_rsp_data_in = io_rsp_data_out;
|
||||
assign io_rsp_tag_in = io_rsp_tag_out;
|
||||
assign io_rsp_ready_out = io_rsp_ready_in;
|
||||
|
||||
end else begin
|
||||
|
||||
@@ -68,7 +68,7 @@ module VX_io_arb #(
|
||||
wire [NUM_REQUESTS-1:0] valid_requests;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign valid_requests[i] = (| in_io_req_valid[i]);
|
||||
assign valid_requests[i] = (| io_req_valid_in[i]);
|
||||
end
|
||||
|
||||
VX_rr_arbiter #(
|
||||
@@ -82,25 +82,25 @@ module VX_io_arb #(
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign out_io_req_valid = in_io_req_valid [bus_req_sel];
|
||||
assign out_io_req_rw = in_io_req_rw [bus_req_sel];
|
||||
assign out_io_req_byteen = in_io_req_byteen [bus_req_sel];
|
||||
assign out_io_req_addr = in_io_req_addr [bus_req_sel];
|
||||
assign out_io_req_data = in_io_req_data [bus_req_sel];
|
||||
assign out_io_req_tag = {in_io_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
||||
assign io_req_valid_out = io_req_valid_in [bus_req_sel];
|
||||
assign io_req_rw_out = io_req_rw_in [bus_req_sel];
|
||||
assign io_req_byteen_out = io_req_byteen_in [bus_req_sel];
|
||||
assign io_req_addr_out = io_req_addr_in [bus_req_sel];
|
||||
assign io_req_data_out = io_req_data_in [bus_req_sel];
|
||||
assign io_req_tag_out = {io_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_io_req_ready[i] = out_io_req_ready && (bus_req_sel == REQS_BITS'(i));
|
||||
assign io_req_ready_in[i] = io_req_ready_out && (bus_req_sel == REQS_BITS'(i));
|
||||
end
|
||||
|
||||
wire [REQS_BITS-1:0] bus_rsp_sel = out_io_rsp_tag[REQS_BITS-1:0];
|
||||
wire [REQS_BITS-1:0] bus_rsp_sel = io_rsp_tag_out[REQS_BITS-1:0];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_io_rsp_valid[i] = out_io_rsp_valid && (bus_rsp_sel == REQS_BITS'(i));
|
||||
assign in_io_rsp_data[i] = out_io_rsp_data;
|
||||
assign in_io_rsp_tag[i] = out_io_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];
|
||||
assign io_rsp_valid_in[i] = io_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i));
|
||||
assign io_rsp_data_in[i] = io_rsp_data_out;
|
||||
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
|
||||
end
|
||||
assign out_io_rsp_ready = in_io_rsp_ready[bus_rsp_sel];
|
||||
assign io_rsp_ready_out = io_rsp_ready_in[bus_rsp_sel];
|
||||
|
||||
end
|
||||
|
||||
|
||||
@@ -14,52 +14,52 @@ module VX_mem_arb #(
|
||||
input wire reset,
|
||||
|
||||
// input requests
|
||||
input wire [NUM_REQUESTS-1:0] in_mem_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0] in_mem_req_rw,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] in_mem_req_byteen,
|
||||
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] in_mem_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_mem_req_data,
|
||||
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_mem_req_tag,
|
||||
output wire [NUM_REQUESTS-1:0] in_mem_req_ready,
|
||||
input wire [NUM_REQUESTS-1:0] mem_req_valid_in,
|
||||
input wire [NUM_REQUESTS-1:0] mem_req_rw_in,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in,
|
||||
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
output wire [NUM_REQUESTS-1:0] mem_req_ready_in,
|
||||
|
||||
// input response
|
||||
output wire [NUM_REQUESTS-1:0] in_mem_rsp_valid,
|
||||
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_mem_rsp_data,
|
||||
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_mem_rsp_tag,
|
||||
input wire [NUM_REQUESTS-1:0] in_mem_rsp_ready,
|
||||
output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in,
|
||||
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in,
|
||||
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in,
|
||||
input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in,
|
||||
|
||||
// output request
|
||||
output wire out_mem_req_valid,
|
||||
output wire out_mem_req_rw,
|
||||
output wire [WORD_SIZE-1:0] out_mem_req_byteen,
|
||||
output wire [ADDR_WIDTH-1:0] out_mem_req_addr,
|
||||
output wire [WORD_WIDTH-1:0] out_mem_req_data,
|
||||
output wire [TAG_OUT_WIDTH-1:0] out_mem_req_tag,
|
||||
input wire out_mem_req_ready,
|
||||
output wire mem_req_valid_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire [WORD_WIDTH-1:0] mem_req_data_out,
|
||||
output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
input wire mem_req_ready_out,
|
||||
|
||||
// output response
|
||||
input wire out_mem_rsp_valid,
|
||||
input wire [WORD_WIDTH-1:0] out_mem_rsp_data,
|
||||
input wire [TAG_OUT_WIDTH-1:0] out_mem_rsp_tag,
|
||||
output wire out_mem_rsp_ready
|
||||
input wire mem_rsp_valid_out,
|
||||
input wire [WORD_WIDTH-1:0] mem_rsp_data_out,
|
||||
input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out,
|
||||
output wire mem_rsp_ready_out
|
||||
);
|
||||
if (NUM_REQUESTS == 1) begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign out_mem_req_valid = in_mem_req_valid;
|
||||
assign out_mem_req_rw = in_mem_req_rw;
|
||||
assign out_mem_req_byteen = in_mem_req_byteen;
|
||||
assign out_mem_req_addr = in_mem_req_addr;
|
||||
assign out_mem_req_data = in_mem_req_data;
|
||||
assign out_mem_req_tag = in_mem_req_tag;
|
||||
assign in_mem_req_ready = out_mem_req_ready;
|
||||
assign mem_req_valid_out = mem_req_valid_in;
|
||||
assign mem_req_rw_out = mem_req_rw_in;
|
||||
assign mem_req_byteen_out = mem_req_byteen_in;
|
||||
assign mem_req_addr_out = mem_req_addr_in;
|
||||
assign mem_req_data_out = mem_req_data_in;
|
||||
assign mem_req_tag_out = mem_req_tag_in;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
|
||||
assign in_mem_rsp_valid = out_mem_rsp_valid;
|
||||
assign in_mem_rsp_data = out_mem_rsp_data;
|
||||
assign in_mem_rsp_tag = out_mem_rsp_tag;
|
||||
assign out_mem_rsp_ready = in_mem_rsp_ready;
|
||||
assign mem_rsp_valid_in = mem_rsp_valid_out;
|
||||
assign mem_rsp_data_in = mem_rsp_data_out;
|
||||
assign mem_rsp_tag_in = mem_rsp_tag_out;
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in;
|
||||
|
||||
end else begin
|
||||
|
||||
@@ -70,31 +70,31 @@ module VX_mem_arb #(
|
||||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (in_mem_req_valid),
|
||||
.requests (mem_req_valid_in),
|
||||
.grant_index (bus_req_sel),
|
||||
`UNUSED_PIN (grant_valid),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign out_mem_req_valid = in_mem_req_valid [bus_req_sel];
|
||||
assign out_mem_req_rw = in_mem_req_rw [bus_req_sel];
|
||||
assign out_mem_req_byteen = in_mem_req_byteen [bus_req_sel];
|
||||
assign out_mem_req_addr = in_mem_req_addr [bus_req_sel];
|
||||
assign out_mem_req_data = in_mem_req_data [bus_req_sel];
|
||||
assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
||||
assign mem_req_valid_out = mem_req_valid_in [bus_req_sel];
|
||||
assign mem_req_rw_out = mem_req_rw_in [bus_req_sel];
|
||||
assign mem_req_byteen_out = mem_req_byteen_in [bus_req_sel];
|
||||
assign mem_req_addr_out = mem_req_addr_in [bus_req_sel];
|
||||
assign mem_req_data_out = mem_req_data_in [bus_req_sel];
|
||||
assign mem_req_tag_out = {mem_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i));
|
||||
assign mem_req_ready_in[i] = mem_req_ready_out && (bus_req_sel == REQS_BITS'(i));
|
||||
end
|
||||
|
||||
wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0];
|
||||
wire [REQS_BITS-1:0] bus_rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i));
|
||||
assign in_mem_rsp_data[i] = out_mem_rsp_data;
|
||||
assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];
|
||||
assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i));
|
||||
assign mem_rsp_data_in[i] = mem_rsp_data_out;
|
||||
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
|
||||
end
|
||||
assign out_mem_rsp_ready = in_mem_rsp_ready[bus_rsp_sel];
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in[bus_rsp_sel];
|
||||
|
||||
end
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ module VX_mem_unit # (
|
||||
VX_cache_core_req_if core_dcache_req_if,
|
||||
VX_cache_core_rsp_if core_dcache_rsp_if,
|
||||
|
||||
// Dram <-> Dcache
|
||||
// DRAM <-> Dcache
|
||||
VX_cache_dram_req_if dcache_dram_req_if,
|
||||
VX_cache_dram_rsp_if dcache_dram_rsp_if,
|
||||
VX_cache_snp_req_if dcache_snp_req_if,
|
||||
@@ -22,7 +22,7 @@ module VX_mem_unit # (
|
||||
VX_cache_core_req_if core_icache_req_if,
|
||||
VX_cache_core_rsp_if core_icache_rsp_if,
|
||||
|
||||
// Dram <-> Icache
|
||||
// DRAM <-> Icache
|
||||
VX_cache_dram_req_if icache_dram_req_if,
|
||||
VX_cache_dram_rsp_if icache_dram_rsp_if
|
||||
);
|
||||
@@ -65,11 +65,10 @@ module VX_mem_unit # (
|
||||
.NUM_REQUESTS (`SNUM_REQUESTS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.MRVQ_SIZE (8),
|
||||
.DFPQ_SIZE (1),
|
||||
.DRPQ_SIZE (1),
|
||||
.SNRQ_SIZE (1),
|
||||
.CWBQ_SIZE (`SCWBQ_SIZE),
|
||||
.DWBQ_SIZE (1),
|
||||
.DFQQ_SIZE (1),
|
||||
.DREQ_SIZE (1),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (0),
|
||||
.WRITE_ENABLE (1),
|
||||
@@ -146,11 +145,10 @@ module VX_mem_unit # (
|
||||
.NUM_REQUESTS (`DNUM_REQUESTS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.MRVQ_SIZE (`DMRVQ_SIZE),
|
||||
.DFPQ_SIZE (`DDFPQ_SIZE),
|
||||
.DRPQ_SIZE (`DDRPQ_SIZE),
|
||||
.SNRQ_SIZE (`DSNRQ_SIZE),
|
||||
.CWBQ_SIZE (`DCWBQ_SIZE),
|
||||
.DWBQ_SIZE (`DDWBQ_SIZE),
|
||||
.DFQQ_SIZE (`DDFQQ_SIZE),
|
||||
.DREQ_SIZE (`DDREQ_SIZE),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
@@ -228,11 +226,10 @@ module VX_mem_unit # (
|
||||
.NUM_REQUESTS (`INUM_REQUESTS),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MRVQ_SIZE (`IMRVQ_SIZE),
|
||||
.DFPQ_SIZE (`IDFPQ_SIZE),
|
||||
.DRPQ_SIZE (`IDRPQ_SIZE),
|
||||
.SNRQ_SIZE (1),
|
||||
.CWBQ_SIZE (`ICWBQ_SIZE),
|
||||
.DWBQ_SIZE (`IDWBQ_SIZE),
|
||||
.DFQQ_SIZE (`IDFQQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
.SNOOP_FORWARDING (0),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (0),
|
||||
|
||||
@@ -200,6 +200,7 @@ module VX_pipeline #(
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.busy (busy),
|
||||
.ebreak (ebreak)
|
||||
);
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
// SCOPE_ASSIGN(d,s): expands to a continuous assignment that drives the
// signal named scope_<d> (built by `` token pasting) from expression s.
// No trailing semicolon is part of the macro; the call site supplies it.
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
||||
|
||||
// NOTE(review): presumably the depth of the scope capture buffer -- confirm against the scope module.
`define SCOPE_SIZE 4096
|
||||
|
||||
`else
|
||||
|
||||
`define SCOPE_IO_VX_icache_stage
|
||||
|
||||
195
hw/rtl/Vortex.v
195
hw/rtl/Vortex.v
@@ -135,7 +135,7 @@ module Vortex (
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
|
||||
wire l3_core_req_ready;
|
||||
wire cluster_dram_req_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
|
||||
@@ -196,7 +196,7 @@ module Vortex (
|
||||
.dram_req_addr (per_cluster_dram_req_addr [i]),
|
||||
.dram_req_data (per_cluster_dram_req_data [i]),
|
||||
.dram_req_tag (per_cluster_dram_req_tag [i]),
|
||||
.dram_req_ready (l3_core_req_ready),
|
||||
.dram_req_ready (cluster_dram_req_ready),
|
||||
|
||||
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
|
||||
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
|
||||
@@ -252,34 +252,34 @@ module Vortex (
|
||||
.reset (reset),
|
||||
|
||||
// input requests
|
||||
.in_io_req_valid (per_cluster_io_req_valid),
|
||||
.in_io_req_rw (per_cluster_io_req_rw),
|
||||
.in_io_req_byteen (per_cluster_io_req_byteen),
|
||||
.in_io_req_addr (per_cluster_io_req_addr),
|
||||
.in_io_req_data (per_cluster_io_req_data),
|
||||
.in_io_req_tag (per_cluster_io_req_tag),
|
||||
.in_io_req_ready (per_cluster_io_req_ready),
|
||||
.io_req_valid_in (per_cluster_io_req_valid),
|
||||
.io_req_rw_in (per_cluster_io_req_rw),
|
||||
.io_req_byteen_in (per_cluster_io_req_byteen),
|
||||
.io_req_addr_in (per_cluster_io_req_addr),
|
||||
.io_req_data_in (per_cluster_io_req_data),
|
||||
.io_req_tag_in (per_cluster_io_req_tag),
|
||||
.io_req_ready_in (per_cluster_io_req_ready),
|
||||
|
||||
// input responses
|
||||
.in_io_rsp_valid (per_cluster_io_rsp_valid),
|
||||
.in_io_rsp_data (per_cluster_io_rsp_data),
|
||||
.in_io_rsp_tag (per_cluster_io_rsp_tag),
|
||||
.in_io_rsp_ready (per_cluster_io_rsp_ready),
|
||||
.io_rsp_valid_in (per_cluster_io_rsp_valid),
|
||||
.io_rsp_data_in (per_cluster_io_rsp_data),
|
||||
.io_rsp_tag_in (per_cluster_io_rsp_tag),
|
||||
.io_rsp_ready_in (per_cluster_io_rsp_ready),
|
||||
|
||||
// output request
|
||||
.out_io_req_valid (io_req_valid),
|
||||
.out_io_req_rw (io_req_rw),
|
||||
.out_io_req_byteen (io_req_byteen),
|
||||
.out_io_req_addr (io_req_addr),
|
||||
.out_io_req_data (io_req_data),
|
||||
.out_io_req_tag (io_req_tag),
|
||||
.out_io_req_ready (io_req_ready),
|
||||
.io_req_valid_out (io_req_valid),
|
||||
.io_req_rw_out (io_req_rw),
|
||||
.io_req_byteen_out (io_req_byteen),
|
||||
.io_req_addr_out (io_req_addr),
|
||||
.io_req_data_out (io_req_data),
|
||||
.io_req_tag_out (io_req_tag),
|
||||
.io_req_ready_out (io_req_ready),
|
||||
|
||||
// output response
|
||||
.out_io_rsp_valid (io_rsp_valid),
|
||||
.out_io_rsp_tag (io_rsp_tag),
|
||||
.out_io_rsp_data (io_rsp_data),
|
||||
.out_io_rsp_ready (io_rsp_ready)
|
||||
.io_rsp_valid_out (io_rsp_valid),
|
||||
.io_rsp_tag_out (io_rsp_tag),
|
||||
.io_rsp_data_out (io_rsp_data),
|
||||
.io_rsp_ready_out (io_rsp_ready)
|
||||
);
|
||||
|
||||
VX_csr_io_arb #(
|
||||
@@ -291,28 +291,28 @@ module Vortex (
|
||||
.request_id (csr_io_request_id),
|
||||
|
||||
// input requests
|
||||
.in_csr_io_req_valid (csr_io_req_valid),
|
||||
.in_csr_io_req_addr (csr_io_req_addr),
|
||||
.in_csr_io_req_rw (csr_io_req_rw),
|
||||
.in_csr_io_req_data (csr_io_req_data),
|
||||
.in_csr_io_req_ready (csr_io_req_ready),
|
||||
.csr_io_req_valid_in (csr_io_req_valid),
|
||||
.csr_io_req_addr_in (csr_io_req_addr),
|
||||
.csr_io_req_rw_in (csr_io_req_rw),
|
||||
.csr_io_req_data_in (csr_io_req_data),
|
||||
.csr_io_req_ready_in (csr_io_req_ready),
|
||||
|
||||
// input responses
|
||||
.in_csr_io_rsp_valid (per_cluster_csr_io_rsp_valid),
|
||||
.in_csr_io_rsp_data (per_cluster_csr_io_rsp_data),
|
||||
.in_csr_io_rsp_ready (per_cluster_csr_io_rsp_ready),
|
||||
.csr_io_rsp_valid_in (per_cluster_csr_io_rsp_valid),
|
||||
.csr_io_rsp_data_in (per_cluster_csr_io_rsp_data),
|
||||
.csr_io_rsp_ready_in (per_cluster_csr_io_rsp_ready),
|
||||
|
||||
// output request
|
||||
.out_csr_io_req_valid (per_cluster_csr_io_req_valid),
|
||||
.out_csr_io_req_addr (per_cluster_csr_io_req_addr),
|
||||
.out_csr_io_req_rw (per_cluster_csr_io_req_rw),
|
||||
.out_csr_io_req_data (per_cluster_csr_io_req_data),
|
||||
.out_csr_io_req_ready (per_cluster_csr_io_req_ready),
|
||||
.csr_io_req_valid_out (per_cluster_csr_io_req_valid),
|
||||
.csr_io_req_addr_out (per_cluster_csr_io_req_addr),
|
||||
.csr_io_req_rw_out (per_cluster_csr_io_req_rw),
|
||||
.csr_io_req_data_out (per_cluster_csr_io_req_data),
|
||||
.csr_io_req_ready_out (per_cluster_csr_io_req_ready),
|
||||
|
||||
// output response
|
||||
.out_csr_io_rsp_valid (csr_io_rsp_valid),
|
||||
.out_csr_io_rsp_data (csr_io_rsp_data),
|
||||
.out_csr_io_rsp_ready (csr_io_rsp_ready)
|
||||
.csr_io_rsp_valid_out (csr_io_rsp_valid),
|
||||
.csr_io_rsp_data_out (csr_io_rsp_data),
|
||||
.csr_io_rsp_ready_out (csr_io_rsp_ready)
|
||||
);
|
||||
|
||||
assign busy = (| per_cluster_busy);
|
||||
@@ -320,56 +320,56 @@ module Vortex (
|
||||
|
||||
// L3 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
|
||||
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0] cluster_dram_req_rw;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] cluster_dram_req_byteen;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_dram_req_addr;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_req_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_req_tag;
|
||||
|
||||
wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
|
||||
wire l3_core_rsp_ready;
|
||||
wire [`L3NUM_REQUESTS-1:0] cluster_dram_rsp_valid;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] cluster_dram_rsp_data;
|
||||
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] cluster_dram_rsp_tag;
|
||||
wire cluster_dram_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] cluster_snp_fwdout_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_invalidate;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdout_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdout_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] cluster_snp_fwdin_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] cluster_snp_fwdin_ready;
|
||||
|
||||
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||
// Core Request
|
||||
assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i];
|
||||
assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i];
|
||||
assign l3_core_req_byteen [i] = per_cluster_dram_req_byteen[i];
|
||||
assign l3_core_req_addr [i] = per_cluster_dram_req_addr [i];
|
||||
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
|
||||
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
|
||||
assign cluster_dram_req_valid [i] = per_cluster_dram_req_valid [i];
|
||||
assign cluster_dram_req_rw [i] = per_cluster_dram_req_rw [i];
|
||||
assign cluster_dram_req_byteen [i] = per_cluster_dram_req_byteen[i];
|
||||
assign cluster_dram_req_addr [i] = per_cluster_dram_req_addr [i];
|
||||
assign cluster_dram_req_tag [i] = per_cluster_dram_req_tag [i];
|
||||
assign cluster_dram_req_data [i] = per_cluster_dram_req_data [i];
|
||||
|
||||
// Core Response
|
||||
assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i] && l3_core_rsp_ready;
|
||||
assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i];
|
||||
assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i];
|
||||
assign per_cluster_dram_rsp_valid [i] = cluster_dram_rsp_valid [i] && cluster_dram_rsp_ready;
|
||||
assign per_cluster_dram_rsp_data [i] = cluster_dram_rsp_data [i];
|
||||
assign per_cluster_dram_rsp_tag [i] = cluster_dram_rsp_tag [i];
|
||||
|
||||
// Snoop Forwarding out
|
||||
assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i];
|
||||
assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i];
|
||||
assign per_cluster_snp_req_invalidate [i] = l3_snp_fwdout_invalidate[i];
|
||||
assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i];
|
||||
assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
|
||||
assign per_cluster_snp_req_valid [i] = cluster_snp_fwdout_valid[i];
|
||||
assign per_cluster_snp_req_addr [i] = cluster_snp_fwdout_addr[i];
|
||||
assign per_cluster_snp_req_invalidate [i] = cluster_snp_fwdout_invalidate[i];
|
||||
assign per_cluster_snp_req_tag [i] = cluster_snp_fwdout_tag[i];
|
||||
assign cluster_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
|
||||
|
||||
// Snoop Forwarding in
|
||||
assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
|
||||
assign l3_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
|
||||
assign per_cluster_snp_rsp_ready [i] = l3_snp_fwdin_ready [i];
|
||||
assign cluster_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
|
||||
assign cluster_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
|
||||
assign per_cluster_snp_rsp_ready [i] = cluster_snp_fwdin_ready [i];
|
||||
end
|
||||
|
||||
assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready);
|
||||
assign cluster_dram_rsp_ready = (& per_cluster_dram_rsp_ready);
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_ID (`L3CACHE_ID),
|
||||
@@ -380,11 +380,10 @@ module Vortex (
|
||||
.NUM_REQUESTS (`L3NUM_REQUESTS),
|
||||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
.MRVQ_SIZE (`L3MRVQ_SIZE),
|
||||
.DFPQ_SIZE (`L3DFPQ_SIZE),
|
||||
.DRPQ_SIZE (`L3DRPQ_SIZE),
|
||||
.SNRQ_SIZE (`L3SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L3CWBQ_SIZE),
|
||||
.DWBQ_SIZE (`L3DWBQ_SIZE),
|
||||
.DFQQ_SIZE (`L3DFQQ_SIZE),
|
||||
.DREQ_SIZE (`L3DREQ_SIZE),
|
||||
.DRAM_ENABLE (1),
|
||||
.WRITE_ENABLE (1),
|
||||
.SNOOP_FORWARDING (1),
|
||||
@@ -401,19 +400,19 @@ module Vortex (
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (l3_core_req_valid),
|
||||
.core_req_rw (l3_core_req_rw),
|
||||
.core_req_byteen (l3_core_req_byteen),
|
||||
.core_req_addr (l3_core_req_addr),
|
||||
.core_req_data (l3_core_req_data),
|
||||
.core_req_tag (l3_core_req_tag),
|
||||
.core_req_ready (l3_core_req_ready),
|
||||
.core_req_valid (cluster_dram_req_valid),
|
||||
.core_req_rw (cluster_dram_req_rw),
|
||||
.core_req_byteen (cluster_dram_req_byteen),
|
||||
.core_req_addr (cluster_dram_req_addr),
|
||||
.core_req_data (cluster_dram_req_data),
|
||||
.core_req_tag (cluster_dram_req_tag),
|
||||
.core_req_ready (cluster_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (l3_core_rsp_valid),
|
||||
.core_rsp_data (l3_core_rsp_data),
|
||||
.core_rsp_tag (l3_core_rsp_tag),
|
||||
.core_rsp_ready (l3_core_rsp_ready),
|
||||
.core_rsp_valid (cluster_dram_rsp_valid),
|
||||
.core_rsp_data (cluster_dram_rsp_data),
|
||||
.core_rsp_tag (cluster_dram_rsp_tag),
|
||||
.core_rsp_ready (cluster_dram_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_valid (dram_req_valid),
|
||||
@@ -443,16 +442,16 @@ module Vortex (
|
||||
.snp_rsp_ready (snp_rsp_ready),
|
||||
|
||||
// Snoop forwarding out
|
||||
.snp_fwdout_valid (l3_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (l3_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(l3_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (l3_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (l3_snp_fwdout_ready),
|
||||
.snp_fwdout_valid (cluster_snp_fwdout_valid),
|
||||
.snp_fwdout_addr (cluster_snp_fwdout_addr),
|
||||
.snp_fwdout_invalidate(cluster_snp_fwdout_invalidate),
|
||||
.snp_fwdout_tag (cluster_snp_fwdout_tag),
|
||||
.snp_fwdout_ready (cluster_snp_fwdout_ready),
|
||||
|
||||
// Snoop forwarding in
|
||||
.snp_fwdin_valid (l3_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (l3_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (l3_snp_fwdin_ready)
|
||||
.snp_fwdin_valid (cluster_snp_fwdin_valid),
|
||||
.snp_fwdin_tag (cluster_snp_fwdin_tag),
|
||||
.snp_fwdin_ready (cluster_snp_fwdin_ready)
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
533
hw/rtl/cache/VX_bank.v
vendored
533
hw/rtl/cache/VX_bank.v
vendored
@@ -19,18 +19,16 @@ module VX_bank #(
|
||||
parameter CREQ_SIZE = 0,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 0,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 0,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRPQ_SIZE = 0,
|
||||
// Snoop Req Queue Size
|
||||
parameter SNRQ_SIZE = 0,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 0,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 0,
|
||||
// DRAM Request Queue Size
|
||||
parameter DREQ_SIZE = 0,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 0,
|
||||
@@ -71,31 +69,28 @@ module VX_bank #(
|
||||
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire core_rsp_ready,
|
||||
|
||||
// Dram Fill Requests
|
||||
output wire dram_fill_req_valid,
|
||||
output wire[`LINE_ADDR_WIDTH-1:0] dram_fill_req_addr,
|
||||
input wire dram_fill_req_ready,
|
||||
// DRAM request
|
||||
output wire dram_req_valid,
|
||||
output wire dram_req_rw,
|
||||
output wire [BANK_LINE_SIZE-1:0] dram_req_byteen,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] dram_req_addr,
|
||||
output wire [`BANK_LINE_WIDTH-1:0] dram_req_data,
|
||||
input wire dram_req_ready,
|
||||
|
||||
// Dram Fill Response
|
||||
input wire dram_fill_rsp_valid,
|
||||
input wire [`BANK_LINE_WIDTH-1:0] dram_fill_rsp_data,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] dram_fill_rsp_addr,
|
||||
output wire dram_fill_rsp_ready,
|
||||
// DRAM response
|
||||
input wire dram_rsp_valid,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr,
|
||||
input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// Dram WB Requests
|
||||
output wire dram_wb_req_valid,
|
||||
output wire [BANK_LINE_SIZE-1:0] dram_wb_req_byteen,
|
||||
output wire [`LINE_ADDR_WIDTH-1:0] dram_wb_req_addr,
|
||||
output wire [`BANK_LINE_WIDTH-1:0] dram_wb_req_data,
|
||||
input wire dram_wb_req_ready,
|
||||
|
||||
// Snp Request
|
||||
// Snoop Request
|
||||
input wire snp_req_valid,
|
||||
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
|
||||
input wire snp_req_invalidate,
|
||||
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
|
||||
output wire snp_req_ready,
|
||||
|
||||
// Snoop Response
|
||||
output wire snp_rsp_valid,
|
||||
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
|
||||
input wire snp_rsp_ready
|
||||
@@ -137,51 +132,51 @@ module VX_bank #(
|
||||
wire snrq_invalidate_st0;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0;
|
||||
|
||||
wire snp_req_fire = snp_req_valid && snp_req_ready;
|
||||
assign snp_req_ready = !snrq_full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH),
|
||||
.SIZE(SNRQ_SIZE)
|
||||
) snp_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (snp_req_valid && snp_req_ready),
|
||||
.data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.push (snp_req_fire),
|
||||
.pop (snrq_pop),
|
||||
.data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}),
|
||||
.empty (snrq_empty),
|
||||
.full (snrq_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign snp_req_ready = !snrq_full;
|
||||
|
||||
wire dfpq_pop;
|
||||
wire dfpq_empty;
|
||||
wire dfpq_full;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0;
|
||||
wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0;
|
||||
|
||||
wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready;
|
||||
assign dram_rsp_ready = !dfpq_full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + $bits(dram_fill_rsp_data)),
|
||||
.SIZE(DFPQ_SIZE)
|
||||
.DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
|
||||
.SIZE(DRPQ_SIZE)
|
||||
) dfp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (dram_fill_rsp_valid && dram_fill_rsp_ready),
|
||||
.data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}),
|
||||
.push (dram_rsp_fire),
|
||||
.pop (dfpq_pop),
|
||||
.data_in ({dram_rsp_addr, dram_rsp_data}),
|
||||
.data_out({dfpq_addr_st0, dfpq_filldata_st0}),
|
||||
.empty (dfpq_empty),
|
||||
.full (dfpq_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign dram_fill_rsp_ready = !dfpq_full;
|
||||
|
||||
wire reqq_pop;
|
||||
wire reqq_push;
|
||||
wire reqq_empty;
|
||||
wire reqq_full;
|
||||
wire reqq_req_st0;
|
||||
wire [`REQS_BITS-1:0] reqq_req_tid_st0;
|
||||
wire reqq_req_rw_st0;
|
||||
wire [WORD_SIZE-1:0] reqq_req_byteen_st0;
|
||||
@@ -191,6 +186,9 @@ module VX_bank #(
|
||||
wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0;
|
||||
wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0;
|
||||
|
||||
wire core_req_fire = (| core_req_valid) && core_req_ready;
|
||||
assign core_req_ready = !reqq_full;
|
||||
|
||||
VX_bank_core_req_arb #(
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
@@ -201,7 +199,7 @@ module VX_bank #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Enqueue
|
||||
.reqq_push (reqq_push),
|
||||
.reqq_push (core_req_fire),
|
||||
.bank_valids (core_req_valid),
|
||||
.bank_rw (core_req_rw),
|
||||
.bank_byteen (core_req_byteen),
|
||||
@@ -211,7 +209,6 @@ module VX_bank #(
|
||||
|
||||
// Dequeue
|
||||
.reqq_pop (reqq_pop),
|
||||
.reqq_req_st0 (reqq_req_st0),
|
||||
.reqq_req_tid_st0 (reqq_req_tid_st0),
|
||||
.reqq_req_rw_st0 (reqq_req_rw_st0),
|
||||
.reqq_req_byteen_st0 (reqq_req_byteen_st0),
|
||||
@@ -222,26 +219,21 @@ module VX_bank #(
|
||||
.reqq_full (reqq_full)
|
||||
);
|
||||
|
||||
assign core_req_ready = !reqq_full;
|
||||
assign reqq_push = (| core_req_valid) && core_req_ready;
|
||||
|
||||
wire mrvq_pop;
|
||||
wire mrvq_full;
|
||||
wire mrvq_stop;
|
||||
wire mrvq_valid_st0;
|
||||
wire[`REQS_BITS-1:0] mrvq_tid_st0;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] mrvq_addr_st0;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] mrvq_wsel_st0;
|
||||
wire [`WORD_WIDTH-1:0] mrvq_writeword_st0;
|
||||
wire [`REQ_TAG_WIDTH-1:0] mrvq_tag_st0;
|
||||
wire mrvq_rw_st0;
|
||||
wire [WORD_SIZE-1:0] mrvq_byteen_st0;
|
||||
wire mrvq_is_snp_st0;
|
||||
wire mrvq_snp_invalidate_st0;
|
||||
|
||||
wire mrvq_pending_hazard_st1;
|
||||
wire st2_pending_hazard_st1;
|
||||
wire force_request_miss_st1;
|
||||
wire msrq_pop;
|
||||
wire msrq_full;
|
||||
wire msrq_almfull;
|
||||
wire msrq_valid_st0;
|
||||
wire[`REQS_BITS-1:0] msrq_tid_st0;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] msrq_addr_st0;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] msrq_wsel_st0;
|
||||
wire [`WORD_WIDTH-1:0] msrq_writeword_st0;
|
||||
wire [`REQ_TAG_WIDTH-1:0] msrq_tag_st0;
|
||||
wire msrq_rw_st0;
|
||||
wire [WORD_SIZE-1:0] msrq_byteen_st0;
|
||||
wire msrq_is_snp_st0;
|
||||
wire msrq_snp_invalidate_st0;
|
||||
wire msrq_pending_hazard_st0;
|
||||
wire msrq_pending_hazard_st1;
|
||||
|
||||
wire[`REQS_BITS-1:0] miss_add_tid;
|
||||
wire[`REQ_TAG_WIDTH-1:0] miss_add_tag;
|
||||
@@ -249,48 +241,41 @@ module VX_bank #(
|
||||
wire[WORD_SIZE-1:0] miss_add_byteen;
|
||||
|
||||
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
|
||||
wire is_fill_st2;
|
||||
|
||||
wire recover_mrvq_state_st2;
|
||||
wire is_msrq_miss_st2;
|
||||
|
||||
wire mrvq_push_stall;
|
||||
wire msrq_push_stall;
|
||||
wire cwbq_push_stall;
|
||||
wire dwbq_push_stall;
|
||||
wire dram_fill_req_stall;
|
||||
wire stall_bank_pipe;
|
||||
|
||||
wire is_fill_st1;
|
||||
`DEBUG_BEGIN
|
||||
wire going_to_write_st1;
|
||||
`DEBUG_END
|
||||
|
||||
//determines if it is time to pop a req from the queues
|
||||
//unqual - the req does NOT qualify for execution in the bank.
|
||||
wire mrvq_pop_unqual = mrvq_valid_st0;
|
||||
wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty;
|
||||
wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1;
|
||||
wire snrq_pop_unqual = !mrvq_stop && !reqq_pop_unqual && !reqq_pop_unqual && !mrvq_pop_unqual && !dfpq_pop_unqual && !snrq_empty && !reqq_req_st0; // if there's any reqq_req, don't schedule snrq.
|
||||
// determine which queue to pop next in priority order
|
||||
wire msrq_pop_unqual = msrq_valid_st0;
|
||||
wire dfpq_pop_unqual = !msrq_pop_unqual && !dfpq_empty;
|
||||
wire reqq_pop_unqual = !msrq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && !msrq_almfull;
|
||||
wire snrq_pop_unqual = !msrq_pop_unqual && !dfpq_pop_unqual && !reqq_pop_unqual && !snrq_empty && !msrq_almfull;
|
||||
|
||||
assign mrvq_pop = mrvq_pop_unqual && !stall_bank_pipe && !recover_mrvq_state_st2;
|
||||
assign msrq_pop = msrq_pop_unqual && !stall_bank_pipe
|
||||
&& !is_msrq_miss_st2; // stop if previous request was a miss
|
||||
assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe;
|
||||
assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe;
|
||||
assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe;
|
||||
|
||||
//signals to progress to the next stage
|
||||
wire qual_is_fill_st0;
|
||||
wire qual_valid_st0;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] qual_wsel_st0;
|
||||
wire qual_is_mrvq_st0;
|
||||
wire is_fill_st0;
|
||||
wire valid_st0;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st0;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0;
|
||||
wire is_msrq_st0;
|
||||
|
||||
wire [`WORD_WIDTH-1:0] qual_writeword_st0;
|
||||
wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0;
|
||||
wire qual_going_to_write_st0;
|
||||
wire qual_is_snp_st0;
|
||||
wire qual_snp_invalidate_st0;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st0;
|
||||
wire [`BANK_LINE_WIDTH-1:0] writedata_st0;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st0;
|
||||
wire is_snp_st0;
|
||||
wire snp_invalidate_st0;
|
||||
wire msrq_pending_hazard_unqual_st0;
|
||||
|
||||
//signals to be *used* in the next stage
|
||||
wire valid_st1;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st1;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1;
|
||||
@@ -299,64 +284,56 @@ module VX_bank #(
|
||||
wire [`BANK_LINE_WIDTH-1:0] writedata_st1;
|
||||
wire is_snp_st1;
|
||||
wire snp_invalidate_st1;
|
||||
wire is_mrvq_st1;
|
||||
wire is_msrq_st1;
|
||||
wire msrq_pending_hazard_st1;
|
||||
|
||||
//Determine which req will progress to the next stage
|
||||
assign qual_is_fill_st0 = dfpq_pop_unqual; //dram is filling a request
|
||||
assign is_msrq_st0 = msrq_pop_unqual;
|
||||
|
||||
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
|
||||
assign is_fill_st0 = dfpq_pop_unqual;
|
||||
|
||||
//Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req
|
||||
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
assign valid_st0 = dfpq_pop || msrq_pop || reqq_pop || snrq_pop;
|
||||
|
||||
assign addr_st0 = msrq_pop_unqual ? msrq_addr_st0 :
|
||||
dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
|
||||
snrq_pop_unqual ? snrq_addr_st0 :
|
||||
0;
|
||||
|
||||
//Word select does ? Does this just pick a specific word from the line instead of the whole line?
|
||||
if (`WORD_SELECT_WIDTH != 0) begin
|
||||
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
|
||||
mrvq_pop_unqual ? mrvq_wsel_st0 :
|
||||
assign wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
|
||||
msrq_pop_unqual ? msrq_wsel_st0 :
|
||||
0;
|
||||
end else begin
|
||||
`UNUSED_VAR(mrvq_wsel_st0)
|
||||
assign qual_wsel_st0 = 0;
|
||||
`UNUSED_VAR(msrq_wsel_st0)
|
||||
assign wsel_st0 = 0;
|
||||
end
|
||||
|
||||
//if you are filling from dram then that is the write data? What about core? What is 57?
|
||||
assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57;
|
||||
assign writedata_st0 = dfpq_filldata_st0;
|
||||
|
||||
//note that this is stored even if a DRAM fill is processed
|
||||
assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} :
|
||||
assign inst_meta_st0 = msrq_pop_unqual ? {`REQ_TAG_WIDTH'(msrq_tag_st0) , msrq_rw_st0, msrq_byteen_st0, msrq_tid_st0} :
|
||||
reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} :
|
||||
snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} :
|
||||
0;
|
||||
|
||||
|
||||
assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 :
|
||||
(mrvq_pop_unqual && mrvq_rw_st0) ? 1 :
|
||||
(reqq_pop_unqual && reqq_req_rw_st0) ? 1 :
|
||||
0;
|
||||
|
||||
//snp signals check to see if the miss reserve has a snp in it first.
|
||||
assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 :
|
||||
assign is_snp_st0 = msrq_pop_unqual ? msrq_is_snp_st0 :
|
||||
snrq_pop_unqual ? 1 :
|
||||
0;
|
||||
//if we are popping from the miss reserve then assign to the mrvq invalidate. If not and popping from the snoop queue use the snoop invalidate. Else this is 0
|
||||
assign qual_snp_invalidate_st0 = mrvq_pop_unqual ? mrvq_snp_invalidate_st0 :
|
||||
|
||||
assign snp_invalidate_st0 = msrq_pop_unqual ? msrq_snp_invalidate_st0 :
|
||||
snrq_pop_unqual ? snrq_invalidate_st0 :
|
||||
0;
|
||||
//choose which word of the line is being written to
|
||||
assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 :
|
||||
|
||||
assign writeword_st0 = msrq_pop_unqual ? msrq_writeword_st0 :
|
||||
reqq_pop_unqual ? reqq_req_writeword_st0 :
|
||||
0;
|
||||
|
||||
|
||||
assign qual_is_mrvq_st0 = mrvq_pop_unqual;
|
||||
// we have a miss in msrq or going into it for the current address
|
||||
wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0
|
||||
|| (miss_add_unqual && (addr_st2 == addr_st0));
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -367,10 +344,16 @@ module VX_bank #(
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (1'b0),
|
||||
.in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
|
||||
.out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
|
||||
.in ({is_msrq_st0, is_snp_st0, snp_invalidate_st0, msrq_pending_hazard_st0, valid_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}),
|
||||
.out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire[`WORD_WIDTH-1:0] readword_st1;
|
||||
wire[`BANK_LINE_WIDTH-1:0] readdata_st1;
|
||||
wire[`TAG_SELECT_BITS-1:0] readtag_st1;
|
||||
@@ -383,24 +366,20 @@ module VX_bank #(
|
||||
`DEBUG_END
|
||||
wire mem_rw_st1;
|
||||
wire [WORD_SIZE-1:0] mem_byteen_st1;
|
||||
wire fill_saw_dirty_st1;
|
||||
wire snp_to_mrvq_st1;
|
||||
wire mrvq_init_ready_state_st1;
|
||||
wire miss_add_because_miss;
|
||||
wire mrvq_recover_ready_state_st1;
|
||||
wire miss_add_unqual;
|
||||
|
||||
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
|
||||
|
||||
assign st2_pending_hazard_st1 = (miss_add_because_miss)
|
||||
&& ((addr_st2 == addr_st1) && !is_fill_st2);
|
||||
// we have a miss in st2 for the current address
|
||||
wire st2_pending_hazard_st1 = miss_add_unqual && (addr_st2 == addr_st1);
|
||||
|
||||
assign force_request_miss_st1 = (valid_st1 && !is_mrvq_st1 && (mrvq_pending_hazard_st1 || st2_pending_hazard_st1))
|
||||
|| (valid_st1 && is_mrvq_st1 && recover_mrvq_state_st2);
|
||||
// force miss to ensure commit order when a new request has pending previous requests to same block
|
||||
// also force a miss for msrq requests when previous request in st2 got a miss
|
||||
wire force_miss_st1 = (valid_st1 && !is_msrq_st1 && ~is_fill_st1 && (msrq_pending_hazard_st1 || st2_pending_hazard_st1))
|
||||
|| (valid_st1 && is_msrq_st1 && is_msrq_miss_st2);
|
||||
|
||||
assign mrvq_recover_ready_state_st1 = valid_st1
|
||||
&& is_mrvq_st1
|
||||
&& recover_mrvq_state_st2
|
||||
&& (addr_st2 == addr_st1);
|
||||
// access the tag data store
|
||||
wire tag_data_fire = valid_st1 && !stall_bank_pipe;
|
||||
|
||||
VX_tag_data_access #(
|
||||
.BANK_ID (BANK_ID),
|
||||
@@ -423,27 +402,19 @@ module VX_bank #(
|
||||
.debug_tagid_st1(debug_tagid_st1),
|
||||
`endif
|
||||
|
||||
.stall (stall_bank_pipe),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
|
||||
.force_request_miss_st1(force_request_miss_st1),
|
||||
|
||||
// Initial Read
|
||||
.readaddr_st1(addr_st1[`LINE_SELECT_BITS-1:0]),
|
||||
|
||||
// Actual Read/Write
|
||||
.valid_req_st1 (valid_st1),
|
||||
.valid_req_st1 (tag_data_fire),
|
||||
.writefill_st1 (is_fill_st1),
|
||||
.writeaddr_st1 (addr_st1),
|
||||
.addr_st1 (addr_st1),
|
||||
.wordsel_st1 (wsel_st1),
|
||||
.writeword_st1 (writeword_st1),
|
||||
.writedata_st1 (writedata_st1),
|
||||
|
||||
.mem_rw_st1 (mem_rw_st1),
|
||||
.mem_byteen_st1 (mem_byteen_st1),
|
||||
|
||||
.is_snp_st1 (is_snp_st1),
|
||||
.snp_invalidate_st1(snp_invalidate_st1),
|
||||
.force_miss_st1 (force_miss_st1),
|
||||
|
||||
// Read Data
|
||||
.readword_st1 (readword_st1),
|
||||
@@ -451,23 +422,9 @@ module VX_bank #(
|
||||
.readtag_st1 (readtag_st1),
|
||||
.miss_st1 (miss_st1),
|
||||
.dirty_st1 (dirty_st1),
|
||||
.dirtyb_st1 (dirtyb_st1),
|
||||
.fill_saw_dirty_st1(fill_saw_dirty_st1),
|
||||
.snp_to_mrvq_st1(snp_to_mrvq_st1),
|
||||
.mrvq_init_ready_state_st1(mrvq_init_ready_state_st1)
|
||||
.dirtyb_st1 (dirtyb_st1)
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||
end else begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire qual_valid_st1_2 = valid_st1 && !is_fill_st1;
|
||||
wire is_mrvq_st1_st2 = is_mrvq_st1;
|
||||
|
||||
wire valid_st2;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st2;
|
||||
@@ -478,26 +435,21 @@ module VX_bank #(
|
||||
wire [BANK_LINE_SIZE-1:0] dirtyb_st2;
|
||||
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2;
|
||||
wire [`TAG_SELECT_BITS-1:0] readtag_st2;
|
||||
wire fill_saw_dirty_st2;
|
||||
wire is_fill_st2;
|
||||
wire is_snp_st2;
|
||||
wire snp_invalidate_st2;
|
||||
wire snp_to_mrvq_st2;
|
||||
wire is_mrvq_st2;
|
||||
wire mrvq_init_ready_state_st2;
|
||||
wire mrvq_recover_ready_state_st2;
|
||||
wire mrvq_init_ready_state_unqual_st2;
|
||||
wire mrvq_init_ready_state_hazard_st0_st1;
|
||||
wire mrvq_init_ready_state_hazard_st1_st1;
|
||||
wire force_miss_st2;
|
||||
wire is_msrq_st2;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH)
|
||||
.N(1+ 1+ 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_bank_pipe),
|
||||
.flush (1'b0),
|
||||
.in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}),
|
||||
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
|
||||
.in ({is_msrq_st1, force_miss_st1, is_snp_st1, snp_invalidate_st1, is_fill_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}),
|
||||
.out ({is_msrq_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
@@ -507,19 +459,17 @@ module VX_bank #(
|
||||
`endif
|
||||
|
||||
// Enqueue to miss reserv if it's a valid miss
|
||||
assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2;
|
||||
wire miss_add_because_pending = snp_to_mrvq_st2;
|
||||
|
||||
wire miss_add_unqual = (miss_add_because_miss || miss_add_because_pending);
|
||||
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
|
||||
assign miss_add_unqual = miss_st2 || force_miss_st2;
|
||||
assign msrq_push_stall = miss_add_unqual && msrq_full;
|
||||
|
||||
wire miss_add = miss_add_unqual
|
||||
&& !mrvq_full
|
||||
&& !(cwbq_push_stall
|
||||
|| dwbq_push_stall
|
||||
|| dram_fill_req_stall);
|
||||
&& !msrq_full
|
||||
&& !cwbq_push_stall
|
||||
&& !dwbq_push_stall;
|
||||
|
||||
assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls
|
||||
// we have a recurrent msrq miss
|
||||
assign is_msrq_miss_st2 = miss_add_unqual && is_msrq_st2;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2;
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2;
|
||||
@@ -528,18 +478,31 @@ module VX_bank #(
|
||||
wire miss_add_is_snp = is_snp_st2;
|
||||
wire miss_add_snp_invalidate = snp_invalidate_st2;
|
||||
|
||||
wire miss_add_is_mrvq = valid_st2 && is_mrvq_st2 && !stall_bank_pipe;
|
||||
wire msrq_real_pop_st2 = valid_st2 && is_msrq_st2 && !miss_add_unqual && !stall_bank_pipe;
|
||||
|
||||
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == dfpq_addr_st0); // Doesn't need to be muxed to qual, only care about fills
|
||||
assign mrvq_init_ready_state_hazard_st1_st1 = miss_add_unqual && is_fill_st1 && (miss_add_addr == addr_st1);
|
||||
// mark msrq entry that match DRAM fill as 'ready'
|
||||
wire update_ready_st0 = dfpq_pop;
|
||||
|
||||
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 // When req was in st1e, either matched with an mrvq entery OR mrvq recovering state
|
||||
|| mrvq_init_ready_state_hazard_st0_st1 // If there's a fill in st0 that has the same address as miss_add_addr
|
||||
|| mrvq_init_ready_state_hazard_st1_st1; // If there's a fill in st1 that has the same address as miss_add_addr
|
||||
// push missed requests as 'ready'
|
||||
// if it didn't actually miss but had to abort because of pending requests in msrq
|
||||
// if matching fill request to the block is in stage 0
|
||||
// if matching fill request to the block is in stage 1
|
||||
wire match_st0_fill_st2 = is_fill_st0 && (miss_add_addr == addr_st0);
|
||||
wire match_st1_fill_st2 = is_fill_st1 && (miss_add_addr == addr_st1);
|
||||
wire msrq_init_ready_state_st2 = !miss_st2
|
||||
|| match_st0_fill_st2
|
||||
|| match_st1_fill_st2;
|
||||
|
||||
always @(*) begin
|
||||
if (miss_st2 && (match_st0_fill_st2 || match_st1_fill_st2)) begin
|
||||
$display("%t: incoming fill - addr=%0h, st0=%b, st1=%b", $time, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), match_st0_fill_st2, match_st1_fill_st2);
|
||||
end
|
||||
end
|
||||
|
||||
VX_cache_miss_resrv #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
@@ -551,9 +514,19 @@ module VX_bank #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Enqueue
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
.debug_pc_st0 (debug_pc_st0),
|
||||
.debug_rd_st0 (debug_rd_st0),
|
||||
.debug_wid_st0 (debug_wid_st0),
|
||||
.debug_tagid_st0(debug_tagid_st0),
|
||||
.debug_pc_st2 (debug_pc_st2),
|
||||
.debug_rd_st2 (debug_rd_st2),
|
||||
.debug_wid_st2 (debug_wid_st2),
|
||||
.debug_tagid_st2(debug_tagid_st2),
|
||||
`endif
|
||||
|
||||
// enqueue
|
||||
.miss_add (miss_add),
|
||||
.is_mrvq (miss_add_is_mrvq),
|
||||
.miss_add_addr (miss_add_addr),
|
||||
.miss_add_wsel (miss_add_wsel),
|
||||
.miss_add_data (miss_add_data),
|
||||
@@ -563,27 +536,30 @@ module VX_bank #(
|
||||
.miss_add_byteen (miss_add_byteen),
|
||||
.miss_add_is_snp (miss_add_is_snp),
|
||||
.miss_add_snp_invalidate (miss_add_snp_invalidate),
|
||||
.miss_resrv_full (mrvq_full),
|
||||
.miss_resrv_stop (mrvq_stop),
|
||||
.mrvq_init_ready_state (mrvq_init_ready_state_st2),
|
||||
.is_msrq_st2 (is_msrq_st2),
|
||||
.init_ready_state_st2 (msrq_init_ready_state_st2),
|
||||
|
||||
// Broadcast
|
||||
.is_fill_st1 (is_fill_st1),
|
||||
.fill_addr_st1 (addr_st1),
|
||||
.pending_hazard_st1 (mrvq_pending_hazard_st1),
|
||||
.miss_resrv_full (msrq_full),
|
||||
.miss_resrv_almfull (msrq_almfull),
|
||||
|
||||
// Dequeue
|
||||
.miss_resrv_pop (mrvq_pop),
|
||||
.miss_resrv_valid_st0 (mrvq_valid_st0),
|
||||
.miss_resrv_addr_st0 (mrvq_addr_st0),
|
||||
.miss_resrv_wsel_st0 (mrvq_wsel_st0),
|
||||
.miss_resrv_data_st0 (mrvq_writeword_st0),
|
||||
.miss_resrv_tid_st0 (mrvq_tid_st0),
|
||||
.miss_resrv_tag_st0 (mrvq_tag_st0),
|
||||
.miss_resrv_rw_st0 (mrvq_rw_st0),
|
||||
.miss_resrv_byteen_st0 (mrvq_byteen_st0),
|
||||
.miss_resrv_is_snp_st0 (mrvq_is_snp_st0),
|
||||
.miss_resrv_snp_invalidate_st0 (mrvq_snp_invalidate_st0)
|
||||
// fill
|
||||
.update_ready_st0 (update_ready_st0),
|
||||
.fill_addr_st0 (addr_st0),
|
||||
.pending_hazard_st0 (msrq_pending_hazard_unqual_st0),
|
||||
|
||||
// dequeue
|
||||
.miss_resrv_schedule_st0 (msrq_pop),
|
||||
.miss_resrv_valid_st0 (msrq_valid_st0),
|
||||
.miss_resrv_addr_st0 (msrq_addr_st0),
|
||||
.miss_resrv_wsel_st0 (msrq_wsel_st0),
|
||||
.miss_resrv_data_st0 (msrq_writeword_st0),
|
||||
.miss_resrv_tid_st0 (msrq_tid_st0),
|
||||
.miss_resrv_tag_st0 (msrq_tag_st0),
|
||||
.miss_resrv_rw_st0 (msrq_rw_st0),
|
||||
.miss_resrv_byteen_st0 (msrq_byteen_st0),
|
||||
.miss_resrv_is_snp_st0 (msrq_is_snp_st0),
|
||||
.miss_resrv_snp_invalidate_st0 (msrq_snp_invalidate_st0),
|
||||
.miss_resrv_pop_st2 (msrq_real_pop_st2)
|
||||
);
|
||||
|
||||
// Enqueue core response
|
||||
@@ -591,15 +567,13 @@ module VX_bank #(
|
||||
wire cwbq_push, cwbq_pop;
|
||||
wire cwbq_empty, cwbq_full;
|
||||
|
||||
wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2;
|
||||
wire cwbq_push_unqual = valid_st2 && !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2 && !miss_add_rw;
|
||||
assign cwbq_push_stall = cwbq_push_unqual && cwbq_full;
|
||||
|
||||
assign cwbq_push = cwbq_push_unqual
|
||||
&& !cwbq_full
|
||||
&& (miss_add_rw == 0)
|
||||
&& !(dwbq_push_stall
|
||||
|| mrvq_push_stall
|
||||
|| dram_fill_req_stall);
|
||||
&& !dwbq_push_stall
|
||||
&& !msrq_push_stall;
|
||||
|
||||
assign cwbq_pop = core_rsp_valid && core_rsp_ready;
|
||||
|
||||
@@ -613,11 +587,9 @@ module VX_bank #(
|
||||
) cwb_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.push (cwbq_push),
|
||||
.data_in ({cwbq_tid, cwbq_tag, cwbq_data}),
|
||||
|
||||
.pop (cwbq_pop),
|
||||
.data_in ({cwbq_tid, cwbq_tag, cwbq_data}),
|
||||
.data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}),
|
||||
.empty (cwbq_empty),
|
||||
.full (cwbq_full),
|
||||
@@ -626,90 +598,92 @@ module VX_bank #(
|
||||
|
||||
assign core_rsp_valid = !cwbq_empty;
|
||||
|
||||
// Enqueue DRAM fill request
|
||||
|
||||
wire dram_fill_req_fast = miss_add_unqual; // Completely unqualified hint that we might send a dram_fill_req
|
||||
wire dram_fill_req_unqual = dram_fill_req_fast
|
||||
&& (!mrvq_init_ready_state_st2
|
||||
|| (is_mrvq_st2 && !mrvq_recover_ready_state_st2)); // If this is set, then we are sure we will be sending a dram_fill_req
|
||||
|
||||
assign dram_fill_req_valid = dram_fill_req_unqual
|
||||
&& !(dwbq_push_stall
|
||||
|| mrvq_push_stall
|
||||
|| cwbq_push_stall);
|
||||
|
||||
assign dram_fill_req_addr = addr_st2;
|
||||
assign dram_fill_req_stall = dram_fill_req_fast && !dram_fill_req_ready; // Uses dram_fill_req_fast for critical path
|
||||
|
||||
// Enqueue DRAM writeback request
|
||||
// Enqueue DRAM / Snoop request
|
||||
|
||||
wire dwbq_push, dwbq_pop;
|
||||
wire dwbq_empty, dwbq_full;
|
||||
|
||||
wire dwbq_is_dwb_in, dwbq_is_snp_in;
|
||||
wire dwbq_is_dwb_out, dwbq_is_snp_out;
|
||||
wire dwbq_is_dram_out, dwbq_is_snp_out;
|
||||
|
||||
assign dwbq_is_snp_in = is_snp_st2 && valid_st2 && !snp_to_mrvq_st2;
|
||||
assign dwbq_is_dwb_in = (valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2;
|
||||
wire dwbq_push_unqual = dwbq_is_dwb_in || dwbq_is_snp_in;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr;
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] dwbq_snp_tag;
|
||||
|
||||
wire dwbq_is_dfl_in = miss_st2 && !msrq_init_ready_state_st2 && (!force_miss_st2 || is_msrq_st2);
|
||||
wire dwbq_is_dwb_in = dirty_st2 && !force_miss_st2 && (is_fill_st2 || is_snp_st2);
|
||||
wire dwbq_is_snp_in = valid_st2 && !force_miss_st2 && is_snp_st2;
|
||||
|
||||
wire dwbq_is_dram_in = dwbq_is_dfl_in || dwbq_is_dwb_in;
|
||||
|
||||
always @(posedge clk) begin
|
||||
assert(!is_msrq_st2 || !is_fill_st2);
|
||||
assert(!dwbq_is_dfl_in || !dwbq_is_dwb_in);
|
||||
end
|
||||
|
||||
wire dwbq_push_unqual = dwbq_is_dram_in || dwbq_is_snp_in;
|
||||
|
||||
assign dwbq_push_stall = dwbq_push_unqual && dwbq_full;
|
||||
|
||||
assign dwbq_push = dwbq_push_unqual
|
||||
&& !dwbq_full
|
||||
&& !(cwbq_push_stall
|
||||
|| mrvq_push_stall
|
||||
|| dram_fill_req_stall);
|
||||
&& !cwbq_push_stall
|
||||
&& !msrq_push_stall;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
|
||||
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st2 = SNP_REQ_TAG_WIDTH'(miss_add_tag);
|
||||
assign dwbq_req_addr = dwbq_is_dwb_in ? {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]} : addr_st2;
|
||||
assign dwbq_snp_tag = SNP_REQ_TAG_WIDTH'(miss_add_tag);
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(1 + 1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH),
|
||||
.SIZE(DWBQ_SIZE)
|
||||
.DATAW(1 + 1 + 1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH),
|
||||
.SIZE(DREQ_SIZE)
|
||||
) dwb_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.push (dwbq_push),
|
||||
.data_in ({dwbq_is_dwb_in, dwbq_is_snp_in, dirtyb_st2, dwbq_req_addr, readdata_st2, snrq_tag_st2}),
|
||||
|
||||
.pop (dwbq_pop),
|
||||
.data_out({dwbq_is_dwb_out, dwbq_is_snp_out, dram_wb_req_byteen, dram_wb_req_addr, dram_wb_req_data, snp_rsp_tag}),
|
||||
.data_in ({dwbq_is_dram_in, dwbq_is_snp_in, dwbq_is_dwb_in, dirtyb_st2, dwbq_req_addr, readdata_st2, dwbq_snp_tag}),
|
||||
.data_out({dwbq_is_dram_out, dwbq_is_snp_out, dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data, snp_rsp_tag}),
|
||||
.empty (dwbq_empty),
|
||||
.full (dwbq_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
wire dram_wb_req_fire = dram_wb_req_valid && dram_wb_req_ready;
|
||||
wire dram_req_fire = dram_req_valid && dram_req_ready;
|
||||
wire snp_rsp_fire = snp_rsp_valid && snp_rsp_ready;
|
||||
|
||||
reg dwbq_dual_valid_sel;
|
||||
reg dwbq_out_sel_snp;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
dwbq_dual_valid_sel <= 0;
|
||||
end else if (dwbq_is_dwb_out
|
||||
dwbq_out_sel_snp <= 0;
|
||||
end else if (dwbq_is_dram_out
|
||||
&& dwbq_is_snp_out
|
||||
&& (dram_wb_req_fire || snp_rsp_fire)) begin
|
||||
dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel;
|
||||
&& (dram_req_fire || snp_rsp_fire)) begin
|
||||
dwbq_out_sel_snp <= ~dwbq_out_sel_snp;
|
||||
end
|
||||
end
|
||||
|
||||
// when both dwb and snp are asserted, first release the dwb, then release the snp.
|
||||
assign dram_wb_req_valid = !dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0);
|
||||
assign snp_rsp_valid = !dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1);
|
||||
assign dram_req_valid = !dwbq_empty && dwbq_is_dram_out && (~dwbq_is_snp_out || !dwbq_out_sel_snp);
|
||||
assign snp_rsp_valid = !dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dram_out || dwbq_out_sel_snp);
|
||||
|
||||
assign dwbq_pop = (dwbq_is_dwb_out && !dwbq_is_snp_out && dram_wb_req_fire)
|
||||
|| (dwbq_is_snp_out && !dwbq_is_dwb_out && snp_rsp_fire)
|
||||
|| (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire);
|
||||
assign dwbq_pop = (dwbq_is_dram_out && !dwbq_is_snp_out && dram_req_fire)
|
||||
|| (dwbq_is_snp_out && snp_rsp_fire);
|
||||
|
||||
// bank pipeline stall
|
||||
assign stall_bank_pipe = cwbq_push_stall
|
||||
|| dwbq_push_stall
|
||||
|| mrvq_push_stall
|
||||
|| dram_fill_req_stall;
|
||||
assign stall_bank_pipe = (cwbq_push_stall || dwbq_push_stall || msrq_push_stall);
|
||||
|
||||
`SCOPE_ASSIGN (valid_st0, valid_st0);
|
||||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (valid_st2, valid_st2);
|
||||
|
||||
`SCOPE_ASSIGN (is_msrq_st1, is_msrq_st1);
|
||||
`SCOPE_ASSIGN (miss_st1, miss_st1);
|
||||
`SCOPE_ASSIGN (dirty_st1, dirty_st1);
|
||||
`SCOPE_ASSIGN (force_miss_st1, force_miss_st1);
|
||||
`SCOPE_ASSIGN (stall_pipe, stall_bank_pipe);
|
||||
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
always @(posedge clk) begin
|
||||
@@ -719,14 +693,11 @@ module VX_bank #(
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
$display("%t: cache%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
|
||||
end
|
||||
if (dram_fill_req_valid && dram_fill_req_ready) begin
|
||||
$display("%t: cache%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
|
||||
if (dram_req_valid && dram_req_ready) begin
|
||||
$display("%t: cache%0d:%0d dram req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_req_addr, BANK_ID), dram_req_data);
|
||||
end
|
||||
if (dram_wb_req_valid && dram_wb_req_ready) begin
|
||||
$display("%t: cache%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
|
||||
end
|
||||
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
|
||||
$display("%t: cache%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
|
||||
if (dram_rsp_valid && dram_rsp_ready) begin
|
||||
$display("%t: cache%0d:%0d dram rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID), dram_rsp_data);
|
||||
end
|
||||
if (snp_req_valid && snp_req_ready) begin
|
||||
$display("%t: cache%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
|
||||
@@ -734,21 +705,25 @@ module VX_bank #(
|
||||
if (snp_rsp_valid && snp_rsp_ready) begin
|
||||
$display("%t: cache%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
|
||||
end
|
||||
if (msrq_pop) begin
|
||||
$display("%t: cache%0d:%0d msrq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
|
||||
end
|
||||
if (dfpq_pop) begin
|
||||
$display("%t: cache%0d:%0d dfpq_pop: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
end
|
||||
if (reqq_pop) begin
|
||||
$display("%t: cache%0d:%0d reqq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
|
||||
end
|
||||
if (snrq_pop) begin
|
||||
$display("%t: cache%0d:%0d snrq_pop: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
|
||||
end
|
||||
if (cwbq_push) begin
|
||||
$display("%t: cache%0d:%0d cwbq_push: addr=%0h wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2);
|
||||
end
|
||||
if (dwbq_push) begin
|
||||
$display("%t: cache%0d:%0d dwbq_push: addr=%0h wid=%0d, PC=%0h, fill=%b, wb=%b, snp=%b", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dwbq_is_dfl_in, dwbq_is_dwb_in, dwbq_is_snp_in);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`SCOPE_ASSIGN (valid_st0, qual_valid_st0);
|
||||
`SCOPE_ASSIGN (valid_st1, valid_st1);
|
||||
`SCOPE_ASSIGN (valid_st2, valid_st2);
|
||||
|
||||
`SCOPE_ASSIGN (is_mrvq_st1, is_mrvq_st1);
|
||||
`SCOPE_ASSIGN (miss_st1, miss_st1);
|
||||
`SCOPE_ASSIGN (dirty_st1, dirty_st1);
|
||||
`SCOPE_ASSIGN (force_miss_st1, force_request_miss_st1);
|
||||
`SCOPE_ASSIGN (stall_pipe, stall_bank_pipe);
|
||||
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
`SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
|
||||
endmodule
|
||||
|
||||
2
hw/rtl/cache/VX_bank_core_req_arb.v
vendored
2
hw/rtl/cache/VX_bank_core_req_arb.v
vendored
@@ -26,7 +26,6 @@ module VX_bank_core_req_arb #(
|
||||
|
||||
// Dequeue Data
|
||||
input wire reqq_pop,
|
||||
output wire reqq_req_st0,
|
||||
output wire [`REQS_BITS-1:0] reqq_req_tid_st0,
|
||||
output wire reqq_req_rw_st0,
|
||||
output wire [WORD_SIZE-1:0] reqq_req_byteen_st0,
|
||||
@@ -107,7 +106,6 @@ module VX_bank_core_req_arb #(
|
||||
);
|
||||
|
||||
assign reqq_empty = !qual_has_request;
|
||||
assign reqq_req_st0 = qual_has_request;
|
||||
assign reqq_req_tid_st0 = qual_request_index;
|
||||
assign reqq_req_byteen_st0 = qual_byteen[qual_request_index];
|
||||
assign reqq_req_addr_st0 = qual_addr[qual_request_index];
|
||||
|
||||
171
hw/rtl/cache/VX_cache.v
vendored
171
hw/rtl/cache/VX_cache.v
vendored
@@ -19,18 +19,16 @@ module VX_cache #(
|
||||
parameter CREQ_SIZE = 8,
|
||||
// Miss Reserv Queue Knob
|
||||
parameter MRVQ_SIZE = 16,
|
||||
// Dram Fill Rsp Queue Size
|
||||
parameter DFPQ_SIZE = 16,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRPQ_SIZE = 16,
|
||||
// Snoop Req Queue Size
|
||||
parameter SNRQ_SIZE = 16,
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
parameter CWBQ_SIZE = 8,
|
||||
// Dram Writeback Queue Size
|
||||
parameter DWBQ_SIZE = 4,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 8,
|
||||
// DRAM Request Queue Size
|
||||
parameter DREQ_SIZE = 4,
|
||||
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
@@ -144,17 +142,14 @@ module VX_cache #(
|
||||
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid;
|
||||
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr;
|
||||
wire dram_fill_req_ready;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_req_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_req_rw;
|
||||
wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr;
|
||||
wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_fill_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid;
|
||||
wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_wb_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr;
|
||||
wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data;
|
||||
wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_snp_req_ready;
|
||||
|
||||
@@ -236,7 +231,7 @@ module VX_cache #(
|
||||
);
|
||||
|
||||
assign dram_req_tag = dram_req_addr;
|
||||
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
|
||||
assign dram_rsp_ready = (| per_bank_dram_rsp_ready);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
@@ -245,6 +240,7 @@ module VX_cache #(
|
||||
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
|
||||
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
|
||||
wire curr_bank_core_req_ready;
|
||||
|
||||
wire curr_bank_core_rsp_valid;
|
||||
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
|
||||
@@ -252,20 +248,17 @@ module VX_cache #(
|
||||
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
|
||||
wire curr_bank_core_rsp_ready;
|
||||
|
||||
wire curr_bank_dram_fill_rsp_valid;
|
||||
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
|
||||
wire curr_bank_dram_fill_rsp_ready;
|
||||
wire curr_bank_dram_req_valid;
|
||||
wire curr_bank_dram_req_rw;
|
||||
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr;
|
||||
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data;
|
||||
wire curr_bank_dram_req_ready;
|
||||
|
||||
wire curr_bank_dram_fill_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
|
||||
wire curr_bank_dram_fill_req_ready;
|
||||
|
||||
wire curr_bank_dram_wb_req_valid;
|
||||
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_wb_req_byteen;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
|
||||
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
|
||||
wire curr_bank_dram_wb_req_ready;
|
||||
wire curr_bank_dram_rsp_valid;
|
||||
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr;
|
||||
wire curr_bank_dram_rsp_ready;
|
||||
|
||||
wire curr_bank_snp_req_valid;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
|
||||
@@ -277,8 +270,6 @@ module VX_cache #(
|
||||
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
|
||||
wire curr_bank_snp_rsp_ready;
|
||||
|
||||
wire curr_bank_core_req_ready;
|
||||
|
||||
// Core Req
|
||||
assign curr_bank_core_req_valid = (per_bank_valid[i] & {NUM_REQUESTS{core_req_ready}});
|
||||
assign curr_bank_core_req_addr = core_req_addr;
|
||||
@@ -295,36 +286,28 @@ module VX_cache #(
|
||||
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
|
||||
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
|
||||
|
||||
// Dram fill request
|
||||
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
|
||||
// DRAM request
|
||||
assign per_bank_dram_req_valid[i] = curr_bank_dram_req_valid;
|
||||
assign per_bank_dram_req_rw[i] = curr_bank_dram_req_rw;
|
||||
assign per_bank_dram_req_byteen[i] = curr_bank_dram_req_byteen;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_dram_fill_req_addr[i] = curr_bank_dram_fill_req_addr;
|
||||
assign per_bank_dram_req_addr[i] = curr_bank_dram_req_addr;
|
||||
end else begin
|
||||
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
|
||||
assign per_bank_dram_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_req_addr, i);
|
||||
end
|
||||
assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
|
||||
assign per_bank_dram_req_data[i] = curr_bank_dram_req_data;
|
||||
assign curr_bank_dram_req_ready = per_bank_dram_req_ready[i];
|
||||
|
||||
// Dram fill response
|
||||
// DRAM response
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid;
|
||||
assign curr_bank_dram_fill_rsp_addr = dram_rsp_tag;
|
||||
assign curr_bank_dram_rsp_valid = dram_rsp_valid;
|
||||
assign curr_bank_dram_rsp_addr = dram_rsp_tag;
|
||||
end else begin
|
||||
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
|
||||
assign curr_bank_dram_fill_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag);
|
||||
assign curr_bank_dram_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
|
||||
assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag);
|
||||
end
|
||||
assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
|
||||
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
|
||||
|
||||
// Dram writeback request
|
||||
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
|
||||
assign per_bank_dram_wb_req_byteen[i] = curr_bank_dram_wb_req_byteen;
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign per_bank_dram_wb_req_addr[i] = curr_bank_dram_wb_req_addr;
|
||||
end else begin
|
||||
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
|
||||
end
|
||||
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
|
||||
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
|
||||
assign curr_bank_dram_rsp_data = dram_rsp_data;
|
||||
assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready;
|
||||
|
||||
// Snoop request
|
||||
if (NUM_BANKS == 1) begin
|
||||
@@ -353,11 +336,10 @@ module VX_cache #(
|
||||
.NUM_REQUESTS (NUM_REQUESTS),
|
||||
.CREQ_SIZE (CREQ_SIZE),
|
||||
.MRVQ_SIZE (MRVQ_SIZE),
|
||||
.DFPQ_SIZE (DFPQ_SIZE),
|
||||
.DRPQ_SIZE (DRPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DWBQ_SIZE (DWBQ_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE),
|
||||
.DREQ_SIZE (DREQ_SIZE),
|
||||
.DRAM_ENABLE (DRAM_ENABLE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.SNOOP_FORWARDING (SNOOP_FORWARDING),
|
||||
@@ -385,23 +367,19 @@ module VX_cache #(
|
||||
.core_rsp_tag (curr_bank_core_rsp_tag),
|
||||
.core_rsp_ready (curr_bank_core_rsp_ready),
|
||||
|
||||
// Dram fill request
|
||||
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
|
||||
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
|
||||
.dram_fill_req_ready (curr_bank_dram_fill_req_ready),
|
||||
// DRAM request
|
||||
.dram_req_valid (curr_bank_dram_req_valid),
|
||||
.dram_req_rw (curr_bank_dram_req_rw),
|
||||
.dram_req_byteen (curr_bank_dram_req_byteen),
|
||||
.dram_req_addr (curr_bank_dram_req_addr),
|
||||
.dram_req_data (curr_bank_dram_req_data),
|
||||
.dram_req_ready (curr_bank_dram_req_ready),
|
||||
|
||||
// Dram fill response
|
||||
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
|
||||
.dram_fill_rsp_data (curr_bank_dram_fill_rsp_data),
|
||||
.dram_fill_rsp_addr (curr_bank_dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_ready (curr_bank_dram_fill_rsp_ready),
|
||||
|
||||
// Dram writeback request
|
||||
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
|
||||
.dram_wb_req_byteen (curr_bank_dram_wb_req_byteen),
|
||||
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
|
||||
.dram_wb_req_data (curr_bank_dram_wb_req_data),
|
||||
.dram_wb_req_ready (curr_bank_dram_wb_req_ready),
|
||||
// DRAM response
|
||||
.dram_rsp_valid (curr_bank_dram_rsp_valid),
|
||||
.dram_rsp_data (curr_bank_dram_rsp_data),
|
||||
.dram_rsp_addr (curr_bank_dram_rsp_addr),
|
||||
.dram_rsp_ready (curr_bank_dram_rsp_ready),
|
||||
|
||||
// Snoop request
|
||||
.snp_req_valid (curr_bank_snp_req_valid),
|
||||
@@ -417,30 +395,6 @@ module VX_cache #(
|
||||
);
|
||||
end
|
||||
|
||||
VX_cache_dram_req_arb #(
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.DFQQ_SIZE (DFQQ_SIZE)
|
||||
) cache_dram_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
|
||||
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
|
||||
.dram_fill_req_ready (dram_fill_req_ready),
|
||||
.per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid),
|
||||
.per_bank_dram_wb_req_byteen (per_bank_dram_wb_req_byteen),
|
||||
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
|
||||
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
|
||||
.per_bank_dram_wb_req_ready (per_bank_dram_wb_req_ready),
|
||||
.dram_req_valid (dram_req_valid),
|
||||
.dram_req_rw (dram_req_rw),
|
||||
.dram_req_byteen (dram_req_byteen),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_ready (dram_req_ready)
|
||||
);
|
||||
|
||||
VX_cache_core_rsp_merge #(
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
@@ -461,6 +415,27 @@ module VX_cache #(
|
||||
.core_rsp_ready (core_rsp_ready)
|
||||
);
|
||||
|
||||
VX_cache_dram_req_arb #(
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE)
|
||||
) cache_dram_req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.per_bank_dram_req_valid (per_bank_dram_req_valid),
|
||||
.per_bank_dram_req_rw (per_bank_dram_req_rw),
|
||||
.per_bank_dram_req_byteen (per_bank_dram_req_byteen),
|
||||
.per_bank_dram_req_addr (per_bank_dram_req_addr),
|
||||
.per_bank_dram_req_data (per_bank_dram_req_data),
|
||||
.per_bank_dram_req_ready (per_bank_dram_req_ready),
|
||||
.dram_req_valid (dram_req_valid),
|
||||
.dram_req_rw (dram_req_rw),
|
||||
.dram_req_byteen (dram_req_byteen),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_ready (dram_req_ready)
|
||||
);
|
||||
|
||||
VX_snp_rsp_arb #(
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
|
||||
92
hw/rtl/cache/VX_cache_dram_fill_arb.v
vendored
92
hw/rtl/cache/VX_cache_dram_fill_arb.v
vendored
@@ -1,92 +0,0 @@
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
// VX_cache_dram_fill_arb
// Buffers per-bank DRAM fill requests and hands them out one bank at a time.
//  - Enqueue: when dfqq_push is asserted and the queue is not full, the complete
//    vector of per-bank valid bits plus all per-bank addresses is stored as ONE
//    queue entry.
//  - Dequeue: every accepted dfqq_pop consumes a single bank's request. The queue
//    head is popped when its first request is issued; the requests of that entry
//    that are still pending are then tracked in the use_* registers until drained.
module VX_cache_dram_fill_arb #(
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Enqueue side: push request and the per-bank valid/address vectors stored with it
input wire dfqq_push,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
|
||||
input wire[NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr,
|
||||
|
||||
// Dequeue side: one selected bank request per pop
input wire dfqq_pop,
|
||||
output wire dfqq_req,
|
||||
output wire[`DRAM_ADDR_WIDTH-1:0] dfqq_req_addr,
|
||||
// dfqq_empty is driven from the arbiter (no grant), dfqq_full from the queue
output wire dfqq_empty,
|
||||
output wire dfqq_full
|
||||
);
|
||||
// Leftover (not yet issued) requests of the entry most recently popped from the queue
reg [NUM_BANKS-1:0] use_per_bank_dram_fill_req_valid;
|
||||
reg [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] use_per_bank_dram_fill_req_addr;
|
||||
|
||||
// Entry currently at the head of the queue
wire [NUM_BANKS-1:0] out_per_bank_dram_fill_req_valid;
|
||||
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] out_per_bank_dram_fill_req_addr;
|
||||
|
||||
// Requests presented to the arbiter: the leftovers if any, otherwise the queue head
wire [NUM_BANKS-1:0] use_per_bqual_bank_dram_fill_req_valid;
|
||||
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] qual_bank_dram_fill_req_addr;
|
||||
|
||||
// Presented valid bits with the granted bank's bit cleared
wire [NUM_BANKS-1:0] updated_bank_dram_fill_req_valid;
|
||||
|
||||
wire o_empty;
|
||||
|
||||
// use_empty: no leftover requests; out_empty: queue is empty or its head has no valid bit set
wire use_empty = !(| use_per_bank_dram_fill_req_valid);
|
||||
wire out_empty = !(| out_per_bank_dram_fill_req_valid) || o_empty;
|
||||
|
||||
// Pushes are dropped while the queue is full. The head is popped only when a pop
// is requested, no leftovers are pending and the head actually carries a request.
wire push_qual = dfqq_push && !dfqq_full;
|
||||
wire pop_qual  = dfqq_pop  && use_empty && !out_empty;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(NUM_BANKS * (1+`DRAM_ADDR_WIDTH)),
|
||||
.SIZE(DFQQ_SIZE)
|
||||
) dfqq_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push_qual),
|
||||
.data_in ({per_bank_dram_fill_req_valid, per_bank_dram_fill_req_addr}),
|
||||
.pop (pop_qual),
|
||||
.data_out({out_per_bank_dram_fill_req_valid, out_per_bank_dram_fill_req_addr}),
|
||||
.empty (o_empty),
|
||||
.full (dfqq_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
// Source select: queue head (masked to zero while the queue is empty) when there
// are no leftovers, otherwise the leftover registers.
assign use_per_bqual_bank_dram_fill_req_valid = use_empty ? (out_per_bank_dram_fill_req_valid & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req_valid & {NUM_BANKS{!use_empty}});
|
||||
assign qual_bank_dram_fill_req_addr  = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
|
||||
|
||||
wire[`BANK_BITS-1:0] qual_request_index;
|
||||
wire qual_has_request;
|
||||
|
||||
// Picks one bank among the presented requests.
// NOTE(review): the selection policy lives in VX_fixed_arbiter, which is not visible here.
VX_fixed_arbiter #(
|
||||
.N(NUM_BANKS)
|
||||
) sel_bank (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (use_per_bqual_bank_dram_fill_req_valid),
|
||||
.grant_index (qual_request_index),
|
||||
.grant_valid (qual_has_request),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
// Outputs: valid bit and address of the granted bank
assign dfqq_empty = !qual_has_request;
|
||||
assign dfqq_req = use_per_bqual_bank_dram_fill_req_valid [qual_request_index];
|
||||
assign dfqq_req_addr = qual_bank_dram_fill_req_addr[qual_request_index];
|
||||
|
||||
assign updated_bank_dram_fill_req_valid = use_per_bqual_bank_dram_fill_req_valid & (~(1 << qual_request_index));
|
||||
|
||||
// On an accepted pop, latch what remains of the entry being drained (valid bits
// minus the granted bank, together with the matching address vector).
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
use_per_bank_dram_fill_req_valid <= 0;
|
||||
use_per_bank_dram_fill_req_addr <= 0;
|
||||
end else begin
|
||||
if (dfqq_pop && qual_has_request) begin
|
||||
use_per_bank_dram_fill_req_valid <= updated_bank_dram_fill_req_valid;
|
||||
use_per_bank_dram_fill_req_addr  <= qual_bank_dram_fill_req_addr;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
82
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
82
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
@@ -6,88 +6,50 @@ module VX_cache_dram_req_arb #(
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
parameter NUM_BANKS = 0,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 0,
|
||||
// Dram Fill Req Queue Size
|
||||
parameter DFQQ_SIZE = 0
|
||||
parameter WORD_SIZE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Fill Request
|
||||
input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
|
||||
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr,
|
||||
output wire dram_fill_req_ready,
|
||||
// Inputs
|
||||
input wire [NUM_BANKS-1:0] per_bank_dram_req_valid,
|
||||
input wire [NUM_BANKS-1:0] per_bank_dram_req_rw,
|
||||
input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen,
|
||||
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr,
|
||||
input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data,
|
||||
output wire [NUM_BANKS-1:0] per_bank_dram_req_ready,
|
||||
|
||||
// Writeback Request
|
||||
input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
|
||||
input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_wb_req_byteen,
|
||||
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr,
|
||||
input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data,
|
||||
output wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready,
|
||||
|
||||
// Merged Request
|
||||
// Output
|
||||
output wire dram_req_valid,
|
||||
output wire dram_req_rw,
|
||||
output wire [BANK_LINE_SIZE-1:0] dram_req_byteen,
|
||||
output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
|
||||
output wire [`BANK_LINE_WIDTH-1:0] dram_req_data,
|
||||
|
||||
input wire dram_req_ready
|
||||
);
|
||||
|
||||
wire dwb_valid;
|
||||
wire dfqq_req;
|
||||
|
||||
wire[`DRAM_ADDR_WIDTH-1:0] dfqq_req_addr;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire dfqq_empty;
|
||||
`DEBUG_END
|
||||
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (| per_bank_dram_fill_req_valid);
|
||||
wire dfqq_full;
|
||||
|
||||
VX_cache_dram_fill_arb #(
|
||||
.BANK_LINE_SIZE(BANK_LINE_SIZE),
|
||||
.NUM_BANKS(NUM_BANKS),
|
||||
.DFQQ_SIZE(DFQQ_SIZE)
|
||||
) dram_fill_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dfqq_push (dfqq_push),
|
||||
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
|
||||
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
|
||||
.dfqq_pop (dfqq_pop),
|
||||
.dfqq_req (dfqq_req),
|
||||
.dfqq_req_addr (dfqq_req_addr),
|
||||
.dfqq_empty (dfqq_empty),
|
||||
.dfqq_full (dfqq_full)
|
||||
);
|
||||
|
||||
assign dram_fill_req_ready = !dfqq_full;
|
||||
|
||||
wire [`BANK_BITS-1:0] dwb_bank;
|
||||
wire [`BANK_BITS-1:0] sel_bank;
|
||||
wire sel_valid;
|
||||
|
||||
VX_fixed_arbiter #(
|
||||
.N(NUM_BANKS)
|
||||
) sel_dwb (
|
||||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (per_bank_dram_wb_req_valid),
|
||||
.grant_index (dwb_bank),
|
||||
.grant_valid (dwb_valid),
|
||||
.requests (per_bank_dram_req_valid),
|
||||
.grant_index (sel_bank),
|
||||
.grant_valid (sel_valid),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign dram_req_valid = sel_valid;
|
||||
assign dram_req_rw = per_bank_dram_req_rw[sel_bank];
|
||||
assign dram_req_byteen = per_bank_dram_req_byteen[sel_bank];
|
||||
assign dram_req_addr = per_bank_dram_req_addr[sel_bank];
|
||||
assign dram_req_data = per_bank_dram_req_data[sel_bank];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
|
||||
assign per_bank_dram_req_ready[i] = dram_req_ready && (sel_bank == `BANK_BITS'(i));
|
||||
end
|
||||
|
||||
assign dram_req_valid = dwb_valid || dfqq_req;
|
||||
assign dram_req_rw = dwb_valid;
|
||||
assign dram_req_byteen = dwb_valid ? per_bank_dram_wb_req_byteen[dwb_bank] : {BANK_LINE_SIZE{1'b1}};
|
||||
assign dram_req_addr = dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr;
|
||||
assign {dram_req_data} = dwb_valid ? per_bank_dram_wb_req_data[dwb_bank] : 0;
|
||||
|
||||
endmodule
|
||||
|
||||
159
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
159
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -3,6 +3,7 @@
|
||||
module VX_cache_miss_resrv #(
|
||||
parameter CACHE_ID = 0,
|
||||
parameter BANK_ID = 0,
|
||||
parameter CORE_TAG_ID_BITS = 0,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter BANK_LINE_SIZE = 0,
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
@@ -21,9 +22,21 @@ module VX_cache_miss_resrv #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Miss enqueue
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc_st0,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st0,
|
||||
input wire[`NW_BITS-1:0] debug_wid_st0,
|
||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0,
|
||||
input wire[31:0] debug_pc_st2,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st2,
|
||||
input wire[`NW_BITS-1:0] debug_wid_st2,
|
||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
// enqueue
|
||||
input wire miss_add,
|
||||
input wire is_mrvq,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr,
|
||||
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel,
|
||||
input wire[`WORD_WIDTH-1:0] miss_add_data,
|
||||
@@ -31,20 +44,21 @@ module VX_cache_miss_resrv #(
|
||||
input wire[`REQ_TAG_WIDTH-1:0] miss_add_tag,
|
||||
input wire miss_add_rw,
|
||||
input wire[WORD_SIZE-1:0] miss_add_byteen,
|
||||
input wire mrvq_init_ready_state,
|
||||
input wire miss_add_is_snp,
|
||||
input wire miss_add_snp_invalidate,
|
||||
input wire is_msrq_st2,
|
||||
input wire init_ready_state_st2,
|
||||
|
||||
output wire miss_resrv_full,
|
||||
output wire miss_resrv_stop,
|
||||
output wire miss_resrv_almfull,
|
||||
|
||||
// Broadcast Address
|
||||
input wire is_fill_st1,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st1,
|
||||
// fill
|
||||
input wire update_ready_st0,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] fill_addr_st0,
|
||||
output wire pending_hazard_st0,
|
||||
|
||||
output wire pending_hazard_st1,
|
||||
|
||||
// Miss dequeue
|
||||
input wire miss_resrv_pop,
|
||||
// dequeue
|
||||
input wire miss_resrv_schedule_st0,
|
||||
output wire miss_resrv_valid_st0,
|
||||
output wire[`LINE_ADDR_WIDTH-1:0] miss_resrv_addr_st0,
|
||||
output wire[`UP(`WORD_SELECT_WIDTH)-1:0] miss_resrv_wsel_st0,
|
||||
@@ -54,43 +68,38 @@ module VX_cache_miss_resrv #(
|
||||
output wire miss_resrv_rw_st0,
|
||||
output wire[WORD_SIZE-1:0] miss_resrv_byteen_st0,
|
||||
output wire miss_resrv_is_snp_st0,
|
||||
output wire miss_resrv_snp_invalidate_st0
|
||||
output wire miss_resrv_snp_invalidate_st0,
|
||||
input wire miss_resrv_pop_st2
|
||||
);
|
||||
localparam FULL_DISTANCE = 2; // need 2 cycles window to prevent pipeline lock
|
||||
|
||||
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
|
||||
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||
`NO_RW_RAM_CHECK reg [`LINE_ADDR_WIDTH-1:0] addr_table [MRVQ_SIZE-1:0];
|
||||
|
||||
reg [MRVQ_SIZE-1:0] valid_table;
|
||||
reg [MRVQ_SIZE-1:0] ready_table;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr, restore_ptr;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr;
|
||||
|
||||
reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size;
|
||||
|
||||
`STATIC_ASSERT(MRVQ_SIZE > 5, ("invalid size"))
|
||||
`STATIC_ASSERT(MRVQ_SIZE > FULL_DISTANCE, ("invalid size"))
|
||||
|
||||
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
|
||||
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock
|
||||
|
||||
wire enqueue_possible = !miss_resrv_full;
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] make_ready;
|
||||
reg [MRVQ_SIZE-1:0] make_ready_push;
|
||||
reg [MRVQ_SIZE-1:0] valid_address_match;
|
||||
assign miss_resrv_almfull = (size >= $bits(size)'(MRVQ_SIZE-FULL_DISTANCE));
|
||||
|
||||
wire [MRVQ_SIZE-1:0] valid_address_match;
|
||||
for (genvar i = 0; i < MRVQ_SIZE; i++) begin
|
||||
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
|
||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||
assign valid_address_match[i] = valid_table[i] && (addr_table[i] == fill_addr_st0);
|
||||
end
|
||||
|
||||
assign pending_hazard_st1 = |(valid_address_match);
|
||||
assign pending_hazard_st0 = (| valid_address_match);
|
||||
|
||||
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
|
||||
wire dequeue_ready = valid_table[schedule_ptr] && ready_table[schedule_ptr];
|
||||
|
||||
assign miss_resrv_valid_st0 = dequeue_possible;
|
||||
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
|
||||
assign miss_resrv_valid_st0 = dequeue_ready;
|
||||
assign miss_resrv_addr_st0 = addr_table[schedule_ptr];
|
||||
assign {miss_resrv_data_st0,
|
||||
miss_resrv_tid_st0,
|
||||
miss_resrv_tag_st0,
|
||||
@@ -100,56 +109,51 @@ module VX_cache_miss_resrv #(
|
||||
miss_resrv_is_snp_st0,
|
||||
miss_resrv_snp_invalidate_st0} = metadata_table;
|
||||
|
||||
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
|
||||
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
|
||||
wire msrq_push = miss_add && !is_msrq_st2;
|
||||
|
||||
wire recover_state = miss_add && is_mrvq;
|
||||
wire increment_head = !miss_add && is_mrvq;
|
||||
|
||||
wire update_ready = (|make_ready);
|
||||
|
||||
wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state;
|
||||
|
||||
assign make_ready_push = (MRVQ_SIZE'(qual_mrvq_init)) << enqueue_index;
|
||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_table <= 0;
|
||||
ready_table <= 0;
|
||||
size <= 0;
|
||||
schedule_ptr <= 0;
|
||||
restore_ptr <= 0;
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
size <= 0;
|
||||
end else begin
|
||||
if (mrvq_push) begin
|
||||
valid_table[enqueue_index] <= 1;
|
||||
ready_table[enqueue_index] <= mrvq_init_ready_state;
|
||||
addr_table[enqueue_index] <= miss_add_addr;
|
||||
|
||||
if (update_ready_st0) begin
|
||||
ready_table <= ready_table | valid_address_match;
|
||||
end
|
||||
|
||||
if (miss_add) begin
|
||||
assert(!miss_resrv_full);
|
||||
if (is_msrq_st2) begin
|
||||
// returning missed msrq entry, restore schedule
|
||||
valid_table[restore_ptr] <= 1;
|
||||
ready_table[restore_ptr] <= init_ready_state_st2;
|
||||
restore_ptr <= restore_ptr + $bits(restore_ptr)'(1);
|
||||
schedule_ptr <= head_ptr;
|
||||
end else begin
|
||||
valid_table[tail_ptr] <= 1;
|
||||
ready_table[tail_ptr] <= init_ready_state_st2;
|
||||
addr_table[tail_ptr] <= miss_add_addr;
|
||||
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
|
||||
end else if (increment_head) begin
|
||||
valid_table[head_ptr] <= 0;
|
||||
head_ptr <= head_ptr + $bits(head_ptr)'(1);
|
||||
end else if (recover_state) begin
|
||||
schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1);
|
||||
end
|
||||
|
||||
// update entry as 'ready' during DRAM fill response
|
||||
if (update_ready) begin
|
||||
ready_table <= ready_table | make_ready | make_ready_push;
|
||||
end
|
||||
|
||||
if (mrvq_pop) begin
|
||||
ready_table[dequeue_index] <= 0;
|
||||
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
|
||||
end
|
||||
|
||||
if (!(mrvq_push && increment_head)) begin
|
||||
if (mrvq_push) begin
|
||||
size <= size + $bits(size)'(1);
|
||||
end
|
||||
if (increment_head) begin
|
||||
end else if (miss_resrv_pop_st2) begin
|
||||
head_ptr <= head_ptr_n;
|
||||
restore_ptr <= head_ptr_n;
|
||||
valid_table[head_ptr] <= 0;
|
||||
size <= size - $bits(size)'(1);
|
||||
end
|
||||
|
||||
if (miss_resrv_schedule_st0) begin
|
||||
assert(miss_resrv_valid_st0);
|
||||
valid_table[schedule_ptr] <= 0;
|
||||
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -160,11 +164,11 @@ module VX_cache_miss_resrv #(
|
||||
.BYTEENW(1),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) metadata_ram (
|
||||
) metadata (
|
||||
.clk(clk),
|
||||
.waddr(enqueue_index),
|
||||
.raddr(dequeue_index),
|
||||
.wren(mrvq_push),
|
||||
.waddr(tail_ptr),
|
||||
.raddr(schedule_ptr),
|
||||
.wren(msrq_push),
|
||||
.rden(1'b1),
|
||||
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
|
||||
.dout(metadata_table)
|
||||
@@ -172,8 +176,16 @@ module VX_cache_miss_resrv #(
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
always @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
$write("%t: cache%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
if (miss_add || miss_resrv_schedule_st0 || miss_resrv_pop_st2) begin
|
||||
if (miss_add)
|
||||
if (is_msrq_st2)
|
||||
$write("%t: cache%0d:%0d msrq-restore addr%0d=%0h ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2);
|
||||
else
|
||||
$write("%t: cache%0d:%0d msrq-push addr%0d=%0h ready=%b wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(miss_add_addr, BANK_ID), init_ready_state_st2, debug_wid_st2, debug_pc_st2);
|
||||
else if (miss_resrv_schedule_st0)
|
||||
$write("%t: cache%0d:%0d msrq-schedule wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st0, debug_pc_st0);
|
||||
else if (miss_resrv_pop_st2)
|
||||
$write("%t: cache%0d:%0d msrq-pop addr%0d wid=%0d PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st2, debug_pc_st2);
|
||||
for (integer j = 0; j < MRVQ_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
@@ -181,6 +193,11 @@ module VX_cache_miss_resrv #(
|
||||
if (~ready_table[j]) $write("!");
|
||||
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
|
||||
end
|
||||
else if (schedule_ptr == $bits(schedule_ptr)'(j)) begin
|
||||
$write(" *");
|
||||
if (~ready_table[j]) $write("!");
|
||||
$write("[addr%0d=%0h]", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
|
||||
end
|
||||
end
|
||||
$write("\n");
|
||||
end
|
||||
|
||||
16
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
16
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
@@ -17,25 +17,25 @@ module VX_snp_rsp_arb #(
|
||||
input wire snp_rsp_ready
|
||||
);
|
||||
|
||||
wire [`BANK_BITS-1:0] fsq_bank;
|
||||
wire fsq_valid;
|
||||
wire [`BANK_BITS-1:0] sel_bank;
|
||||
wire sel_valid;
|
||||
|
||||
VX_fixed_arbiter #(
|
||||
.N(NUM_BANKS)
|
||||
) sel_ffsq (
|
||||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (per_bank_snp_rsp_valid),
|
||||
.grant_index (fsq_bank),
|
||||
.grant_valid (fsq_valid),
|
||||
.grant_index (sel_bank),
|
||||
.grant_valid (sel_valid),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign snp_rsp_valid = fsq_valid;
|
||||
assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank];
|
||||
assign snp_rsp_valid = sel_valid;
|
||||
assign snp_rsp_tag = per_bank_snp_rsp_tag[sel_bank];
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i));
|
||||
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (sel_bank == `BANK_BITS'(i));
|
||||
end
|
||||
|
||||
endmodule
|
||||
121
hw/rtl/cache/VX_tag_data_access.v
vendored
121
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -31,15 +31,10 @@ module VX_tag_data_access #(
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
input wire stall,
|
||||
input wire is_snp_st1,
|
||||
input wire snp_invalidate_st1,
|
||||
input wire stall_bank_pipe,
|
||||
|
||||
input wire force_request_miss_st1,
|
||||
|
||||
input wire[`LINE_SELECT_BITS-1:0] readaddr_st1,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] writeaddr_st1,
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr_st1,
|
||||
|
||||
input wire valid_req_st1,
|
||||
input wire writefill_st1,
|
||||
@@ -52,17 +47,15 @@ module VX_tag_data_access #(
|
||||
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1,
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
input wire force_miss_st1,
|
||||
|
||||
output wire[`WORD_WIDTH-1:0] readword_st1,
|
||||
output wire[`BANK_LINE_WIDTH-1:0] readdata_st1,
|
||||
output wire[`TAG_SELECT_BITS-1:0] readtag_st1,
|
||||
output wire miss_st1,
|
||||
output wire dirty_st1,
|
||||
output wire[BANK_LINE_SIZE-1:0] dirtyb_st1,
|
||||
output wire fill_saw_dirty_st1,
|
||||
output wire snp_to_mrvq_st1,
|
||||
output wire mrvq_init_ready_state_st1
|
||||
output wire[BANK_LINE_SIZE-1:0] dirtyb_st1
|
||||
);
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
wire qual_read_valid_st1;
|
||||
wire qual_read_dirty_st1;
|
||||
@@ -78,15 +71,11 @@ module VX_tag_data_access #(
|
||||
wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_write_enable;
|
||||
wire[`BANK_LINE_WIDTH-1:0] use_write_data;
|
||||
|
||||
wire fill_sent;
|
||||
wire invalidate_line;
|
||||
wire use_invalidate;
|
||||
wire tags_match;
|
||||
|
||||
wire real_writefill = valid_req_st1 && writefill_st1
|
||||
&& ((~use_read_valid_st1) || (use_read_valid_st1 && ~tags_match));
|
||||
|
||||
wire[`TAG_SELECT_BITS-1:0] writetag_st1 = writeaddr_st1[`TAG_LINE_ADDR_RNG];
|
||||
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1 = writeaddr_st1[`LINE_SELECT_BITS-1:0];
|
||||
wire[`TAG_SELECT_BITS-1:0] addrtag_st1 = addr_st1[`TAG_LINE_ADDR_RNG];
|
||||
wire[`LINE_SELECT_BITS-1:0] addrline_st1 = addr_st1[`LINE_SELECT_BITS-1:0];
|
||||
|
||||
VX_tag_data_store #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
@@ -96,27 +85,25 @@ module VX_tag_data_access #(
|
||||
) tag_data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall_bank_pipe(stall_bank_pipe),
|
||||
|
||||
.read_addr (readaddr_st1),
|
||||
.read_addr (addrline_st1),
|
||||
.read_valid (qual_read_valid_st1),
|
||||
.read_dirty (qual_read_dirty_st1),
|
||||
.read_dirtyb (qual_read_dirtyb_st1),
|
||||
.read_tag (qual_read_tag_st1),
|
||||
.read_data (qual_read_data_st1),
|
||||
|
||||
.invalidate (invalidate_line),
|
||||
.invalidate (use_invalidate),
|
||||
.write_enable(use_write_enable),
|
||||
.write_fill (real_writefill),
|
||||
.write_addr (writeladdr_st1),
|
||||
.tag_index (writetag_st1),
|
||||
.write_data (use_write_data),
|
||||
.fill_sent (fill_sent)
|
||||
.write_fill (writefill_st1),
|
||||
.write_addr (addrline_st1),
|
||||
.tag_index (addrtag_st1),
|
||||
.write_data (use_write_data)
|
||||
);
|
||||
|
||||
assign use_read_valid_st1 = qual_read_valid_st1 || !DRAM_ENABLE; // If shared memory, always valid
|
||||
assign use_read_dirty_st1 = qual_read_dirty_st1 && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache
|
||||
assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : writetag_st1; // Tag is always the same in SM
|
||||
assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : addrtag_st1; // Tag is always the same in SM
|
||||
assign use_read_dirtyb_st1= qual_read_dirtyb_st1;
|
||||
assign use_read_data_st1 = qual_read_data_st1;
|
||||
|
||||
@@ -131,67 +118,69 @@ module VX_tag_data_access #(
|
||||
end
|
||||
end
|
||||
|
||||
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we;
|
||||
wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] write_enable;
|
||||
wire [`BANK_LINE_WIDTH-1:0] data_write;
|
||||
|
||||
wire should_write = mem_rw_st1
|
||||
&& valid_req_st1
|
||||
&& use_read_valid_st1
|
||||
&& ~miss_st1
|
||||
&& ~is_snp_st1
|
||||
&& ~real_writefill;
|
||||
wire normal_write = valid_req_st1
|
||||
&& !writefill_st1
|
||||
&& !is_snp_st1
|
||||
&& !miss_st1
|
||||
&& !force_miss_st1
|
||||
&& mem_rw_st1
|
||||
&& use_read_valid_st1;
|
||||
|
||||
wire fill_write = valid_req_st1 && writefill_st1 && !force_miss_st1;
|
||||
|
||||
for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i)))
|
||||
&& should_write;
|
||||
wire normal_write_w = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i)))
|
||||
&& normal_write;
|
||||
|
||||
assign we[i] = real_writefill ? {WORD_SIZE{1'b1}} :
|
||||
normal_write ? mem_byteen_st1 :
|
||||
assign write_enable[i] = fill_write ? {WORD_SIZE{1'b1}} :
|
||||
normal_write_w ? mem_byteen_st1 :
|
||||
{WORD_SIZE{1'b0}};
|
||||
|
||||
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1;
|
||||
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = writefill_st1 ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1;
|
||||
end
|
||||
|
||||
assign use_write_enable = (writefill_st1 && ~real_writefill) ? 0 : we;
|
||||
assign use_write_data = data_write;
|
||||
|
||||
// use "case equality" to handle uninitialized tag when block entry is not valid
|
||||
assign tags_match = (writetag_st1 === use_read_tag_st1);
|
||||
assign tags_match = (addrtag_st1 === use_read_tag_st1);
|
||||
|
||||
wire snoop_hit_no_pending = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match && (use_read_dirty_st1 || snp_invalidate_st1) && ~force_request_miss_st1;
|
||||
wire req_invalid = valid_req_st1 && ~is_snp_st1 && ~use_read_valid_st1 && ~writefill_st1;
|
||||
wire req_miss = valid_req_st1 && ~is_snp_st1 && use_read_valid_st1 && ~writefill_st1 && ~tags_match;
|
||||
wire real_miss = req_invalid || req_miss;
|
||||
wire force_core_miss = (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1 && ~real_miss);
|
||||
assign snp_to_mrvq_st1 = valid_req_st1 && is_snp_st1 && force_request_miss_st1;
|
||||
assign use_write_enable = write_enable;
|
||||
assign use_write_data = data_write;
|
||||
assign use_invalidate = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match
|
||||
&& (use_read_dirty_st1 || snp_invalidate_st1) // block is dirty or need to force invalidation
|
||||
&& !force_miss_st1;
|
||||
|
||||
// The second term is basically saying: always make an entry ready if there's already another entry waiting, even if you yourself see a miss
|
||||
assign mrvq_init_ready_state_st1 = snp_to_mrvq_st1
|
||||
|| (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1);
|
||||
wire core_req_miss = valid_req_st1 && !is_snp_st1 && !writefill_st1 // is core request
|
||||
&& (!use_read_valid_st1 || !tags_match); // block missing or has wrong tag
|
||||
|
||||
assign miss_st1 = real_miss || snoop_hit_no_pending || force_core_miss;
|
||||
assign miss_st1 = core_req_miss;
|
||||
assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1;
|
||||
assign dirtyb_st1 = use_read_dirtyb_st1;
|
||||
assign readdata_st1 = use_read_data_st1;
|
||||
assign readtag_st1 = use_read_tag_st1;
|
||||
assign fill_sent = miss_st1;
|
||||
assign fill_saw_dirty_st1 = real_writefill && dirty_st1;
|
||||
assign invalidate_line = snoop_hit_no_pending;
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
always @(*) begin
|
||||
if (valid_req_st1 && writefill_st1) begin
|
||||
if (!(!use_read_valid_st1 || !tags_match)) begin
|
||||
$display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_DATA
|
||||
always @(posedge clk) begin
|
||||
if (valid_req_st1) begin
|
||||
if ((| use_write_enable)) begin
|
||||
if (writefill_st1) begin
|
||||
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
|
||||
end
|
||||
end else
|
||||
if (miss_st1) begin
|
||||
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
|
||||
$display("%t: cache%0d:%0d data-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, tagmatch=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, use_read_dirty_st1, tags_match, addrline_st1, addrtag_st1);
|
||||
end else if ((| use_write_enable)) begin
|
||||
if (writefill_st1) begin
|
||||
$display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), dirty_st1, addrline_st1, addrtag_st1, use_write_data);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
|
||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirty_st1, addrline_st1, addrtag_st1, wordsel_st1, writeword_st1);
|
||||
end
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, dirty_st1, addrline_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
59
hw/rtl/cache/VX_tag_data_store.v
vendored
59
hw/rtl/cache/VX_tag_data_store.v
vendored
@@ -12,7 +12,6 @@ module VX_tag_data_store #(
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall_bank_pipe,
|
||||
|
||||
input wire[`LINE_SELECT_BITS-1:0] read_addr,
|
||||
output wire read_valid,
|
||||
@@ -26,19 +25,13 @@ module VX_tag_data_store #(
|
||||
input wire write_fill,
|
||||
input wire[`LINE_SELECT_BITS-1:0] write_addr,
|
||||
input wire[`TAG_SELECT_BITS-1:0] tag_index,
|
||||
input wire[`BANK_LINE_WIDTH-1:0] write_data,
|
||||
input wire fill_sent
|
||||
input wire[`BANK_LINE_WIDTH-1:0] write_data
|
||||
);
|
||||
|
||||
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
|
||||
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
|
||||
reg [`BANK_LINE_COUNT-1:0] dirty;
|
||||
reg [`BANK_LINE_COUNT-1:0] valid;
|
||||
|
||||
assign read_valid = valid[read_addr];
|
||||
assign read_dirty = dirty[read_addr];
|
||||
assign read_dirtyb = dirtyb [read_addr];
|
||||
assign read_tag = tag [read_addr];
|
||||
|
||||
wire do_write = (| write_enable);
|
||||
|
||||
@@ -48,30 +41,40 @@ module VX_tag_data_store #(
|
||||
valid[i] <= 0;
|
||||
dirty[i] <= 0;
|
||||
end
|
||||
end else if (!stall_bank_pipe) begin
|
||||
if (do_write) begin
|
||||
valid[write_addr] <= 1;
|
||||
tag [write_addr] <= tag_index;
|
||||
if (write_fill) begin
|
||||
dirty[write_addr] <= 0;
|
||||
dirtyb[write_addr] <= 0;
|
||||
end else begin
|
||||
dirty[write_addr] <= 1;
|
||||
dirtyb[write_addr] <= dirtyb[write_addr] | write_enable;
|
||||
end
|
||||
end else if (fill_sent) begin
|
||||
dirty[write_addr] <= 0;
|
||||
dirtyb[write_addr] <= 0;
|
||||
end
|
||||
|
||||
if (invalidate) begin
|
||||
if (do_write) begin
|
||||
assert(!invalidate);
|
||||
dirty[write_addr] <= !write_fill;
|
||||
valid[write_addr] <= 1;
|
||||
end else if (invalidate) begin
|
||||
valid[write_addr] <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren;
|
||||
assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}};
|
||||
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
|
||||
always @(posedge clk) begin
|
||||
if (do_write) begin
|
||||
dirtyb[write_addr] <= write_fill ? 0 : (dirtyb[write_addr] | write_enable);
|
||||
end
|
||||
end
|
||||
assign read_dirtyb = dirtyb [read_addr];
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(`TAG_SELECT_BITS),
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.BYTEENW(1),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) tags (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
.raddr(read_addr),
|
||||
.wren(do_write),
|
||||
.rden(1'b1),
|
||||
.din(tag_index),
|
||||
.dout(read_tag)
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8),
|
||||
@@ -79,11 +82,11 @@ module VX_tag_data_store #(
|
||||
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
) data (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
.raddr(read_addr),
|
||||
.wren(ram_wren),
|
||||
.wren(write_enable),
|
||||
.rden(1'b1),
|
||||
.din(write_data),
|
||||
.dout(read_data)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
module VX_generic_queue #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 2,
|
||||
parameter BUFFERED = 1,
|
||||
parameter BUFFERED = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1)
|
||||
) (
|
||||
|
||||
@@ -8,21 +8,18 @@ module VX_priority_encoder #(
|
||||
output wire valid_out
|
||||
);
|
||||
reg [`LOG2UP(N)-1:0] data_out_r;
|
||||
reg valid_out_r;
|
||||
|
||||
always @(*) begin
|
||||
data_out_r = 0;
|
||||
valid_out_r = 0;
|
||||
for (integer i = 0; i < N; i++) begin
|
||||
if (data_in[i]) begin
|
||||
data_out_r = `LOG2UP(N)'(i);
|
||||
valid_out_r = 1;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = data_out_r;
|
||||
assign valid_out = valid_out_r;
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
endmodule
|
||||
@@ -10,6 +10,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
@@ -44,7 +45,7 @@ gen-s:
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
|
||||
gen-sd:
|
||||
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace-fst --trace-threads 1 $(DBG)
|
||||
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs --trace-threads 1 $(DBG)
|
||||
|
||||
gen-st:
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
@@ -53,7 +54,7 @@ gen-m:
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-md:
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace-fst --trace-threads 1 $(DBG)
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs --trace-threads 1 $(DBG)
|
||||
|
||||
gen-mt:
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
||||
@@ -30,9 +30,9 @@ Simulator::Simulator() {
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedFstC();
|
||||
trace_ = new VerilatedVcdC();
|
||||
vortex_->trace(trace_, 99);
|
||||
trace_->open("trace.fst");
|
||||
trace_->open("trace.vcd");
|
||||
#endif
|
||||
|
||||
// reset the device
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include "verilated.h"
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
#include <verilated_fst_c.h>
|
||||
#include <verilated_vcd_c.h>
|
||||
#endif
|
||||
|
||||
#include <VX_config.h>
|
||||
@@ -76,6 +76,6 @@ private:
|
||||
RAM *ram_;
|
||||
VVortex *vortex_;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedFstC *trace_;
|
||||
VerilatedVcdC *trace_;
|
||||
#endif
|
||||
};
|
||||
1
hw/unit_tests/cache/Makefile
vendored
1
hw/unit_tests/cache/Makefile
vendored
@@ -8,6 +8,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
||||
-DDBG_PRINT_CACHE_BANK \
|
||||
-DDBG_PRINT_CACHE_SNP \
|
||||
-DDBG_PRINT_CACHE_MSRQ \
|
||||
-DDBG_PRINT_CACHE_DATA \
|
||||
-DDBG_PRINT_DRAM \
|
||||
-DDBG_PRINT_OPAE
|
||||
|
||||
|
||||
Reference in New Issue
Block a user