diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 72ca9d26..99d3cbb6 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -21,7 +21,7 @@ endif SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp -RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/generic_cache -I../../hw/rtl/shared_memory -I../../hw/rtl/pipe_regs -I../../hw/rtl/compat +RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/shared_memory -I../../hw/rtl/pipe_regs VL_FLAGS += -DNDEBUG --assert -Wall -Wpedantic diff --git a/hw/Makefile b/hw/Makefile index 1547e8d2..42c94001 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -4,7 +4,7 @@ CF += -std=c++11 -fms-extensions VF += -compiler gcc --language 1800-2009 --assert -Wall -Wpedantic -INCLUDE = -I./rtl/ -I./rtl/shared_memory -I./rtl/cache -I./rtl/generic_cache -I./rtl/generic_cache/interfaces -I./rtl/interfaces/ -I./rtl/pipe_regs/ -I./rtl/compat/ -I./rtl/simulate +INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/shared_memory -I./rtl/simulate SINGLE_CORE = Vortex.v diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 2f4453ad..4017ec31 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -6,7 +6,7 @@ vortex_afu.json +incdir+../rtl +incdir+../rtl/shared_memory +incdir+../rtl/cache -+incdir+../rtl/generic_cache ++incdir+../rtl/libs +incdir+../rtl/interfaces +incdir+../rtl/pipe_regs +incdir+../rtl/compat @@ -14,7 +14,7 @@ vortex_afu.json ../rtl/VX_user_config.vh ../rtl/VX_config.vh ../rtl/VX_define.vh -../rtl/generic_cache/VX_cache_config.vh +../rtl/cache/VX_cache_config.vh ../rtl/Vortex_Socket.v ../rtl/Vortex_Cluster.v ../rtl/Vortex.v @@ -32,87 +32,84 @@ vortex_afu.json ../rtl/VX_countones.v ../rtl/VX_csr_handler.v ../rtl/VX_csr_pipe.v -../rtl/VX_generic_queue_ll.v ../rtl/VX_warp_scheduler.v -../rtl/VX_priority_encoder.v -../rtl/VX_generic_queue.v 
-../rtl/pipe_regs/VX_f_d_reg.v -../rtl/pipe_regs/VX_i_d_reg.v -../rtl/pipe_regs/VX_d_e_reg.v ../rtl/VX_gpr.v ../rtl/VX_gpr_stage.v ../rtl/VX_dmem_controller.v ../rtl/VX_alu.v -../rtl/VX_generic_stack.v -../rtl/VX_generic_priority_encoder.v ../rtl/VX_csr_data.v ../rtl/VX_lsu.v ../rtl/VX_decode.v ../rtl/VX_inst_multiplex.v ../rtl/VX_csr_wrapper.v -../rtl/VX_priority_encoder_w_mask.v -../rtl/VX_generic_register.v ../rtl/VX_lsu_addr_gen.v -../rtl/compat/VX_mult.v -../rtl/compat/VX_divide.v -../rtl/generic_cache/VX_snp_fwd_arb.v -../rtl/generic_cache/VX_cache_dram_req_arb.v -../rtl/generic_cache/VX_cache_dfq_queue.v -../rtl/generic_cache/VX_cache_wb_sel_merge.v -../rtl/generic_cache/VX_mrv_queue.v -../rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v -../rtl/generic_cache/VX_tag_data_access.v -../rtl/generic_cache/generic_cache.v -../rtl/generic_cache/VX_cache_core_req_bank_sel.v -../rtl/generic_cache/VX_cache_req_queue.v -../rtl/generic_cache/VX_bank.v -../rtl/generic_cache/VX_cache_miss_resrv.v -../rtl/generic_cache/VX_fill_invalidator.v -../rtl/generic_cache/VX_tag_data_structure.v -../rtl/generic_cache/VX_prefetcher.v -../rtl/cache/VX_generic_pe.v -../rtl/cache/cache_set.v -../rtl/cache/VX_d_cache.v -../rtl/cache/VX_Cache_Bank.v -../rtl/cache/VX_cache_data_per_index.v -../rtl/cache/VX_d_cache_encapsulate.v -../rtl/cache/VX_cache_bank_valid.v -../rtl/cache/VX_cache_data.v + +../rtl/interfaces/VX_exec_unit_req_if.v +../rtl/interfaces/VX_branch_response_if.v +../rtl/interfaces/VX_inst_meta_if.v +../rtl/interfaces/VX_join_if.v +../rtl/interfaces/VX_icache_response_if.v +../rtl/interfaces/VX_inst_exec_wb_if.v +../rtl/interfaces/VX_gpu_dcache_dram_req_if.v +../rtl/interfaces/VX_csr_req_if.v +../rtl/interfaces/VX_icache_request_if.v +../rtl/interfaces/VX_gpu_dcache_rsp_if.v +../rtl/interfaces/VX_frE_to_bckE_req_if.v +../rtl/interfaces/VX_dram_req_rsp_if.v +../rtl/interfaces/VX_dcache_request_if.v +../rtl/interfaces/VX_gpr_data_if.v +../rtl/interfaces/VX_dcache_response_if.v 
+../rtl/interfaces/VX_csr_wb_if.v +../rtl/interfaces/VX_gpu_dcache_req_if.v +../rtl/interfaces/VX_lsu_req_if.v +../rtl/interfaces/VX_gpu_snp_req_rsp.v +../rtl/interfaces/VX_mw_wb_if.v +../rtl/interfaces/VX_gpr_jal_if.v +../rtl/interfaces/VX_gpu_inst_req_if.v +../rtl/interfaces/VX_wstall_if.v +../rtl/interfaces/VX_wb_if.v +../rtl/interfaces/VX_gpr_read_if.v +../rtl/interfaces/VX_mem_req_if.v +../rtl/interfaces/VX_jal_response_if.v +../rtl/interfaces/VX_warp_ctl_if.v +../rtl/interfaces/VX_gpu_dcache_snp_req_if.v +../rtl/interfaces/VX_gpu_dcache_dram_rsp_if.v +../rtl/interfaces/VX_inst_mem_wb_if.v + +../rtl/pipe_regs/VX_f_d_reg.v +../rtl/pipe_regs/VX_i_d_reg.v +../rtl/pipe_regs/VX_d_e_reg.v + +../rtl/cache/VX_snp_fwd_arb.v +../rtl/cache/VX_cache_dram_req_arb.v +../rtl/cache/VX_cache_dfq_queue.v +../rtl/cache/VX_cache_wb_sel_merge.v +../rtl/cache/VX_mrv_queue.v +../rtl/cache/VX_dcache_llv_resp_bank_sel.v +../rtl/cache/VX_tag_data_access.v +../rtl/cache/VX_cache.v +../rtl/cache/VX_cache_core_req_bank_sel.v +../rtl/cache/VX_cache_req_queue.v +../rtl/cache/VX_bank.v +../rtl/cache/VX_cache_miss_resrv.v +../rtl/cache/VX_fill_invalidator.v +../rtl/cache/VX_tag_data_structure.v +../rtl/cache/VX_prefetcher.v + ../rtl/shared_memory/VX_shared_memory_block.v ../rtl/shared_memory/VX_priority_encoder_sm.v ../rtl/shared_memory/VX_shared_memory.v ../rtl/shared_memory/VX_bank_valids.v -../rtl/interfaces/VX_exec_unit_req_inter.v -../rtl/interfaces/VX_branch_response_inter.v -../rtl/interfaces/VX_inst_meta_inter.v -../rtl/interfaces/VX_join_inter.v -../rtl/interfaces/VX_icache_response_inter.v -../rtl/interfaces/VX_inst_exec_wb_inter.v -../rtl/interfaces/VX_gpu_dcache_dram_req_inter.v -../rtl/interfaces/VX_csr_req_inter.v -../rtl/interfaces/VX_icache_request_inter.v -../rtl/interfaces/VX_gpu_dcache_rsp_inter.v -../rtl/interfaces/VX_frE_to_bckE_req_inter.v -../rtl/interfaces/VX_dram_req_rsp_inter.v -../rtl/interfaces/VX_dcache_request_inter.v -../rtl/interfaces/VX_gpr_data_inter.v 
-../rtl/interfaces/VX_dcache_response_inter.v -../rtl/interfaces/VX_csr_wb_inter.v -../rtl/interfaces/VX_gpu_dcache_req_inter.v -../rtl/interfaces/VX_lsu_req_inter.v -../rtl/interfaces/VX_gpu_snp_req_rsp.v -../rtl/interfaces/VX_mw_wb_inter.v -../rtl/interfaces/VX_gpr_jal_inter.v -../rtl/interfaces/VX_gpu_inst_req_inter.v -../rtl/interfaces/VX_wstall_inter.v -../rtl/interfaces/VX_wb_inter.v -../rtl/interfaces/VX_gpr_read_inter.v -../rtl/interfaces/VX_mem_req_inter.v -../rtl/interfaces/VX_jal_response_inter.v -../rtl/interfaces/VX_warp_ctl_inter.v -../rtl/interfaces/VX_gpu_dcache_snp_req_inter.v -../rtl/interfaces/VX_gpu_dcache_dram_rsp_inter.v -../rtl/interfaces/VX_inst_mem_wb_inter.v + +../rtl/libs/VX_priority_encoder_w_mask.v +../rtl/libs/VX_generic_register.v +../rtl/libs/VX_mult.v +../rtl/libs/VX_divide.v +../rtl/libs/VX_generic_stack.v +../rtl/libs/VX_generic_priority_encoder.v +../rtl/libs/VX_priority_encoder.v +../rtl/libs/VX_generic_queue.v +../rtl/libs/VX_generic_queue_ll.v ccip_interface_reg.sv ccip_std_afu.sv diff --git a/hw/rtl/cache/Makefile b/hw/rtl/cache/Makefile deleted file mode 100644 index e1247633..00000000 --- a/hw/rtl/cache/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -all: RUNFILE - - -VERILATOR: - verilator --compiler gcc --Wno-UNOPTFLAT -Wall --trace -cc VX_d_cache_encapsulate.v -Iinterfaces/ --exe d_cache_test_bench.cpp -CFLAGS -std=c++11 - -RUNFILE: VERILATOR - (cd obj_dir && make -j -f VVX_d_cache_encapsulate.mk) - -clean: - rm ./obj_dir/* - diff --git a/hw/rtl/cache/Notes b/hw/rtl/cache/Notes deleted file mode 100644 index 0458c659..00000000 --- a/hw/rtl/cache/Notes +++ /dev/null @@ -1,46 +0,0 @@ -Notes - - -8 kB L1 Data Cache | 16 kB L1 I cache (maybe) -[tag index offset_remaining_block bank wordOffset], use a blocksize of 128 bytes between memory and cache. So each bank gets 16 bytes. 
- total offset is b its - 4 bits new offset, 2 bits block, 2 bits word offset - xxxxxxxIIIIIIIIoobbbyy - 9876543210 - bbbyyyyy - o = index into block offset - b = bank - y = word offset - I = index into cach - 6 bits indexes (64 indeces) No ways || 16 indexes with 4 ways - Rest of the bits are tag bits - -blocks / banks = 16 bytes, 8 banks. 128 bytes. 256 indexes (height). width is 16 bytes. 4 words per block (per bank). 17 bit tag - -gtkwave ___.vcd - - -// Splitting it up - -// word byte -wire[127:0][3:0] data_from_ram; - - -// word byte bank -wire[15:0][3:0] bank_data_n[3:0] - -integer i; -for (i = 0; i < something; i+=8) -{ - bank_data_n[0][i/8] = data_from_ram[i+0] - bank_data_n[1][i/8] = data_from_ram[i+1] - bank_data_n[2][i/8] = data_from_ram[i+2] - bank_data_n[3][i/8] = data_from_ram[i+3] - bank_data_n[4][i/8] = data_from_ram[i+4] - bank_data_n[5][i/8] = data_from_ram[i+5] - bank_data_n[6][i/8] = data_from_ram[i+6] - bank_data_n[7][i/8] = data_from_ram[i+7] -} - - -With Cache. If miss. Go to memory, grab all data, replace that data in the cache. Generate a new request, feed that into the cache (this one will hit), return that diff --git a/hw/rtl/cache/VX_Cache_Bank.v b/hw/rtl/cache/VX_Cache_Bank.v deleted file mode 100644 index 68c681c2..00000000 --- a/hw/rtl/cache/VX_Cache_Bank.v +++ /dev/null @@ -1,240 +0,0 @@ -// To Do: Change way_id_out to an internal register which holds when in between access and finished. 
-// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default -// Also make sure all possible output states are transmitted back to the bank correctly - -`include "VX_define.vh" - -module VX_Cache_Bank - #( - parameter CACHE_SIZE = 4096, // Bytes - parameter CACHE_WAYS = 1, - parameter CACHE_BLOCK = 128, // Bytes - parameter CACHE_BANKS = 8, - parameter LOG_NUM_BANKS = 3, - parameter NUM_REQ = 8, - parameter LOG_NUM_REQ = 3, - parameter NUM_IND = 8, - parameter CACHE_WAY_INDEX = 1, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter OFFSET_SIZE_START = 0, - parameter OFFSET_SIZE_END = 1, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7, - parameter ADDR_TAG_START = 15, - parameter ADDR_TAG_END = 31, - parameter ADDR_OFFSET_START = 5, - parameter ADDR_OFFSET_END = 6, - parameter ADDR_IND_START = 7, - parameter ADDR_IND_END = 14 - ) - ( - clk, - rst, - state, - read_or_write, // Read = 0 | Write = 1 - i_p_mem_read, - i_p_mem_write, - valid_in, - //write_from_mem, - actual_index, - o_tag, - block_offset, - writedata, - fetched_writedata, - - byte_select, - - readdata, - hit, - //miss, - - eviction_wb, // Need to evict - eviction_addr, // What's the eviction tag - - data_evicted, - evicted_way - ); - - // localparam NUM_BANKS = `CACHE_BANKS; - // localparam CACHE_BLOCK_PER_BANK = (`CACHE_BLOCK / `CACHE_BANKS); - // localparam NUM_WORDS_PER_BLOCK = `CACHE_BLOCK / (`CACHE_BANKS*4); - // localparam NUM_INDEXES = `NUM_IND; - - localparam CACHE_IDLE = 0; // Idle - localparam SEND_MEM_REQ = 1; // Write back this block into memory - localparam RECIV_MEM_RSP = 2; - - - localparam BLOCK_BITS = `LOG2UP(CACHE_BLOCK); - // Inputs - input wire rst; - input wire clk; - input wire [3:0] state; -//input wire write_from_mem; - - // Reading Data - input wire[IND_SIZE_END:IND_SIZE_START] actual_index; - - - input wire[TAG_SIZE_END:TAG_SIZE_START] o_tag; // When write_from_mem = 1, o_tag 
is the new tag - input wire[OFFSET_SIZE_END:OFFSET_SIZE_START] block_offset; - - - input wire[31:0] writedata; - input wire valid_in; - input wire read_or_write; // Specifies if it is a read or write operation - - input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] fetched_writedata; - input wire[2:0] i_p_mem_read; - input wire[2:0] i_p_mem_write; - input wire[1:0] byte_select; - - - input wire[CACHE_WAY_INDEX-1:0] evicted_way; - - // Outputs - // Normal shit - output wire[31:0] readdata; - output wire hit; - //output wire miss; - - // Eviction Data (Notice) - output wire eviction_wb; // Need to evict - output wire[31:0] eviction_addr; // What's the eviction tag - - // Eviction Data (Extraction) - output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_evicted; - - - - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use; - wire[TAG_SIZE_END:TAG_SIZE_START] tag_use; - wire[TAG_SIZE_END:TAG_SIZE_START] eviction_tag; - wire valid_use; - wire dirty_use; - wire access; - wire write_from_mem; - wire miss; // -10/21 - - - - wire[CACHE_WAY_INDEX-1:0] way_to_update; - - assign miss = (tag_use != o_tag) && valid_use && valid_in; - - - assign data_evicted = data_use; - - // assign eviction_wb = miss && (dirty_use != 1'b0) && valid_use; - assign eviction_wb = (dirty_use != 1'b0); - assign eviction_tag = tag_use; - assign access = (state == CACHE_IDLE) && valid_in; - assign write_from_mem = (state == RECIV_MEM_RSP) && valid_in; // TODO - assign hit = (access && (tag_use == o_tag) && valid_use); - //assign eviction_addr = {eviction_tag, actual_index, block_offset, 5'b0}; // Fix with actual data - assign eviction_addr = {eviction_tag, actual_index, {(BLOCK_BITS){1'b0}}}; // Fix with actual data - - wire lw = (i_p_mem_read == `LW_MEM_READ); - wire lb = (i_p_mem_read == `LB_MEM_READ); - wire lh = (i_p_mem_read == `LH_MEM_READ); - wire lhu = (i_p_mem_read == `LHU_MEM_READ); - wire lbu = (i_p_mem_read == `LBU_MEM_READ); - - wire sw = (i_p_mem_write == `SW_MEM_WRITE); - wire sb = (i_p_mem_write == 
`SB_MEM_WRITE); - wire sh = (i_p_mem_write == `SH_MEM_WRITE); - - wire b0 = (byte_select == 0); - wire b1 = (byte_select == 1); - wire b2 = (byte_select == 2); - wire b3 = (byte_select == 3); - - wire[31:0] data_unQual = (b0 || lw) ? (data_use[block_offset] ) : - b1 ? (data_use[block_offset] >> 8) : - b2 ? (data_use[block_offset] >> 16) : - (data_use[block_offset] >> 24); - - wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF); - wire[31:0] lh_data = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF); - wire[31:0] lbu_data = (data_unQual & 32'hFF); - wire[31:0] lhu_data = (data_unQual & 32'hFFFF); - wire[31:0] lw_data = (data_unQual); - - wire[31:0] sw_data = writedata; - - wire[31:0] sb_data = b1 ? {{16{1'b0}}, writedata[7:0], { 8{1'b0}}} : - b2 ? {{ 8{1'b0}}, writedata[7:0], {16{1'b0}}} : - b3 ? {{ 0{1'b0}}, writedata[7:0], {24{1'b0}}} : - writedata; - - wire[31:0] sh_data = b2 ? {writedata[15:0], {16{1'b0}}} : writedata; - - wire[31:0] use_write_data = sb ? sb_data : - sh ? sh_data : - sw_data; - - - wire[31:0] data_Qual = lb ? lb_data : - lh ? lh_data : - lhu ? lhu_data : - lbu ? lbu_data : - lw_data; - - assign readdata = (access) ? data_Qual : 32'b0; // Fix with actual data - - wire[3:0] sb_mask = (b0 ? 4'b0001 : (b1 ? 4'b0010 : (b2 ? 4'b0100 : 4'b1000))); - wire[3:0] sh_mask = (b0 ? 4'b0011 : 4'b1100); - - wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we; - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write; - genvar g; - generate - for (g = 0; g < NUM_WORDS_PER_BLOCK; g = g + 1) begin : write_enables - wire normal_write = (read_or_write && ((access && (block_offset == g))) && !miss); - - assign we[g] = (write_from_mem) ? 4'b1111 : - (normal_write && sw) ? 4'b1111 : - (normal_write && sb) ? sb_mask : - (normal_write && sh) ? sh_mask : - 4'b0000; - - // assign we[g] = (normal_write || (write_from_mem)) ? 1'b1 : 1'b0; - assign data_write[g] = write_from_mem ? 
fetched_writedata[g] : use_write_data; - assign way_to_update = evicted_way; - end - endgenerate - - VX_cache_data_per_index #( - .CACHE_WAYS (CACHE_WAYS), - .NUM_IND (NUM_IND), - .CACHE_WAY_INDEX (CACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK(NUM_WORDS_PER_BLOCK), - .TAG_SIZE_START (TAG_SIZE_START), - .TAG_SIZE_END (TAG_SIZE_END), - .IND_SIZE_START (IND_SIZE_START), - .IND_SIZE_END (IND_SIZE_END)) data_structures( - .clk (clk), - .rst (rst), - .valid_in (valid_in), - .state (state), - // Inputs - .addr (actual_index), - .we (we), - .evict (write_from_mem), - .data_write (data_write), - .tag_write (o_tag), - .way_to_update(way_to_update), - // Outputs - .tag_use (tag_use), - .data_use (data_use), - .valid_use (valid_use), - .dirty_use (dirty_use) - ); - -endmodule - - - - diff --git a/hw/rtl/generic_cache/VX_bank.v b/hw/rtl/cache/VX_bank.v similarity index 100% rename from hw/rtl/generic_cache/VX_bank.v rename to hw/rtl/cache/VX_bank.v diff --git a/hw/rtl/generic_cache/VX_cache.v b/hw/rtl/cache/VX_cache.v similarity index 99% rename from hw/rtl/generic_cache/VX_cache.v rename to hw/rtl/cache/VX_cache.v index 11596000..8863d83e 100644 --- a/hw/rtl/generic_cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -241,7 +241,7 @@ module VX_cache #( // Snoop Forward Logic VX_snp_fwd_arb #( .NUM_BANKS(NUM_BANKS) - ) snp_fwd_arb( + ) snp_fwd_arb ( .per_bank_snp_fwd_valid (per_bank_snp_fwd_valid), .per_bank_snp_fwd_addr (per_bank_snp_fwd_addr), .per_bank_snp_fwd_pop (per_bank_snp_fwd_pop), diff --git a/hw/rtl/cache/VX_cache_bank_valid.v b/hw/rtl/cache/VX_cache_bank_valid.v deleted file mode 100644 index ff0b95f1..00000000 --- a/hw/rtl/cache/VX_cache_bank_valid.v +++ /dev/null @@ -1,30 +0,0 @@ -`include "VX_define.vh" - -module VX_cache_bank_valid -#( - parameter NUM_BANKS = 8, - parameter LOG_NUM_BANKS = 3, - parameter NUM_REQ = 1 -) -( - input wire [NUM_REQ-1:0] i_p_valid, - input wire [NUM_REQ-1:0][31:0] i_p_addr, - output reg [NUM_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks -); 
- - generate - integer t_id; - always @(*) begin - thread_track_banks = 0; - for (t_id = 0; t_id < NUM_REQ; t_id = t_id + 1) - begin - if (NUM_BANKS != 1) begin - thread_track_banks[i_p_addr[t_id][2+LOG_NUM_BANKS-1:2]][t_id] = i_p_valid[t_id]; - end else begin - thread_track_banks[0][t_id] = i_p_valid[t_id]; - end - end - end - endgenerate - -endmodule diff --git a/hw/rtl/generic_cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh similarity index 100% rename from hw/rtl/generic_cache/VX_cache_config.vh rename to hw/rtl/cache/VX_cache_config.vh diff --git a/hw/rtl/generic_cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v similarity index 100% rename from hw/rtl/generic_cache/VX_cache_core_req_bank_sel.v rename to hw/rtl/cache/VX_cache_core_req_bank_sel.v diff --git a/hw/rtl/cache/VX_cache_data.v b/hw/rtl/cache/VX_cache_data.v deleted file mode 100644 index 0cefc07f..00000000 --- a/hw/rtl/cache/VX_cache_data.v +++ /dev/null @@ -1,212 +0,0 @@ -`include "VX_define.vh" - -module VX_cache_data #( - parameter NUM_IND = 8, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7 -) ( - input wire clk, rst, // Clock - -// `ifdef PARAM - // Addr - input wire[IND_SIZE_END:IND_SIZE_START] addr, - // WE - input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we, - input wire evict, - // Data - input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, - input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write, - - output wire[TAG_SIZE_END:TAG_SIZE_START] tag_use, - output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, - output wire valid_use, - output wire dirty_use -// `else -// // Addr -// input wire[7:0] addr, -// // WE -// input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we, -// input wire evict, -// // Data -// input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data -// input wire[16:0] tag_write, - - -// output wire[16:0] tag_use, -// output 
wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, -// output wire valid_use, -// output wire dirty_use -// `endif -); - //localparam NUM_BANKS = CACHE_BANKS; - //localparam CACHE_BLOCK_PER_BANK = (CACHE_BLOCK / CACHE_BANKS); - // localparam NUM_WORDS_PER_BLOCK = CACHE_BLOCK / (CACHE_BANKS*4); - //localparam NUM_INDEXES = NUM_IND; - - wire currently_writing = (|we); - wire update_dirty = ((!dirty_use) && currently_writing) || (evict); - wire dirt_new = evict ? 0 : (|we); - -`ifndef SYN - // (3:0) 4 bytes - reg[NUM_WORDS_PER_BLOCK-1:0][3:0][7:0] data[NUM_IND-1:0]; // Actual Data - reg[TAG_SIZE_END:TAG_SIZE_START] tag[NUM_IND-1:0]; - reg valid[NUM_IND-1:0]; - reg dirty[NUM_IND-1:0]; - - // 16 bytes - assign data_use = data[addr]; // Read Port - assign tag_use = tag[addr]; - assign valid_use = valid[addr]; - assign dirty_use = dirty[addr]; - - integer f; - integer ini_ind; - always @(posedge clk, posedge rst) begin : update_all - if (rst) begin - for (ini_ind = 0; ini_ind < NUM_IND; ini_ind=ini_ind+1) begin - //data[ini_ind] <= 0; - //tag[ini_ind] <= 0; - valid[ini_ind] <= 0; - //dirty[ini_ind] <= 0; - end - end else begin - if (update_dirty) dirty[addr] <= dirt_new; // WRite Port - if (evict) tag[addr] <= tag_write; - if (evict) valid[addr] <= 1; - - for (f = 0; f < NUM_WORDS_PER_BLOCK; f = f + 1) begin - if (we[f][0]) data[addr][f][0] <= data_write[f][7 :0 ]; - if (we[f][1]) data[addr][f][1] <= data_write[f][15:8 ]; - if (we[f][2]) data[addr][f][2] <= data_write[f][23:16]; - if (we[f][3]) data[addr][f][3] <= data_write[f][31:24]; - end - end - end - -`else - - wire[IND_SIZE_END:IND_SIZE_START] use_addr = addr; - - wire cena = 1; - - wire cenb_d = (|we); - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_d = data_write; - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] write_bit_mask_d; - wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_d; - genvar cur_b; - for (cur_b = 0; cur_b < NUM_WORDS_PER_BLOCK; cur_b=cur_b+1) begin - assign write_bit_mask_d[cur_b] = {32{~we[cur_b]}}; - end - assign 
data_use = data_out_d; - - // Using ASIC MEM - `IGNORE_WARNINGS_BEGIN - rf2_32x128_wm1 data ( - .CENYA(), - .AYA(), - .CENYB(), - .WENYB(), - .AYB(), - .QA(data_out_d), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena), - .AA(use_addr), - .CLKB(clk), - .CENB(cenb_d), - .WENB(write_bit_mask_d), - .AB(use_addr), - .DB(wdata_d), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - .TWENB(128'b0), - .TAB(5'b0), - .TDB(128'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - `IGNORE_WARNINGS_END - - wire[16:0] old_tag; - wire old_valid; - wire old_dirty; - - wire[16:0] new_tag = evict ? tag_write : old_tag; - wire new_valid = evict ? 1 : old_valid; - wire new_dirty = update_dirty ? dirt_new : old_dirty; - - wire cenb_m = (evict || update_dirty); - wire[19-1:0][31:0] write_bit_mask_m = cenb_m ? 19'b0 : 19'b1; - - // Try to fix the error in memory conneciton, modified by Lingjun Zhu on Oct. 
28 2019 - // wire[NUM_WORDS_PER_BLOCK-1:0][31:0] wdata_m = {new_tag, new_dirty, new_valid}; - // wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_out_m; - - wire[19-1:0] wdata_m = {new_tag, new_dirty, new_valid}; - - wire[19-1:0] data_out_m; - - assign {old_tag, old_dirty, old_valid} = data_out_m; - - - assign dirty_use = old_dirty; - assign valid_use = old_valid; - assign tag_use = old_tag; - - `IGNORE_WARNINGS_BEGIN - rf2_32x19_wm0 meta ( - .CENYA(), - .AYA(), - .CENYB(), - // .WENYB(), - .AYB(), - .QA(data_out_m), - .SOA(), - .SOB(), - .CLKA(clk), - .CENA(cena), - .AA(use_addr), - .CLKB(clk), - .CENB(cenb_m), - // .WENB(write_bit_mask_m), - .AB(use_addr), - .DB(wdata_m), - .EMAA(3'b011), - .EMASA(1'b0), - .EMAB(3'b011), - .TENA(1'b1), - .TCENA(1'b0), - .TAA(5'b0), - .TENB(1'b1), - .TCENB(1'b0), - // .TWENB(128'b0), - .TAB(5'b0), - .TDB(19'b0), - .RET1N(1'b1), - .SIA(2'b0), - .SEA(1'b0), - .DFTRAMBYP(1'b0), - .SIB(2'b0), - .SEB(1'b0), - .COLLDISN(1'b1) - ); - `IGNORE_WARNINGS_END -`endif - -endmodule diff --git a/hw/rtl/cache/VX_cache_data_per_index.v b/hw/rtl/cache/VX_cache_data_per_index.v deleted file mode 100644 index 0f824edc..00000000 --- a/hw/rtl/cache/VX_cache_data_per_index.v +++ /dev/null @@ -1,165 +0,0 @@ -`include "VX_define.vh" - -module VX_cache_data_per_index - #( - parameter CACHE_WAYS = 1, - parameter NUM_IND = 8, - parameter CACHE_WAY_INDEX = 1, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7 - ) - ( - input wire clk, // Clock - input wire rst, - input wire valid_in, - input wire [3:0] state, - // Addr - input wire[IND_SIZE_END:IND_SIZE_START] addr, - // WE - input wire[NUM_WORDS_PER_BLOCK-1:0][3:0] we, - input wire evict, - input wire[CACHE_WAY_INDEX-1:0] way_to_update, - // Data - input wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_write, // Update Data - input wire[TAG_SIZE_END:TAG_SIZE_START] tag_write, - - - output 
wire[TAG_SIZE_END:TAG_SIZE_START] tag_use, - output wire[NUM_WORDS_PER_BLOCK-1:0][31:0] data_use, - output wire valid_use, - output wire dirty_use - -); - //localparam NUM_BANKS = CACHE_BANKS; - //localparam CACHE_BLOCK_PER_BANK = (CACHE_BLOCK / CACHE_BANKS); - // localparam NUM_WORDS_PER_BLOCK = CACHE_BLOCK / (CACHE_BANKS*4); - //localparam NUM_INDEXES = `DCACHE_NUM_IND; - - wire [CACHE_WAYS-1:0][TAG_SIZE_END:TAG_SIZE_START] tag_use_per_way; - wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] data_use_per_way; - wire [CACHE_WAYS-1:0] valid_use_per_way; - wire [CACHE_WAYS-1:0] dirty_use_per_way; - wire [CACHE_WAYS-1:0] hit_per_way; - // reg [CACHE_WAY_INDEX-1:0] eviction_way_index; - wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][3:0] we_per_way; - wire [CACHE_WAYS-1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] data_write_per_way; - wire [CACHE_WAYS-1:0] write_from_mem_per_way; - wire invalid_found; - - wire [CACHE_WAY_INDEX-1:0] way_index; - wire [CACHE_WAY_INDEX-1:0] invalid_index; - - - localparam CACHE_IDLE = 0; // Idle - localparam SEND_MEM_REQ = 1; // Write back this block into memory - localparam RECIV_MEM_RSP = 2; - - generate - if(CACHE_WAYS != 1) begin - VX_generic_priority_encoder #(.N(CACHE_WAYS)) valid_index - ( - .valids(~valid_use_per_way), - .index (invalid_index), - .found (invalid_found) - ); - - VX_generic_priority_encoder #(.N(CACHE_WAYS)) way_indexing - ( - .valids(hit_per_way), - .index (way_index), - .found () - ); - end - else begin - assign way_index = 0; - assign invalid_found = (valid_use_per_way == 1'b0) ? 1 : 0; - assign invalid_index = 0; - end - endgenerate - - - - - // wire hit = |hit_per_way; - // wire miss = ~hit; - // wire update = |we && !miss; - // wire valid = &valid_use_per_way; - - wire[CACHE_WAY_INDEX-1:0] way_use_Qual; - - assign way_use_Qual = (state != CACHE_IDLE) ? 
way_to_update : way_index; - - assign tag_use = tag_use_per_way[way_use_Qual]; - assign data_use = data_use_per_way[way_use_Qual]; - assign valid_use = valid_use_per_way[way_use_Qual]; - assign dirty_use = dirty_use_per_way[way_use_Qual]; - - // assign tag_use = hit ? tag_use_per_way[way_index] : (valid ? tag_use_per_way[eviction_way_index] : (invalid_found ? tag_use_per_way[invalid_index] : 0)); - // assign data_use = hit ? data_use_per_way[way_index] : (valid ? data_use_per_way[eviction_way_index] : (invalid_found ? data_use_per_way[invalid_index] : 0)); - // assign valid_use = hit ? valid_use_per_way[way_index] : (valid ? valid_use_per_way[eviction_way_index] : (invalid_found ? valid_use_per_way[invalid_index] : 0)); - // assign dirty_use = hit ? dirty_use_per_way[way_index] : (valid ? dirty_use_per_way[eviction_way_index] : (invalid_found ? dirty_use_per_way[invalid_index] : 0)); - - - - genvar ways; - generate - for(ways=0; ways < CACHE_WAYS; ways = ways + 1) begin : each_way - - - assign hit_per_way[ways] = ((valid_use_per_way[ways] == 1'b1) && (tag_use_per_way[ways] == tag_write)) ? 1'b1 : 0; - - - assign write_from_mem_per_way[ways] = evict && (ways == way_use_Qual); - assign we_per_way[ways] = (ways == way_use_Qual) ? (we) : 0; - assign data_write_per_way[ways] = data_write; - - - // assign hit_per_way[ways] = ((valid_use_per_way[ways] == 1'b1) && (tag_use_per_way[ways] == tag_write)) ? 1'b1 : 0; - - // assign we_per_way[ways] = (evict == 1'b1) || (update == 1'b1) ? ((ways == way_use_Qual) ? (we) : 0) : 0; - // assign data_write_per_way[ways] = (evict == 1'b1) || (update == 1'b1) ? ((ways == way_use_Qual) ? data_write : 0) : 0; - // assign write_from_mem_per_way[ways] = (evict == 1'b1) ? ((ways == way_use_Qual) ? 
1 : 0) : 0; - - VX_cache_data #( - .NUM_IND (NUM_IND), - .NUM_WORDS_PER_BLOCK (NUM_WORDS_PER_BLOCK), - .TAG_SIZE_START (TAG_SIZE_START), - .TAG_SIZE_END (TAG_SIZE_END), - .IND_SIZE_START (IND_SIZE_START), - .IND_SIZE_END (IND_SIZE_END)) data_structures( - .clk (clk), - .rst (rst), - // Inputs - .addr (addr), - .we (we_per_way[ways]), - .evict (write_from_mem_per_way[ways]), - .data_write(data_write_per_way[ways]), - .tag_write (tag_write), - // Outputs - .tag_use (tag_use_per_way[ways]), - .data_use (data_use_per_way[ways]), - .valid_use (valid_use_per_way[ways]), - .dirty_use (dirty_use_per_way[ways]) - ); - end - endgenerate - - // always @(posedge clk or posedge rst) begin - // if (rst) begin - // eviction_way_index <= 0; - // end else begin - // // if((miss && dirty_use && valid_use && !evict && valid_in)) begin // can be either evict or invalid cache entries - // if((state == SEND_MEM_REQ)) begin // can be either evict or invalid cache entries - // if((eviction_way_index+1) == CACHE_WAYS) begin - // eviction_way_index <= 0; - // end else begin - // eviction_way_index <= (eviction_way_index + 1); - // end - // end - // end - // end - -endmodule diff --git a/hw/rtl/generic_cache/VX_cache_dfq_queue.v b/hw/rtl/cache/VX_cache_dfq_queue.v similarity index 100% rename from hw/rtl/generic_cache/VX_cache_dfq_queue.v rename to hw/rtl/cache/VX_cache_dfq_queue.v diff --git a/hw/rtl/generic_cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v similarity index 100% rename from hw/rtl/generic_cache/VX_cache_dram_req_arb.v rename to hw/rtl/cache/VX_cache_dram_req_arb.v diff --git a/hw/rtl/generic_cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v similarity index 100% rename from hw/rtl/generic_cache/VX_cache_miss_resrv.v rename to hw/rtl/cache/VX_cache_miss_resrv.v diff --git a/hw/rtl/generic_cache/VX_cache_req_queue.v b/hw/rtl/cache/VX_cache_req_queue.v similarity index 100% rename from hw/rtl/generic_cache/VX_cache_req_queue.v rename to 
hw/rtl/cache/VX_cache_req_queue.v diff --git a/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v b/hw/rtl/cache/VX_cache_wb_sel_merge.v similarity index 99% rename from hw/rtl/generic_cache/VX_cache_wb_sel_merge.v rename to hw/rtl/cache/VX_cache_wb_sel_merge.v index aee74767..d9f40050 100644 --- a/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v +++ b/hw/rtl/cache/VX_cache_wb_sel_merge.v @@ -65,6 +65,7 @@ module VX_cache_wb_sel_merge #( ); reg [NUM_BANKS-1:0] per_bank_wb_pop_unqual; + assign per_bank_wb_pop = per_bank_wb_pop_unqual & {NUM_BANKS{core_rsp_ready}}; // wire[NUM_BANKS-1:0] bank_wants_wb; diff --git a/hw/rtl/cache/VX_d_cache.v b/hw/rtl/cache/VX_d_cache.v deleted file mode 100644 index 85fb1427..00000000 --- a/hw/rtl/cache/VX_d_cache.v +++ /dev/null @@ -1,389 +0,0 @@ -// Cache Memory (8way 4word) // -// i_ means input port // -// o_ means output port // -// _p_ means data exchange with processor // -// _m_ means data exchange with memory // - - -// TO DO: -// - Send in a response from memory of what the data is from the test bench - -`include "VX_define.vh" -//`include "VX_Cache_Bank.v" -//`include "VX_cache_bank_valid.v" -//`include "VX_priority_encoder.v" -//`include "VX_priority_encoder_w_mask.v" - -module VX_d_cache - #( - parameter CACHE_SIZE = 4096, // Bytes - parameter CACHE_WAYS = 1, - parameter CACHE_BLOCK = 128, // Bytes - parameter CACHE_BANKS = 8, - parameter LOG_NUM_BANKS = 3, - parameter NUM_REQ = 8, - parameter LOG_NUM_REQ = 3, - parameter NUM_IND = 8, - parameter CACHE_WAY_INDEX = 1, - parameter NUM_WORDS_PER_BLOCK = 4, - parameter OFFSET_SIZE_START = 0, - parameter OFFSET_SIZE_END = 1, - parameter TAG_SIZE_START = 0, - parameter TAG_SIZE_END = 16, - parameter IND_SIZE_START = 0, - parameter IND_SIZE_END = 7, - parameter ADDR_TAG_START = 15, - parameter ADDR_TAG_END = 31, - parameter ADDR_OFFSET_START = 5, - parameter ADDR_OFFSET_END = 6, - parameter ADDR_IND_START = 7, - parameter ADDR_IND_END = 14, - parameter MEM_ADDR_REQ_MASK = 32'hffffffc0 - ) 
- ( - clk, - rst, - i_p_addr, - //i_p_byte_en, - i_p_writedata, - i_p_read_or_write, // 0 = Read | 1 = Write - i_p_mem_read, - i_p_mem_write, - i_p_valid, - //i_p_write, - o_p_readdata, - o_p_delay, // 0 = all threads done | 1 = Still threads that need to - - o_m_evict_addr, - o_m_read_addr, - - o_m_writedata, - - o_m_read_or_write, // 0 = Read | 1 = Write - o_m_valid, - i_m_readdata, - - i_m_ready - ); - - //parameter NUM_BANKS = `CACHE_BANKS; - //localparam NUM_WORDS_PER_BLOCK = `CACHE_BLOCK / (`CACHE_BANKS*4); - - //localparam CACHE_BLOCK_PER_BANK = (`CACHE_BLOCK / `CACHE_BANKS); - - localparam CACHE_IDLE = 0; // Idle - localparam SEND_MEM_REQ = 1; // Write back this block into memory - localparam RECIV_MEM_RSP = 2; - - - //parameter cache_entry = 9; - input wire clk, rst; - input wire [NUM_REQ-1:0] i_p_valid; - input wire [NUM_REQ-1:0][31:0] i_p_addr; // FIXME - input wire [NUM_REQ-1:0][31:0] i_p_writedata; - input wire i_p_read_or_write; //, i_p_write; - output reg [NUM_REQ-1:0][31:0] o_p_readdata; - output wire o_p_delay; - output reg [31:0] o_m_evict_addr; // Address is xxxxxxxxxxoooobbbyy - output reg [31:0] o_m_read_addr; - output reg o_m_valid; - output reg[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata; - output reg o_m_read_or_write; //, o_m_write; - input wire[CACHE_BANKS - 1:0][NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata; - input wire i_m_ready; - - input wire[2:0] i_p_mem_read; - input wire[2:0] i_p_mem_write; - - - // Buffer for final data - reg [NUM_REQ-1:0][31:0] final_data_read; - reg [NUM_REQ-1:0][31:0] new_final_data_read; - wire[NUM_REQ-1:0][31:0] new_final_data_read_Qual; - - assign o_p_readdata = new_final_data_read_Qual; - - - reg[CACHE_WAY_INDEX-1:0] global_way_to_evict; - - - wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] thread_track_banks; // Valid thread mask per bank - wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] index_per_bank; // Index of thread each bank will try to service - wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] 
use_mask_per_bank; // A mask of index_per_bank - wire[CACHE_BANKS - 1 : 0] valid_per_bank; // Valid request going to each bank - wire[CACHE_BANKS - 1 : 0][NUM_REQ-1:0] threads_serviced_per_bank; // Bank successfully serviced per bank - - wire[CACHE_BANKS-1:0][31:0] readdata_per_bank; // Data read from each bank - wire[CACHE_BANKS-1:0] hit_per_bank; // Whether each bank got a hit or a miss - wire[CACHE_BANKS-1:0] eviction_wb; - reg[CACHE_BANKS-1:0] eviction_wb_old; - - - // wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_new; - // reg [CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] evicted_way_old; - // wire[CACHE_BANKS -1 : 0][CACHE_WAY_INDEX-1:0] way_used; - - // Internal State - reg [3:0] state; - wire[3:0] new_state; - - wire[NUM_REQ-1:0] use_valid; // Valid used throught the code - reg[NUM_REQ-1:0] stored_valid; // Saving the threads still left (bank conflict or bank miss) - wire[NUM_REQ-1:0] new_stored_valid; // New stored valid - - - - reg[CACHE_BANKS - 1 : 0][31:0] eviction_addr_per_bank; - - reg[31:0] miss_addr; - // reg[31:0] evict_addr; - - wire curr_processor_request_valid = (|i_p_valid); - - - assign use_valid = (stored_valid == 0) ? 
i_p_valid : stored_valid; - - - - - - - VX_cache_bank_valid #(.NUM_BANKS (CACHE_BANKS), - .LOG_NUM_BANKS (LOG_NUM_BANKS), - .NUM_REQ (NUM_REQ)) multip_banks( - .i_p_valid (use_valid), - .i_p_addr (i_p_addr), - .thread_track_banks(thread_track_banks) - ); - - - reg[NUM_REQ-1:0] threads_serviced_Qual; - - reg[NUM_REQ-1:0] debug_hit_per_bank_mask[CACHE_BANKS-1:0]; - - genvar bid; - generate - for (bid = 0; bid < CACHE_BANKS; bid=bid+1) begin : chooose_threads - wire[NUM_REQ-1:0] use_threads_track_banks = thread_track_banks[bid]; - wire[LOG_NUM_REQ-1:0] use_thread_index = index_per_bank[bid]; - wire use_write_final_data = hit_per_bank[bid]; - wire[31:0] use_data_final_data = readdata_per_bank[bid]; - VX_priority_encoder_w_mask #(.N(NUM_REQ)) choose_thread( - .valids(use_threads_track_banks), - .mask (use_mask_per_bank[bid]), - .index (index_per_bank[bid]), - .found (valid_per_bank[bid]) - ); - - assign debug_hit_per_bank_mask[bid] = {NUM_REQ{hit_per_bank[bid]}}; - assign threads_serviced_per_bank[bid] = use_mask_per_bank[bid] & debug_hit_per_bank_mask[bid]; - end - endgenerate - - integer test_bid; - always @(*) begin - new_final_data_read = 0; - for (test_bid=0; test_bid < CACHE_BANKS; test_bid=test_bid+1) - begin - if (hit_per_bank[test_bid]) begin - new_final_data_read[index_per_bank[test_bid]] = readdata_per_bank[test_bid]; - end - end - end - - - wire[CACHE_BANKS - 1 : 0] detect_bank_miss; - //assign threads_serviced_Qual = threads_serviced_per_bank[0] | threads_serviced_per_bank[1] | - // threads_serviced_per_bank[2] | threads_serviced_per_bank[3] | - // threads_serviced_per_bank[4] | threads_serviced_per_bank[5] | - // threads_serviced_per_bank[6] | threads_serviced_per_bank[7]; - integer bbid; - always @(*) begin - threads_serviced_Qual = 0; - for (bbid = 0; bbid < CACHE_BANKS; bbid=bbid+1) - begin - threads_serviced_Qual = threads_serviced_Qual | threads_serviced_per_bank[bbid]; - end - end - - - - genvar tid; - generate - for (tid = 0; tid < NUM_REQ; tid 
=tid+1) begin : new_final_data_read_Qual_setup - assign new_final_data_read_Qual[tid] = threads_serviced_Qual[tid] ? new_final_data_read[tid] : final_data_read[tid]; - end - endgenerate - - - assign detect_bank_miss = (valid_per_bank & ~hit_per_bank); - - wire delay; - assign delay = (new_stored_valid != 0) || (state != CACHE_IDLE); // add other states - - assign o_p_delay = delay; - - wire[CACHE_BANKS - 1 : 0][LOG_NUM_REQ-1:0] send_index_to_bank = index_per_bank; - - - wire[LOG_NUM_BANKS-1:0] miss_bank_index; - wire miss_found; - VX_generic_priority_encoder #(.N(CACHE_BANKS)) get_miss_index - ( - .valids(detect_bank_miss), - .index (miss_bank_index), - .found (miss_found) - ); - - - - assign new_state = ((state == CACHE_IDLE) && (|detect_bank_miss)) ? SEND_MEM_REQ : - (state == SEND_MEM_REQ) ? RECIV_MEM_RSP : - ((state == RECIV_MEM_RSP) && !i_m_ready) ? RECIV_MEM_RSP : - CACHE_IDLE; - - // Handle if there is more than one miss - assign new_stored_valid = use_valid & (~threads_serviced_Qual); - - - wire update_global_way_to_evict = ((state == RECIV_MEM_RSP) && (new_state == CACHE_IDLE)) && (CACHE_WAYS > 1); - -/////////////////////////////////////////////////////////////////////// - genvar cur_t; - integer init_b; - always @(posedge clk, posedge rst) begin - if (rst) begin - final_data_read <= 0; - // new_final_data_read = 0; - state <= 0; - stored_valid <= 0; - // eviction_addr_per_bank <= 0; - miss_addr <= 0; - // evict_addr <= 0; - // threads_serviced_Qual = 0; - // for (init_b = 0; init_b < NUM_BANKS; init_b=init_b+1) - // begin - // debug_hit_per_bank_mask[init_b] <= 0; - // end - // evicted_way_old <= 0; - // eviction_wb_old <= 0; - global_way_to_evict <= 0; - - end else begin - - global_way_to_evict <= (update_global_way_to_evict) ? 
(global_way_to_evict+1) : global_way_to_evict; - - state <= new_state; - - stored_valid <= new_stored_valid; - - if (state == CACHE_IDLE) begin - if (miss_found) begin - miss_addr <= i_p_addr[send_index_to_bank[miss_bank_index]]; - // evict_addr <= eviction_addr_per_bank[miss_bank_index]; - end else begin - miss_addr <= 0; - // evict_addr <= 0; - end - end - - final_data_read <= new_final_data_read_Qual; - // evicted_way_old <= evicted_way_new; - // eviction_wb_old <= eviction_wb; - end - end - - - genvar bank_id; - generate - for (bank_id = 0; bank_id < CACHE_BANKS; bank_id = bank_id + 1) begin : cache_banks - wire[31:0] bank_addr = (state == SEND_MEM_REQ) ? miss_addr : - (state == RECIV_MEM_RSP) ? miss_addr : - i_p_addr[send_index_to_bank[bank_id]]; - - // assign evicted_way_new[bank_id] = (state == SEND_MEM_REQ) ? way_used[bank_id] : - // (state == RECIV_MEM_RSP) ? evicted_way_old[bank_id] : - // 0; - - wire[1:0] byte_select = bank_addr[1:0]; - wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START]; - - `ifdef SYN_FUNC - wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0; - wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0; - `else - wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START]; - wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START]; - `endif - - - wire normal_valid_in = valid_per_bank[bank_id]; - wire use_valid_in = ((state == RECIV_MEM_RSP) && i_m_ready) ? 1'b1 : - ((state == RECIV_MEM_RSP) && !i_m_ready) ? 1'b0 : - ((state == SEND_MEM_REQ)) ? 
1'b0 : - normal_valid_in; - - - VX_Cache_Bank #( - .CACHE_SIZE (CACHE_SIZE), - .CACHE_WAYS (CACHE_WAYS), - .CACHE_BLOCK (CACHE_BLOCK), - .CACHE_BANKS (CACHE_BANKS), - .LOG_NUM_BANKS (LOG_NUM_BANKS), - .NUM_REQ (NUM_REQ), - .LOG_NUM_REQ (LOG_NUM_REQ), - .NUM_IND (NUM_IND), - .CACHE_WAY_INDEX (CACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK (NUM_WORDS_PER_BLOCK), - .OFFSET_SIZE_START (OFFSET_SIZE_START), - .OFFSET_SIZE_END (OFFSET_SIZE_END), - .TAG_SIZE_START (TAG_SIZE_START), - .TAG_SIZE_END (TAG_SIZE_END), - .IND_SIZE_START (IND_SIZE_START), - .IND_SIZE_END (IND_SIZE_END), - .ADDR_TAG_START (ADDR_TAG_START), - .ADDR_TAG_END (ADDR_TAG_END), - .ADDR_OFFSET_START (ADDR_OFFSET_START), - .ADDR_OFFSET_END (ADDR_OFFSET_END), - .ADDR_IND_START (ADDR_IND_START), - .ADDR_IND_END (ADDR_IND_END) - ) bank_structure ( - .clk (clk), - .rst (rst), - .state (state), - .valid_in (use_valid_in), - .actual_index (cache_index), - .o_tag (cache_tag), - .block_offset (cache_offset), - .writedata (i_p_writedata[send_index_to_bank[bank_id]]), - .read_or_write (i_p_read_or_write), - .i_p_mem_read (i_p_mem_read), - .i_p_mem_write (i_p_mem_write), - .byte_select (byte_select), - .hit (hit_per_bank[bank_id]), - .readdata (readdata_per_bank[bank_id]), // Data read - .eviction_addr (eviction_addr_per_bank[bank_id]), - .data_evicted (o_m_writedata[bank_id]), - .eviction_wb (eviction_wb[bank_id]), // Something needs to be written back - .fetched_writedata(i_m_readdata[bank_id]), // Data From memory - .evicted_way (global_way_to_evict) - ); - - end - endgenerate - - // Mem Rsp - - // Req to mem: - assign o_m_evict_addr = (eviction_addr_per_bank[0]) & MEM_ADDR_REQ_MASK; // Could be anything because tag+index are same - assign o_m_read_addr = miss_addr & MEM_ADDR_REQ_MASK; - assign o_m_valid = (state == SEND_MEM_REQ); - assign o_m_read_or_write = (state == SEND_MEM_REQ) && (|eviction_wb); - //end - -endmodule - - - - - diff --git a/hw/rtl/cache/VX_d_cache_encapsulate.v 
b/hw/rtl/cache/VX_d_cache_encapsulate.v deleted file mode 100644 index 135e4a5a..00000000 --- a/hw/rtl/cache/VX_d_cache_encapsulate.v +++ /dev/null @@ -1,115 +0,0 @@ -`include "VX_define.vh" - -`define NUM_WORDS_PER_BLOCK 4 - -module VX_d_cache_encapsulate ( - clk, - rst, - - i_p_initial_request, - i_p_addr, - i_p_writedata, - i_p_read_or_write, - i_p_valid, - - o_p_readdata, - o_p_readdata_valid, - o_p_waitrequest, - - o_m_addr, - o_m_writedata, - o_m_read_or_write, - o_m_valid, - - i_m_readdata, - i_m_ready -); - - parameter NUM_BANKS = 8; - - - - - //parameter cache_entry = 9; - input wire clk, rst; - - input wire i_p_valid[`NUM_THREADS-1:0]; - input wire [31:0] i_p_addr[`NUM_THREADS-1:0]; - input wire i_p_initial_request; - input wire [31:0] i_p_writedata[`NUM_THREADS-1:0]; - input wire i_p_read_or_write; - - input wire [31:0] i_m_readdata[NUM_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; - input wire i_m_ready; - - output reg [31:0] o_p_readdata[`NUM_THREADS-1:0]; - output reg o_p_readdata_valid[`NUM_THREADS-1:0] ; - output reg o_p_waitrequest; - - output reg [31:0] o_m_addr; - output reg o_m_valid; - output reg [31:0] o_m_writedata[NUM_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; - output reg o_m_read_or_write; - - - // Inter - wire [`NUM_THREADS-1:0] i_p_valid_if; - wire [`NUM_THREADS-1:0][31:0] i_p_addr_if; - wire [`NUM_THREADS-1:0][31:0] i_p_writedata_if; - - reg [`NUM_THREADS-1:0][31:0] o_p_readdata_if; - reg [`NUM_THREADS-1:0] o_p_readdata_valid_if; - - reg[NUM_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_if; - wire[NUM_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_if; - - - genvar curr_thraed, curr_bank, curr_word; - generate - for (curr_thraed = 0; curr_thraed < `NUM_THREADS; curr_thraed = curr_thraed + 1) begin : threads - assign i_p_valid_if[curr_thraed] = i_p_valid[curr_thraed]; - assign i_p_addr_if[curr_thraed] = i_p_addr[curr_thraed]; - assign i_p_writedata_if[curr_thraed] = i_p_writedata[curr_thraed]; - assign 
o_p_readdata[curr_thraed] = o_p_readdata_if[curr_thraed]; - assign o_p_readdata_valid[curr_thraed] = o_p_readdata_valid_if[curr_thraed]; - end - - for (curr_bank = 0; curr_bank < NUM_BANKS; curr_bank = curr_bank + 1) begin : banks - for (curr_word = 0; curr_word < `NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin : words - - assign o_m_writedata[curr_bank][curr_word] = o_m_writedata_if[curr_bank][curr_word]; - assign i_m_readdata_if[curr_bank][curr_word] = i_m_readdata[curr_bank][curr_word]; - - end - end - endgenerate - -VX_d_cache dcache( - .clk (clk), - .rst (rst), - .i_p_valid (i_p_valid_if), - .i_p_addr (i_p_addr_if), - .i_p_initial_request(i_p_initial_request), - .i_p_writedata (i_p_writedata_if), - .i_p_read_or_write (i_p_read_or_write), - .o_p_readdata (o_p_readdata_if), - .o_p_readdata_valid (o_p_readdata_valid_if), - .o_p_waitrequest (o_p_waitrequest), - .o_m_addr (o_m_addr), - .o_m_valid (o_m_valid), - .o_m_writedata (o_m_writedata_if), - .o_m_read_or_write (o_m_read_or_write), - .i_m_readdata (i_m_readdata_if), - .i_m_ready (i_m_ready) - ); - - -endmodule - - - - - - - - diff --git a/hw/rtl/cache/VX_d_cache_tb.v b/hw/rtl/cache/VX_d_cache_tb.v deleted file mode 100644 index 530ce792..00000000 --- a/hw/rtl/cache/VX_d_cache_tb.v +++ /dev/null @@ -1,58 +0,0 @@ -`include "VX_define.vh" -`include "VX_d_cache.v" - -module VX_d_cache_tb; - - parameter NUM_BANKS = 8; - - reg clk, reset, im_ready; - reg [`NUM_THREADS-1:0] i_p_valid; - reg [`NUM_THREADS-1:0][13:0] i_p_addr; // FIXME - reg i_p_initial_request; - reg [`NUM_THREADS-1:0][31:0] i_p_writedata; - reg i_p_read_or_write; //, i_p_write; - reg [`NUM_THREADS-1:0][31:0] o_p_readdata; - reg [`NUM_THREADS-1:0] o_p_readdata_valid; - reg o_p_waitrequest; - reg [13:0] o_m_addr; // Only one address is sent out at a time to memory - reg o_m_valid; - reg [(NUM_BANKS * 32) - 1:0] o_m_writedata; - reg o_m_read_or_write; //, o_m_write; - reg [(NUM_BANKS * 32) - 1:0] i_m_readdata; // Read Data that is passed from the 
memory module back to the controller - - - VX_d_cache d_cache(.clk(clk), - .rst(reset), - .i_p_initial_request(i_p_initial_request), - .i_p_addr(i_p_addr), - .i_p_writedata(i_p_writedata), - .i_p_read_or_write(i_p_read_or_write), // 0 = Read | 1 = Write - .i_p_valid(i_p_valid), - .o_p_readdata(o_p_readdata), - .o_p_readdata_valid(o_p_readdata_valid), - .o_p_waitrequest(o_p_waitrequest), // 0 = all threads done | 1 = Still threads that need to - .o_m_addr(o_m_addr), - .o_m_writedata(o_m_writedata), - .o_m_read_or_write(o_m_read_or_write), // 0 = Read | 1 = Write - .o_m_valid(o_m_valid), - .i_m_readdata(i_m_readdata), - .i_m_ready(im_ready) - //cnt_r, - //cnt_w, - //cnt_hit_r, - //cnt_hit_w - ); - - - - initial - begin - clk = 0; - reset = 0; - - end - - always - #5 clk = ! clk; - -endmodule \ No newline at end of file diff --git a/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v b/hw/rtl/cache/VX_dcache_llv_resp_bank_sel.v similarity index 100% rename from hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v rename to hw/rtl/cache/VX_dcache_llv_resp_bank_sel.v diff --git a/hw/rtl/generic_cache/VX_fill_invalidator.v b/hw/rtl/cache/VX_fill_invalidator.v similarity index 100% rename from hw/rtl/generic_cache/VX_fill_invalidator.v rename to hw/rtl/cache/VX_fill_invalidator.v diff --git a/hw/rtl/cache/VX_generic_pe.v b/hw/rtl/cache/VX_generic_pe.v deleted file mode 100644 index 4ff3cc17..00000000 --- a/hw/rtl/cache/VX_generic_pe.v +++ /dev/null @@ -1,24 +0,0 @@ -module VX_generic_pe - #( - parameter N = 8 - ) - ( - input wire[N-1:0] valids, - output reg[$clog2(N)-1:0] index, - output reg found - ); - -parameter my_secret = 0; - - integer i; - always @(*) begin - index = 0; - found = 0; - for (i = N-1; i >= 0; i = i - 1) begin - if (valids[i]) begin - index = i[$clog2(N)-1:0]; - found = 1; - end - end - end -endmodule \ No newline at end of file diff --git a/hw/rtl/generic_cache/VX_mrv_queue.v b/hw/rtl/cache/VX_mrv_queue.v similarity index 100% rename from 
hw/rtl/generic_cache/VX_mrv_queue.v rename to hw/rtl/cache/VX_mrv_queue.v diff --git a/hw/rtl/generic_cache/VX_prefetcher.v b/hw/rtl/cache/VX_prefetcher.v similarity index 100% rename from hw/rtl/generic_cache/VX_prefetcher.v rename to hw/rtl/cache/VX_prefetcher.v diff --git a/hw/rtl/generic_cache/VX_snp_fwd_arb.v b/hw/rtl/cache/VX_snp_fwd_arb.v similarity index 100% rename from hw/rtl/generic_cache/VX_snp_fwd_arb.v rename to hw/rtl/cache/VX_snp_fwd_arb.v diff --git a/hw/rtl/generic_cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v similarity index 100% rename from hw/rtl/generic_cache/VX_tag_data_access.v rename to hw/rtl/cache/VX_tag_data_access.v diff --git a/hw/rtl/generic_cache/VX_tag_data_structure.v b/hw/rtl/cache/VX_tag_data_structure.v similarity index 100% rename from hw/rtl/generic_cache/VX_tag_data_structure.v rename to hw/rtl/cache/VX_tag_data_structure.v diff --git a/hw/rtl/cache/cache_set.v b/hw/rtl/cache/cache_set.v deleted file mode 100644 index 1b95f022..00000000 --- a/hw/rtl/cache/cache_set.v +++ /dev/null @@ -1,233 +0,0 @@ -// To Do: Change way_id_out to an internal register which holds when in between access and finished. 
-// Also add a bit about wheter the "Way ID" is valid / being held or if it is just default -// Also make sure all possible output states are transmitted back to the bank correctly - -// `include "VX_define.vh" -module cache_set(clk, - rst, - // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes - access, // First - find_evict, - write_from_mem, - idle, - // entry, - o_tag, - writedata, - //byte_en, - write, - //word_en, - //way_id_in, - //way_id_out, - readdata, - //wb_addr, - hit, - eviction_wb, - eviction_tag, - //eviction_data, - //modify, - miss - //valid_data - //read_miss - ); - - parameter cache_entry = 14; - parameter ways_per_set = 4; - - input wire clk, rst; - input wire access; - input wire find_evict; - input wire write_from_mem; - input wire idle; - //input wire [cache_entry-1:0] entry; - input wire [1:0] o_tag; - input wire [31:0] writedata; - //input wire [3:0] byte_en; - input wire write; // 0 == False - //input wire [3:0] word_en; - //input wire read_miss; - //input wire [1:0] way_id_in; - //output reg [1:0] way_id_out; - output reg [31:0] readdata; - //output reg [3:0] hit; - output reg hit; - output reg miss; - output wire eviction_wb; - output wire [1:0] eviction_tag; - reg [31:0] eviction_data; - //output wire [22:0] wb_addr; - //output wire modify, valid_data; - - - - //wire [2:0] i_tag; - //wire dirty; - //wire [24-cache_entry:0] write_tag_data; - - // Table for one set - reg [2:0] counter; // Determines which to evict - reg valid [ways_per_set-1:0]; - reg [1:0] tag [ways_per_set-1:0]; - reg clean [ways_per_set-1:0]; - reg [31:0] data [ways_per_set-1:0]; - - - assign eviction_wb = miss && clean[counter[1:0]] != 1'b1 && valid[counter[1:0]] == 1'b1; - assign eviction_tag = tag[counter[1:0]]; - //assign eviction_data = data[counter[1:0]]; - //assign hit = valid_data && (o_tag == i_tag); - //assign modify = valid_data && (o_tag != i_tag) && dirty; - //assign miss = !valid_data 
|| ((o_tag != i_tag) && !dirty); - - //assign wb_addr = {i_tag, entry}; - always @(posedge clk) begin - if (rst) begin - - end - if (find_evict) begin - if (tag[0] == o_tag && valid[0]) begin - readdata <= data[0]; - end else if (tag[1] == o_tag && valid[1]) begin - readdata <= data[1]; - end else if (tag[2] == o_tag && valid[2]) begin - readdata <= data[2]; - end else if (tag[3] == o_tag && valid[3]) begin - readdata <= data[3]; - end - end else if (access) begin - //tag[`NUM_THREADS-1:0] <= i_p_addr[`NUM_THREADS-1:0][13:12]; - counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC - // Hit in First Column - if (tag[0] == o_tag && valid[0]) begin - if (write == 1'b0) begin // if it is a read - if (clean[0] == 1'b1 ) begin - //hit <= 4'b0001; - hit <= 1'b1; - readdata <= data[0]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; // SHOULD PROBABLY TRACK WHERE THIS MISS IS IN A DIFFERENT VARIABLE - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[0] <= writedata; - clean[0] <= 1'b0; - //hit <= 4'b0001; - hit <= 1'b1; - end - end - // Hit in Second Column - else if (tag[1] == o_tag && valid[1]) begin - if (write == 1'b0) begin // if it is a read - if (clean[1] == 1'b1 ) begin - //hit <= 4'b0010; - hit <= 1'b1; - readdata <= data[1]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[1] <= writedata; - clean[1] <= 1'b0; - //hit <= 4'b0010; - hit <= 1'b1; - end - end - // Hit in Third Column - else if (tag[2] == o_tag && valid[2]) begin - if (write == 1'b0) begin // if it is a read - if (clean[2] == 1'b1 ) begin - //hit <= 4'b0100; - hit <= 1'b1; - readdata <= data[2]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[2] <= 
writedata; - clean[2] <= 1'b0; - //hit <= 4'b0100; - hit <= 1'b1; - end - end - // Hit in Fourth Column - else if (tag[3] == o_tag && valid[3]) begin - if (write == 1'b0) begin // if it is a read - if (clean[3] == 1'b1 ) begin - //hit <= 4'b1000; - hit <= 1'b1; - readdata <= data[3]; - miss <= 1'b0; - end else begin - //hit <= 4'b0000; - hit <= 1'b0; - readdata <= 32'b0; - miss <= 1'b1; - end - end else if (write == 1'b1) begin - data[3] <= writedata; - clean[3] <= 1'b0; - //hit <= 4'b1000; - hit <= 1'b1; - end - end - // Miss - else begin - //way_id_out <= counter; - miss <= 1'b1; - if (write == 1'b0) begin // Read Miss - clean[counter[1:0]] <= 1'b1; - data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS - end else if (write == 1'b1) begin // Write Miss - clean[counter[1:0]] <= 1'b1; - data[counter[1:0]] <= writedata; - end - end - - end - if (write_from_mem) begin - tag[counter[1:0]] <= o_tag; - valid[counter[1:0]] <= 1'b1; - hit <= 1'b1; - if (write == 1'b0) begin // Read Miss - clean[counter[1:0]] <= 1'b1; - data[counter[1:0]] <= 32'h7FF; // FIX WITH ACTUAL MEMORY ACCESS - end else if (write == 1'b1) begin // Write Miss - clean[counter[1:0]] <= 1'b0; - data[counter[1:0]] <= writedata; - end - end - if (idle) begin // Set "way" register equal to invalid value - hit <= 1'b1; // set to know it is ready - miss <= 1'b0; - readdata <= 32'hFFFFFFFF; - end - if (find_evict) begin // Keep "way" value the same !!!! Fix. Need to send back data with matching tag. 
Also need to ensure evicted data doesnt get lost - if (tag[3] == o_tag && valid[3]) begin - readdata <= data[3]; - end else if (tag[1] == o_tag && valid[1]) begin - readdata <= data[1]; - end else if (tag[2] == o_tag && valid[2]) begin - readdata <= data[2]; - end else if (tag[0] == o_tag && valid[0]) begin - readdata <= data[0]; - end else begin - readdata <= eviction_data; - end - hit <= 1'b1; - miss <= 1'b0; - end - counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC - eviction_data <= data[counter[1:0]]; - end - -endmodule \ No newline at end of file diff --git a/hw/rtl/cache/d_cache_test_bench.cpp b/hw/rtl/cache/d_cache_test_bench.cpp deleted file mode 100644 index e7fb3214..00000000 --- a/hw/rtl/cache/d_cache_test_bench.cpp +++ /dev/null @@ -1,29 +0,0 @@ - - -#include "d_cache_test_bench.h" - -//#define NUM_TESTS 46 - -int main(int argc, char **argv) -{ - - Verilated::commandArgs(argc, argv); - - Verilated::traceEverOn(true); - - - VX_d_cache v; - - - bool curr = v.simulate(); - //if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl; - //if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl; - if ( curr) std::cerr << GREEN << "Test Passed: " << std::endl; - if (!curr) std::cerr << RED << "Test Failed: " << std::endl; - - return 0; - -} - - - diff --git a/hw/rtl/cache/d_cache_test_bench.h b/hw/rtl/cache/d_cache_test_bench.h deleted file mode 100644 index a9ce9470..00000000 --- a/hw/rtl/cache/d_cache_test_bench.h +++ /dev/null @@ -1,353 +0,0 @@ -// C++ libraries -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "VX_define.h" -#include "VVX_d_cache_encapsulate.h" -#include "verilated.h" - -#include "d_cache_test_bench_debug.h" - - -#ifdef VCD_OUTPUT -#include -#endif - -// void set_Index (auto & var, int index, int size, auto val) -// { -// int real_shift -// } - -class 
VX_d_cache -{ - public: - VX_d_cache(); - ~VX_d_cache(); - bool simulate(); - bool operation(int, bool); - - VVX_d_cache_encapsulate * vx_d_cache_; - long int curr_cycle; - int stats_total_cycles = 0; - int stats_dram_accesses = 0; - #ifdef VCD_OUTPUT - VerilatedVcdC *m_trace; - #endif -}; - - - -VX_d_cache::VX_d_cache() : curr_cycle(0), stats_total_cycles(0), stats_dram_accesses(0) -{ - - this->vx_d_cache_ = new VVX_d_cache_encapsulate; -#ifdef VCD_OUTPUT - this->m_trace = new VerilatedVcdC; - this->vx_d_cache_->trace(m_trace, 99); - this->m_trace->open("trace.vcd"); -#endif - //this->results.open("../results.txt"); -} - -VX_d_cache::~VX_d_cache() -{ - delete this->vx_d_cache_; -#ifdef VCD_OUTPUT - m_trace->close(); -#endif -} - -bool VX_d_cache::operation(int counter_value, bool do_op) { - if (do_op) { - vx_d_cache_->i_p_initial_request = 1; - } else { - vx_d_cache_->i_p_initial_request = 0; - } - - if (counter_value == 0 && do_op) { // Write to bank 1-4 at index 64 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x7f6f8f6f; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001008; // bank 2 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3 - } else { - vx_d_cache_->i_p_addr[3] = 0x30010010; // bank 4 -- This is serviced 1st, then the other 3 banks are at once - } - } - - } else if (counter_value == 1 && do_op) { // Write to bank 4-7 at index 108 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0xd1d2d2d3; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001c14; // bank 5 - } 
else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 6 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x30001c1c; // bank 7 - } else { - vx_d_cache_->i_p_addr[3] = 0x30001c10; // bank 4 - } - } - - } else if (counter_value == 2 && do_op) { // Read from bank 1-4 at those indexes - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 0; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x23232332; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001004; // bank 1 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c18; // bank 5 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x3000100c; // bank 3 - } else { - vx_d_cache_->i_p_addr[3] = 0x30001c1c;; // bank 7 - } - } - } - } else if (counter_value == 3 && do_op) { // Write to Bank 1-5 (evictions will need to take place) - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0; - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1; - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2; - } else { - vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3; - } - } - } else if (counter_value == 4 && do_op) { // Read from addresses that were just overwritten above ^^^ - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 0; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x23232332; - 
vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x20001004; // bank 1 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x20001008; // bank 2 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x2000100c; // bank 3 - } else { - vx_d_cache_->i_p_addr[3] = 0x20001c14; // bank 5 - } - } - } - /* These will check writing multiple threads writing to the same block - } else if (counter_value == 3 && do_op) { // Write to Bank 0 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 1; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb0; - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb1; - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb2; - } else { - vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1 - vx_d_cache_->i_p_writedata[j] = 0xaaaabbb3; - } - } - } else if (counter_value == 4 && do_op) { // Read from Bank 0 - vx_d_cache_->i_p_initial_request = 1; - vx_d_cache_->i_p_read_or_write = 0; - vx_d_cache_->i_m_ready = 0; - for (int j = 0; j < NT; j++) { - vx_d_cache_->i_p_valid[j] = 1; - vx_d_cache_->i_p_writedata[j] = 0x23232332; - vx_d_cache_->i_m_readdata[j][0] = 1; - if (j == 0) { - vx_d_cache_->i_p_addr[0] = 0x30001f00; // bank 0 - } else if (j == 1) { - vx_d_cache_->i_p_addr[1] = 0x30001c00; // bank 0 - } else if (j == 2) { - vx_d_cache_->i_p_addr[2] = 0x30001a00; // bank 0 - } else { - vx_d_cache_->i_p_addr[3] = 0x30001904; // bank 1 - } - } - } - */ - // Handle Memory Accesses - unsigned int read_data_from_mem = 0x1111 + counter_value + this->stats_total_cycles; - - if (vx_d_cache_->o_m_valid) { - this->stats_dram_accesses = this->stats_dram_accesses + 1; // (assuming memory access 
takes 20 cycles) - - this->stats_total_cycles += 1; - vx_d_cache_->clk = 0; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump(2*this->stats_total_cycles); - #endif - vx_d_cache_->clk = 1; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump((2*this->stats_total_cycles)+1); - #endif - - vx_d_cache_->i_m_ready = 1; - for (int j1 = 0; j1 < 8; j1++) { - for (int j2 = 0; j2 < 4; j2++) { - vx_d_cache_->i_m_readdata[j1][j2] = read_data_from_mem; - } - } - } else { - vx_d_cache_->i_m_ready = 0; - } - - - if (vx_d_cache_->o_p_waitrequest == 0) { - return true; - } else { - return false; - } - - -} - - -bool VX_d_cache::simulate() -{ - -// this->instruction_file_name = file_to_simulate; - // this->results << "\n****************\t" << file_to_simulate << "\t****************\n"; - -// this->ProcessFile(); - - // auto start_time = std::chrono::high_resolution_clock::now(); - - - //static bool stop = false; - //static int counter = 0; - //counter = 0; - //stop = false; - - // auto start_time = clock(); - - - vx_d_cache_->clk = 0; - vx_d_cache_->rst = 1; - //vortex->eval(); - //counter = 0; - vx_d_cache_->rst = 0; - - bool cont = false; - bool out_operation = false; - bool do_operation = true; - int other_counter = 0; - //while (this->stop && ((other_counter < 5))) - while (other_counter < 5) - { - - // std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n"; - // istop = ibus_driver(); - // dstop = !dbus_driver(); - - vx_d_cache_->clk = 1; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump(2*this->stats_total_cycles); - #endif - - //vortex->eval(); - //dstop = !dbus_driver(); - - out_operation = operation(other_counter, do_operation); - vx_d_cache_->clk = 0; - vx_d_cache_->eval(); - #ifdef VCD_OUTPUT - m_trace->dump((2*this->stats_total_cycles)+1); - #endif - //vortex->eval(); - - /* - // stop = istop && dstop; - stop = vortex->out_ebreak; - if (stop || cont) - { - cont = true; - counter++; - } else - { - counter = 0; - } - */ - if 
(out_operation) { - other_counter++; - do_operation = true; - } else { - do_operation = false; - } - ++(this->stats_total_cycles); - - if (this->stats_total_cycles > 5000) { - break; - } - - } - - std::cerr << "New Total Cycles: " << (this->stats_total_cycles + (this->stats_dram_accesses * 20)) << "\n"; - - //uint32_t status; - //ram.getWord(0, &status); - - //this->print_stats(); - - - - return (true); -} - - - - - - - - - diff --git a/hw/rtl/cache/d_cache_test_bench_debug.h b/hw/rtl/cache/d_cache_test_bench_debug.h deleted file mode 100644 index 54afa11a..00000000 --- a/hw/rtl/cache/d_cache_test_bench_debug.h +++ /dev/null @@ -1 +0,0 @@ -#define VCD_OUTPUT \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_gpu_dcache_dram_req_if.v b/hw/rtl/interfaces/VX_gpu_dcache_dram_req_if.v index 6a66923b..613db546 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_dram_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_dram_req_if.v @@ -1,7 +1,7 @@ `ifndef VX_GPU_DRAM_DCACHE_REQ `define VX_GPU_DRAM_DCACHE_REQ -`include "../generic_cache/VX_cache_config.vh" +`include "../cache/VX_cache_config.vh" interface VX_gpu_dcache_dram_req_if #( parameter BANK_LINE_WORDS = 2 diff --git a/hw/rtl/interfaces/VX_gpu_dcache_dram_rsp_if.v b/hw/rtl/interfaces/VX_gpu_dcache_dram_rsp_if.v index 5ef78c62..fc0d9956 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_dram_rsp_if.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_dram_rsp_if.v @@ -1,7 +1,7 @@ `ifndef VX_GPU_DRAM_DCACHE_RSP `define VX_GPU_DRAM_DCACHE_RSP -`include "../generic_cache/VX_cache_config.vh" +`include "../cache/VX_cache_config.vh" interface VX_gpu_dcache_dram_rsp_if #( parameter BANK_LINE_WORDS = 2 diff --git a/hw/rtl/interfaces/VX_gpu_dcache_req_if.v b/hw/rtl/interfaces/VX_gpu_dcache_req_if.v index 0bad6788..75415295 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_req_if.v @@ -1,7 +1,7 @@ `ifndef VX_GPU_DCACHE_REQ `define VX_GPU_DCACHE_REQ -`include "../generic_cache/VX_cache_config.vh" +`include 
"../cache/VX_cache_config.vh" interface VX_gpu_dcache_req_if #( parameter NUM_REQUESTS = 32 diff --git a/hw/rtl/interfaces/VX_gpu_dcache_rsp_if.v b/hw/rtl/interfaces/VX_gpu_dcache_rsp_if.v index 2a8c96b0..c353e3da 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_rsp_if.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_rsp_if.v @@ -1,7 +1,7 @@ `ifndef VX_GPU_DCACHE_RSP `define VX_GPU_DCACHE_RSP -`include "../generic_cache/VX_cache_config.vh" +`include "../cache/VX_cache_config.vh" interface VX_gpu_dcache_rsp_if #( parameter NUM_REQUESTS = 32 diff --git a/hw/rtl/interfaces/VX_gpu_dcache_snp_req_if.v b/hw/rtl/interfaces/VX_gpu_dcache_snp_req_if.v index c4be1cb3..7e361ed2 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_snp_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_snp_req_if.v @@ -1,7 +1,7 @@ `ifndef VX_GPU_SNP_REQ `define VX_GPU_SNP_REQ -`include "../generic_cache/VX_cache_config.vh" +`include "../cache/VX_cache_config.vh" interface VX_gpu_dcache_snp_req_if (); // Snoop Req diff --git a/hw/rtl/interfaces/VX_gpu_snp_req_rsp_if.v b/hw/rtl/interfaces/VX_gpu_snp_req_rsp_if.v index e205b6e4..0818df75 100644 --- a/hw/rtl/interfaces/VX_gpu_snp_req_rsp_if.v +++ b/hw/rtl/interfaces/VX_gpu_snp_req_rsp_if.v @@ -1,7 +1,7 @@ `ifndef VX_GPU_SNP_REQ_RSP `define VX_GPU_SNP_REQ_RSP -`include "../generic_cache/VX_cache_config.vh" +`include "../cache/VX_cache_config.vh" interface VX_gpu_snp_req_rsp_if (); diff --git a/hw/rtl/compat/VX_divide.v b/hw/rtl/libs/VX_divide.v similarity index 100% rename from hw/rtl/compat/VX_divide.v rename to hw/rtl/libs/VX_divide.v diff --git a/hw/rtl/compat/VX_tb_divide.sv b/hw/rtl/libs/VX_divide_tb.v similarity index 100% rename from hw/rtl/compat/VX_tb_divide.sv rename to hw/rtl/libs/VX_divide_tb.v diff --git a/hw/rtl/VX_generic_priority_encoder.v b/hw/rtl/libs/VX_generic_priority_encoder.v similarity index 100% rename from hw/rtl/VX_generic_priority_encoder.v rename to hw/rtl/libs/VX_generic_priority_encoder.v diff --git a/hw/rtl/VX_generic_queue.v 
b/hw/rtl/libs/VX_generic_queue.v similarity index 100% rename from hw/rtl/VX_generic_queue.v rename to hw/rtl/libs/VX_generic_queue.v diff --git a/hw/rtl/VX_generic_queue_ll.v b/hw/rtl/libs/VX_generic_queue_ll.v similarity index 100% rename from hw/rtl/VX_generic_queue_ll.v rename to hw/rtl/libs/VX_generic_queue_ll.v diff --git a/hw/rtl/VX_generic_register.v b/hw/rtl/libs/VX_generic_register.v similarity index 100% rename from hw/rtl/VX_generic_register.v rename to hw/rtl/libs/VX_generic_register.v diff --git a/hw/rtl/VX_generic_stack.v b/hw/rtl/libs/VX_generic_stack.v similarity index 100% rename from hw/rtl/VX_generic_stack.v rename to hw/rtl/libs/VX_generic_stack.v diff --git a/hw/rtl/compat/VX_mult.v b/hw/rtl/libs/VX_mult.v similarity index 100% rename from hw/rtl/compat/VX_mult.v rename to hw/rtl/libs/VX_mult.v diff --git a/hw/rtl/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v similarity index 100% rename from hw/rtl/VX_priority_encoder.v rename to hw/rtl/libs/VX_priority_encoder.v diff --git a/hw/rtl/VX_priority_encoder_w_mask.v b/hw/rtl/libs/VX_priority_encoder_w_mask.v similarity index 100% rename from hw/rtl/VX_priority_encoder_w_mask.v rename to hw/rtl/libs/VX_priority_encoder_w_mask.v diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 8032ed6d..a8844494 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -57,7 +57,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - $(QUARTUS_ROOT)/quartus/bin/quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc vortex.sdc -inc "..;../interfaces;../pipe_regs;../cache;../generic_cache;../shared_memory;../compat" + $(QUARTUS_ROOT)/quartus/bin/quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc vortex.sdc -inc "..;../libs;../interfaces;../pipe_regs;../cache;../shared_memory" syn.chg: 
$(STAMP) syn.chg diff --git a/hw/syn/quartus/vx_cache/Makefile b/hw/syn/quartus/vx_cache/Makefile index 1aebc25a..57118708 100755 --- a/hw/syn/quartus/vx_cache/Makefile +++ b/hw/syn/quartus/vx_cache/Makefile @@ -1,6 +1,6 @@ PROJECT = VX_cache TOP_LEVEL_ENTITY = VX_cache -SRC_FILE = ../../../rtl/generic_cache/VX_cache.v +SRC_FILE = ../../../rtl/cache/VX_cache.v PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf QUARTUS_ROOT ?= /tools/reconfig/intel/18.0 diff --git a/hw/syn/yosys/diagram.ys b/hw/syn/yosys/diagram.ys index ffa3808c..2bd113a5 100644 --- a/hw/syn/yosys/diagram.ys +++ b/hw/syn/yosys/diagram.ys @@ -1,5 +1,5 @@ # load design -read_verilog -sv -I../../rtl -I../../rtl/interfaces -I../../rtl/cache -I../../rtl/generic_cache -I../../rtl/shared_memory -I../../rtl/pipe_regs -I../../rtl/compat ../../rtl/Vortex.v +read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/cache -I../../rtl/shared_memory -I../../rtl/pipe_regs ../../rtl/Vortex.v # dump diagram show diff --git a/hw/syn/yosys/synthesis.ys b/hw/syn/yosys/synthesis.ys index cf285a67..c04a41e3 100644 --- a/hw/syn/yosys/synthesis.ys +++ b/hw/syn/yosys/synthesis.ys @@ -1,5 +1,5 @@ # load design -read_verilog -sv -I../../rtl -I../../rtl/interfaces -I../../rtl/cache -I../../rtl/generic_cache -I../../rtl/shared_memory -I../../rtl/pipe_regs -I../../rtl/compat ../../rtl/Vortex.v +read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/cache -I../../rtl/shared_memory -I../../rtl/pipe_regs ../../rtl/Vortex.v # high-level synthesis proc; opt; fsm;; memory -nomap; opt