diff --git a/rtl/VX_define.v b/rtl/VX_define.v index ab496034..f9b9d9f5 100644 --- a/rtl/VX_define.v +++ b/rtl/VX_define.v @@ -262,7 +262,53 @@ // ========================================= Dcache Configurable Knobs ========================================= +// ========================================= Icache Configurable Knobs ========================================= +// General Cache Knobs + // Size of cache in bytes + `define ICACHE_SIZE_BYTES 1024 + // Size of line inside a bank in bytes + `define IBANK_LINE_SIZE_BYTES 16 + // Number of banks {1, 2, 4, 8,...} + `define INUMBER_BANKS 8 + // Size of a word in bytes + `define IWORD_SIZE_BYTES 4 + // Number of Word requests per cycle {1, 2, 4, 8, ...} + `define INUMBER_REQUESTS 1 + // Number of cycles to complete stage 1 (read from memory) + `define ISTAGE_1_CYCLES 2 + + // Bank Number of words in a line + `define IBANK_LINE_SIZE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES) + `define IBANK_LINE_SIZE_RNG `IBANK_LINE_SIZE_WORDS-1:0 +// Queues feeding into banks Knobs {1, 2, 4, 8, ...} + + // Core Request Queue Size + `define IREQQ_SIZE `NW + // Miss Reserv Queue Knob + `define IMRVQ_SIZE `DREQQ_SIZE + // Dram Fill Rsp Queue Size + `define IDFPQ_SIZE 2 + // Snoop Req Queue + `define ISNRQ_SIZE 8 + +// Queues for writebacks Knobs {1, 2, 4, 8, ...} + // Core Writeback Queue Size + `define ICWBQ_SIZE `DREQQ_SIZE + // Dram Writeback Queue Size + `define IDWBQ_SIZE 4 + // Dram Fill Req Queue Size + `define IDFQQ_SIZE `DREQQ_SIZE + // Lower Level Cache Hit Queue Size + `define ILLVQ_SIZE 0 + + // Fill Invalidator Size {Fill invalidator must be active} + `define IFILL_INVALIDAOR_SIZE 16 + +// Dram knobs + `define ISIMULATED_DRAM_LATENCY_CYCLES 10 + +// ========================================= Icache Configurable Knobs ========================================= diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v index 451f65d4..19b31caf 100644 --- a/rtl/VX_dmem_controller.v +++ b/rtl/VX_dmem_controller.v @@ 
-3,18 +3,22 @@ module VX_dmem_controller ( input wire clk, input wire reset, - // Dcache + + // Dram <-> Dcache VX_gpu_dcache_dram_req_inter VX_gpu_dcache_dram_req, VX_gpu_dcache_dram_res_inter VX_gpu_dcache_dram_res, + // Dram <-> Icache + VX_gpu_dcache_dram_req_inter VX_gpu_icache_dram_req, + VX_gpu_dcache_dram_res_inter VX_gpu_icache_dram_res, + + // Core <-> Dcache VX_gpu_dcache_res_inter VX_dcache_rsp, VX_gpu_dcache_req_inter VX_dcache_req, - - VX_dram_req_rsp_inter VX_dram_req_rsp_icache, - // MEM-Processor - VX_icache_request_inter VX_icache_req, - VX_icache_response_inter VX_icache_rsp + // Core <-> Icache + VX_gpu_dcache_res_inter VX_icache_rsp, + VX_gpu_dcache_req_inter VX_icache_req ); wire to_shm = VX_dcache_req.core_req_addr[0][31:24] == 8'hFF; @@ -30,17 +34,6 @@ module VX_dmem_controller ( wire[`NT_M1:0] cache_driver_out_valid; // Not used for now wire sm_delay; - // I_Cache Signals - - wire[31:0] icache_instruction_out; - wire icache_delay; - wire icache_driver_in_valid = VX_icache_req.out_cache_driver_in_valid; - wire[31:0] icache_driver_in_address = VX_icache_req.pc_address; - wire[2:0] icache_driver_in_mem_read = !(|icache_driver_in_valid) ? `NO_MEM_READ : VX_icache_req.out_cache_driver_in_mem_read; - wire[2:0] icache_driver_in_mem_write = !(|icache_driver_in_valid) ? 
`NO_MEM_WRITE : VX_icache_req.out_cache_driver_in_mem_write; - wire[31:0] icache_driver_in_data = VX_icache_req.out_cache_driver_in_data; - wire read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid); - VX_shared_memory #( .SM_SIZE (`SHARED_MEMORY_SIZE), @@ -159,56 +152,86 @@ module VX_dmem_controller ( ); -VX_d_cache #( - .CACHE_SIZE (`ICACHE_SIZE), - .CACHE_WAYS (`ICACHE_WAYS), - .CACHE_BLOCK (`ICACHE_BLOCK), - .CACHE_BANKS (`ICACHE_BANKS), - .LOG_NUM_BANKS (`ICACHE_LOG_NUM_BANKS), - .NUM_REQ (`ICACHE_NUM_REQ), - .LOG_NUM_REQ (`ICACHE_LOG_NUM_REQ), - .NUM_IND (`ICACHE_NUM_IND), - .CACHE_WAY_INDEX (`ICACHE_WAY_INDEX), - .NUM_WORDS_PER_BLOCK (`ICACHE_NUM_WORDS_PER_BLOCK), - .OFFSET_SIZE_START (`ICACHE_OFFSET_ST), - .OFFSET_SIZE_END (`ICACHE_OFFSET_ED), - .TAG_SIZE_START (`ICACHE_TAG_SIZE_START), - .TAG_SIZE_END (`ICACHE_TAG_SIZE_END), - .IND_SIZE_START (`ICACHE_IND_SIZE_START), - .IND_SIZE_END (`ICACHE_IND_SIZE_END), - .ADDR_TAG_START (`ICACHE_ADDR_TAG_START), - .ADDR_TAG_END (`ICACHE_ADDR_TAG_END), - .ADDR_OFFSET_START (`ICACHE_ADDR_OFFSET_ST), - .ADDR_OFFSET_END (`ICACHE_ADDR_OFFSET_ED), - .ADDR_IND_START (`ICACHE_IND_ST), - .ADDR_IND_END (`ICACHE_IND_ED), - .MEM_ADDR_REQ_MASK (`ICACHE_MEM_REQ_ADDR_MASK) - ) icache + + VX_cache #( + .CACHE_SIZE_BYTES (`ICACHE_SIZE_BYTES), + .BANK_LINE_SIZE_BYTES (`IBANK_LINE_SIZE_BYTES), + .NUMBER_BANKS (`INUMBER_BANKS), + .WORD_SIZE_BYTES (`IWORD_SIZE_BYTES), + .NUMBER_REQUESTS (`INUMBER_REQUESTS), + .STAGE_1_CYCLES (`ISTAGE_1_CYCLES), + .REQQ_SIZE (`IREQQ_SIZE), + .MRVQ_SIZE (`IMRVQ_SIZE), + .DFPQ_SIZE (`IDFPQ_SIZE), + .SNRQ_SIZE (`ISNRQ_SIZE), + .CWBQ_SIZE (`ICWBQ_SIZE), + .DWBQ_SIZE (`IDWBQ_SIZE), + .DFQQ_SIZE (`IDFQQ_SIZE), + .LLVQ_SIZE (`ILLVQ_SIZE), + .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), + .SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES) + ) + gpu_icache ( - .clk (clk), - .rst (reset), - .i_p_valid (icache_driver_in_valid), - .i_p_addr 
(icache_driver_in_address), - .i_p_writedata (icache_driver_in_data), - .i_p_read_or_write (read_or_write_ic), - .i_p_mem_read (icache_driver_in_mem_read), - .i_p_mem_write (icache_driver_in_mem_write), - .o_p_readdata (icache_instruction_out), - .o_p_delay (icache_delay), - .o_m_evict_addr (VX_dram_req_rsp_icache.o_m_evict_addr), - .o_m_read_addr (VX_dram_req_rsp_icache.o_m_read_addr), - .o_m_valid (VX_dram_req_rsp_icache.o_m_valid), - .o_m_writedata (VX_dram_req_rsp_icache.o_m_writedata), - .o_m_read_or_write (VX_dram_req_rsp_icache.o_m_read_or_write), - .i_m_readdata (VX_dram_req_rsp_icache.i_m_readdata), - .i_m_ready (VX_dram_req_rsp_icache.i_m_ready) - ); + .clk (clk), + .reset (reset), - // assign VX_dcache_rsp.in_cache_driver_out_data = (to_shm && 0) ? sm_driver_out_data : cache_driver_out_data; - // assign VX_dcache_rsp.delay = (sm_delay && 0) || cache_delay; + // Core req + .core_req_valid (VX_icache_req.core_req_valid), + .core_req_addr (VX_icache_req.core_req_addr), + .core_req_writedata(VX_icache_req.core_req_writedata), + .core_req_mem_read (VX_icache_req.core_req_mem_read), + .core_req_mem_write(VX_icache_req.core_req_mem_write), + .core_req_rd (VX_icache_req.core_req_rd), + .core_req_wb (VX_icache_req.core_req_wb), + .core_req_warp_num (VX_icache_req.core_req_warp_num), + .core_req_pc (VX_icache_req.core_req_pc), + + // Delay Core Req + .delay_req (VX_icache_rsp.delay_req), + + // Core Cache Can't WB + .core_no_wb_slot (VX_icache_req.core_no_wb_slot), + + // Cache CWB + .core_wb_valid (VX_icache_rsp.core_wb_valid), + .core_wb_req_rd (VX_icache_rsp.core_wb_req_rd), + .core_wb_req_wb (VX_icache_rsp.core_wb_req_wb), + .core_wb_warp_num (VX_icache_rsp.core_wb_warp_num), + .core_wb_readdata (VX_icache_rsp.core_wb_readdata), + .core_wb_pc (VX_icache_rsp.core_wb_pc), + + // DRAM response + .dram_fill_rsp (VX_gpu_icache_dram_res.dram_fill_rsp), + .dram_fill_rsp_addr(VX_gpu_icache_dram_res.dram_fill_rsp_addr), + 
.dram_fill_rsp_data(VX_gpu_icache_dram_res.dram_fill_rsp_data), + + // DRAM accept response + .dram_fill_accept (VX_gpu_icache_dram_req.dram_fill_accept), + + // DRAM Req + .dram_req (VX_gpu_icache_dram_req.dram_req), + .dram_req_write (VX_gpu_icache_dram_req.dram_req_write), + .dram_req_read (VX_gpu_icache_dram_req.dram_req_read), + .dram_req_addr (VX_gpu_icache_dram_req.dram_req_addr), + .dram_req_size (VX_gpu_icache_dram_req.dram_req_size), + .dram_req_data (VX_gpu_icache_dram_req.dram_req_data), + + // Snoop Response + .dram_req_because_of_wb(VX_gpu_icache_dram_req.dram_because_of_snp), + .dram_snp_full (VX_gpu_icache_dram_req.dram_snp_full), + + // Snoop Request + .snp_req (0), + .snp_req_addr (0), + + // LLVQ stuff + .llvq_pop (Dllvq_pop), + .llvq_valid (Dllvq_valid), + .llvq_res_addr (Dllvq_res_addr), + .llvq_res_data (Dllvq_res_data) + ); - assign VX_icache_rsp.instruction = icache_instruction_out; - assign VX_icache_rsp.delay = icache_delay; endmodule diff --git a/rtl/VX_front_end.v b/rtl/VX_front_end.v index 0ab8288f..902e9a5c 100644 --- a/rtl/VX_front_end.v +++ b/rtl/VX_front_end.v @@ -8,8 +8,8 @@ module VX_front_end ( VX_warp_ctl_inter VX_warp_ctl, - VX_icache_response_inter icache_response_fe, - VX_icache_request_inter icache_request_fe, + VX_gpu_dcache_res_inter VX_icache_rsp, + VX_gpu_dcache_req_inter VX_icache_req, VX_jal_response_inter VX_jal_rsp, VX_branch_response_inter VX_branch_rsp, @@ -76,15 +76,16 @@ VX_f_d_reg vx_f_i_reg( ); VX_icache_stage VX_icache_stage( - .clk (clk), - .reset (reset), - .icache_stage_delay(icache_stage_delay), + .clk (clk), + .reset (reset), + .total_freeze (total_freeze), + .icache_stage_delay (icache_stage_delay), .icache_stage_valids(icache_stage_valids), - .icache_stage_wid (icache_stage_wid), - .fe_inst_meta_fi (fe_inst_meta_fi2), - .fe_inst_meta_id (fe_inst_meta_id), - .icache_response (icache_response_fe), - .icache_request (icache_request_fe) + .icache_stage_wid (icache_stage_wid), + .fe_inst_meta_fi 
(fe_inst_meta_fi2), + .fe_inst_meta_id (fe_inst_meta_id), + .VX_icache_rsp (VX_icache_rsp), + .VX_icache_req (VX_icache_req) ); diff --git a/rtl/VX_lsu.v b/rtl/VX_lsu.v index 0081dba8..c8b7aeeb 100644 --- a/rtl/VX_lsu.v +++ b/rtl/VX_lsu.v @@ -56,6 +56,10 @@ module VX_lsu ( assign VX_dcache_req.core_req_warp_num = use_warp_num; assign VX_dcache_req.core_req_pc = use_pc; + // Core can't accept response + assign VX_dcache_req.core_no_wb_slot = no_slot_mem; + + // Cache can't accept request assign out_delay = VX_dcache_rsp.delay_req; @@ -67,33 +71,8 @@ module VX_lsu ( assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata; assign VX_mem_wb.mem_wb_pc = VX_dcache_rsp.core_wb_pc[0]; - // Core can't accept response - assign VX_dcache_req.core_no_wb_slot = no_slot_mem; - // integer curr_t; - // always @(negedge clk) begin - // for (int curr_t = 0; curr_t < `NT; curr_t=curr_t+1) - // if ((VX_dcache_req.out_cache_driver_in_valid[curr_t]) && !out_delay) begin - // if (VX_dcache_req.out_cache_driver_in_mem_read != `NO_MEM_READ) begin - // $display("Reading addr: %x val: %x", address[0], VX_mem_wb.loaded_data[0]); - // end - // if (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) begin - // $display("Writing addr: %x val: %x", address[0], VX_dcache_req.out_cache_driver_in_data[0]); - // end - // end - // end - - // wire zero_temp = 0; - // VX_generic_register #(.N(142)) register_wb_data - // ( - // .clk (clk), - // .reset(reset), - // .stall(zero_temp), - // .flush(out_delay), - // .in ({VX_mem_wb_temp.loaded_data, VX_mem_wb_temp.rd, VX_mem_wb_temp.wb, VX_mem_wb_temp.wb_valid, VX_mem_wb_temp.wb_warp_num}), - // .out ({VX_mem_wb.loaded_data , VX_mem_wb.rd , VX_mem_wb.wb , VX_mem_wb.wb_valid , VX_mem_wb.wb_warp_num }) - // ); endmodule // Memory diff --git a/rtl/Vortex.v b/rtl/Vortex.v index eacfc257..9fcf3028 100644 --- a/rtl/Vortex.v +++ b/rtl/Vortex.v @@ -27,16 +27,22 @@ module Vortex input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], - // Req I Mem 
- output reg [31:0] o_m_read_addr_i, - output reg [31:0] o_m_evict_addr_i, - output reg o_m_valid_i, - output reg [31:0] o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0], - output reg o_m_read_or_write_i, + // DRAM Icache Req (data array is sized by the Icache bank line, IBANK_LINE_SIZE_RNG, to match the IBANK_LINE_SIZE_WORDS copy loop below) + output wire I_dram_req, + output wire I_dram_req_write, + output wire I_dram_req_read, + output wire [31:0] I_dram_req_addr, + output wire [31:0] I_dram_req_size, + output wire [31:0] I_dram_req_data[`IBANK_LINE_SIZE_RNG], + output wire [31:0] I_dram_expected_lat, + + // DRAM Icache Res (fill data array is sized by the Icache bank line, IBANK_LINE_SIZE_RNG) + output wire I_dram_fill_accept, + input wire I_dram_fill_rsp, + input wire [31:0] I_dram_fill_rsp_addr, + input wire [31:0] I_dram_fill_rsp_data[`IBANK_LINE_SIZE_RNG], + - // Rsp I Mem - input wire [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0], - input wire i_m_ready_i, output wire out_ebreak ); @@ -46,19 +52,6 @@ module Vortex assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1); - reg[31:0] icache_banks = `ICACHE_BANKS; - reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK; - reg[31:0] number_threads = `NT; - reg[31:0] number_warps = `NW; - - always @(posedge clk) begin - icache_banks <= icache_banks; - icache_num_words_per_block <= icache_num_words_per_block; - - number_threads <= number_threads; - number_warps <= number_warps; - end - wire memory_delay; wire exec_delay; wire gpr_stage_delay; @@ -110,30 +103,32 @@ module Vortex assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot; - VX_icache_response_inter icache_response_fe(); - VX_icache_request_inter icache_request_fe(); - VX_dram_req_rsp_inter #( - .NUMBER_BANKS(`ICACHE_BANKS), - .NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp_icache(); + VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp(); + VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req(); - //assign icache_response_fe.instruction = icache_response_instruction; - assign 
icache_request_pc_address = icache_request_fe.pc_address; - - assign o_m_valid_i = VX_dram_req_rsp_icache.o_m_valid; - assign o_m_read_addr_i = VX_dram_req_rsp_icache.o_m_read_addr; - assign o_m_evict_addr_i = VX_dram_req_rsp_icache.o_m_evict_addr; - assign o_m_read_or_write_i = VX_dram_req_rsp_icache.o_m_read_or_write; - assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i; - genvar curr_bank; - genvar curr_word; + VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_req(); + VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_res(); -for (curr_bank = 0; curr_bank < `ICACHE_BANKS; curr_bank = curr_bank + 1) begin : icache_setup - for (curr_word = 0; curr_word < `ICACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin : icache_banks_setup - assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word]; - assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed - end -end + assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp; + assign VX_gpu_icache_dram_res.dram_fill_rsp_addr = I_dram_fill_rsp_addr; + + assign I_dram_req = VX_gpu_icache_dram_req.dram_req; + assign I_dram_req_write = VX_gpu_icache_dram_req.dram_req_write; + assign I_dram_req_read = VX_gpu_icache_dram_req.dram_req_read; + assign I_dram_req_addr = VX_gpu_icache_dram_req.dram_req_addr; + assign I_dram_req_size = VX_gpu_icache_dram_req.dram_req_size; + assign I_dram_expected_lat = `ISIMULATED_DRAM_LATENCY_CYCLES; + assign I_dram_fill_accept = VX_gpu_icache_dram_req.dram_fill_accept; + + genvar iwordy; + generate + for (iwordy = 0; iwordy < `IBANK_LINE_SIZE_WORDS; iwordy=iwordy+1) begin + assign VX_gpu_icache_dram_res.dram_fill_rsp_data[iwordy] = I_dram_fill_rsp_data[iwordy]; + assign I_dram_req_data[iwordy] = VX_gpu_icache_dram_req.dram_req_data[iwordy]; + end + endgenerate + 
///////////////////////////////////////////////////////////////////////// @@ -158,8 +153,8 @@ VX_front_end vx_front_end( .VX_warp_ctl (VX_warp_ctl), .VX_bckE_req (VX_bckE_req), .schedule_delay (schedule_delay), - .icache_response_fe (icache_response_fe), - .icache_request_fe (icache_request_fe), + .VX_icache_rsp (VX_icache_rsp), + .VX_icache_req (VX_icache_req), .VX_jal_rsp (VX_jal_rsp), .VX_branch_rsp (VX_branch_rsp), .fetch_ebreak (out_ebreak_unqual) @@ -197,11 +192,20 @@ VX_back_end vx_back_end( VX_dmem_controller VX_dmem_controller( .clk (clk), .reset (reset), + + // Dram <-> Dcache .VX_gpu_dcache_dram_req (VX_gpu_dcache_dram_req), .VX_gpu_dcache_dram_res (VX_gpu_dcache_dram_res), - .VX_dram_req_rsp_icache (VX_dram_req_rsp_icache), - .VX_icache_req (icache_request_fe), - .VX_icache_rsp (icache_response_fe), + + // Dram <-> Icache + .VX_gpu_icache_dram_req (VX_gpu_icache_dram_req), + .VX_gpu_icache_dram_res (VX_gpu_icache_dram_res), + + // Core <-> Icache + .VX_icache_req (VX_icache_req), + .VX_icache_rsp (VX_icache_rsp), + + // Core <-> Dcache .VX_dcache_req (VX_dcache_req_qual), .VX_dcache_rsp (VX_dcache_rsp) ); diff --git a/rtl/simulate/test_bench.h b/rtl/simulate/test_bench.h index 72d26652..69e2d257 100644 --- a/rtl/simulate/test_bench.h +++ b/rtl/simulate/test_bench.h @@ -78,6 +78,7 @@ class Vortex int debug_debugAddr; double stats_sim_time; std::vector dram_req_vec; + std::vector I_dram_req_vec; #ifdef VCD_OUTPUT VerilatedVcdC *m_trace; #endif @@ -165,78 +166,100 @@ void Vortex::print_stats(bool cycle_test) bool Vortex::ibus_driver() { - vortex->i_m_ready_i = false; - + // Iterate through each element, and get pop index + int dequeue_index = -1; + bool dequeue_valid = false; + for (int i = 0; i < this->I_dram_req_vec.size(); i++) { - - // int dcache_num_words_per_block - - if (refill_i) + if (this->I_dram_req_vec[i].cycles_left > 0) { - refill_i = false; - vortex->i_m_ready_i = true; - - for (int curr_bank = 0; curr_bank < 
vortex->Vortex__DOT__icache_banks; curr_bank++) - { - for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++) - { - unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank; - unsigned curr_addr = refill_addr_i + (4*curr_index); - - unsigned curr_value; - ram.getWord(curr_addr, &curr_value); - - vortex->i_m_readdata_i[curr_bank][curr_word] = curr_value; - - } - } - } - else - { - if (vortex->o_m_valid_i) - { - - if (vortex->o_m_read_or_write_i) - { - // fprintf(stderr, "++++++++++++++++++++++++++++++++\n"); - unsigned base_addr = vortex->o_m_evict_addr_i; - - for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++) - { - for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++) - { - unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank; - unsigned curr_addr = base_addr + (4*curr_index); - - unsigned curr_value = vortex->o_m_writedata_i[curr_bank][curr_word]; - - ram.writeWord( curr_addr, &curr_value); - } - } - } - - // Respond next cycle - refill_i = true; - refill_addr_i = vortex->o_m_read_addr_i; - } + this->I_dram_req_vec[i].cycles_left -= 1; } + if ((this->I_dram_req_vec[i].cycles_left == 0) && (!dequeue_valid)) + { + dequeue_index = i; + dequeue_valid = true; + } } + if (vortex->I_dram_req) + { + // std::cout << "Icache Dram Request received!\n"; + if (vortex->I_dram_req_read) + { + // std::cout << "Icache Dram Request is read!\n"; + // Need to add an element + dram_req_t dram_req; + dram_req.cycles_left = vortex->I_dram_expected_lat; + dram_req.data_length = vortex->I_dram_req_size / 4; + dram_req.base_addr = vortex->I_dram_req_addr; + dram_req.data = (unsigned *) malloc(dram_req.data_length * sizeof(unsigned)); + + for (int i = 0; i < dram_req.data_length; i++) + { + unsigned curr_addr = dram_req.base_addr + (i*4); + unsigned data_rd; + ram.getWord(curr_addr, &data_rd); + dram_req.data[i] = 
data_rd; + } + // std::cout << "Fill Req -> Addr: " << std::hex << dram_req.base_addr << std::dec << "\n"; + this->I_dram_req_vec.push_back(dram_req); + } + + if (vortex->I_dram_req_write) + { + unsigned base_addr = vortex->I_dram_req_addr; + unsigned data_length = vortex->I_dram_req_size / 4; + + for (int i = 0; i < data_length; i++) + { + unsigned curr_addr = base_addr + (i*4); + unsigned data_wr = vortex->I_dram_req_data[i]; + ram.writeWord(curr_addr, &data_wr); + } + } + } + + if (vortex->I_dram_fill_accept && dequeue_valid) + { + // std::cout << "Icache Dram Response Sending...!\n"; + + vortex->I_dram_fill_rsp = 1; + vortex->I_dram_fill_rsp_addr = this->I_dram_req_vec[dequeue_index].base_addr; + // std::cout << "Fill Rsp -> Addr: " << std::hex << (this->I_dram_req_vec[dequeue_index].base_addr) << std::dec << "\n"; + + for (int i = 0; i < this->I_dram_req_vec[dequeue_index].data_length; i++) + { + vortex->I_dram_fill_rsp_data[i] = this->I_dram_req_vec[dequeue_index].data[i]; + } + free(this->I_dram_req_vec[dequeue_index].data); + + this->I_dram_req_vec.erase(this->I_dram_req_vec.begin() + dequeue_index); + } + else + { + vortex->I_dram_fill_rsp = 0; + vortex->I_dram_fill_rsp_addr = 0; + } + return false; } void Vortex::io_handler() { + // std::cout << "Checking\n"; if (vortex->io_valid) { uint32_t data_write = (uint32_t) vortex->io_data; - + // std::cout << "IO VALID!\n"; char c = (char) data_write; std::cerr << c; // std::cout << c; + + std::cout << std::flush; } } @@ -280,6 +303,7 @@ bool Vortex::dbus_driver() ram.getWord(curr_addr, &data_rd); dram_req.data[i] = data_rd; } + // std::cout << "Fill Req -> Addr: " << std::hex << dram_req.base_addr << std::dec << "\n"; this->dram_req_vec.push_back(dram_req); } @@ -301,6 +325,8 @@ bool Vortex::dbus_driver() { vortex->dram_fill_rsp = 1; vortex->dram_fill_rsp_addr = this->dram_req_vec[dequeue_index].base_addr; + // std::cout << "Fill Rsp -> Addr: " << std::hex << (this->dram_req_vec[dequeue_index].base_addr) << 
std::dec << "\n"; + for (int i = 0; i < this->dram_req_vec[dequeue_index].data_length; i++) { vortex->dram_fill_rsp_data[i] = this->dram_req_vec[dequeue_index].data[i];