Icache working
This commit is contained in:
@@ -262,7 +262,53 @@
|
||||
// ========================================= Dcache Configurable Knobs =========================================
|
||||
|
||||
|
||||
// ========================================= Icache Configurable Knobs =========================================
|
||||
|
||||
// General Cache Knobs
|
||||
// Size of cache in bytes
|
||||
`define ICACHE_SIZE_BYTES 1024
|
||||
// Size of line inside a bank in bytes
|
||||
`define IBANK_LINE_SIZE_BYTES 16
|
||||
// Number of banks {1, 2, 4, 8,...}
|
||||
`define INUMBER_BANKS 8
|
||||
// Size of a word in bytes
|
||||
`define IWORD_SIZE_BYTES 4
|
||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||
`define INUMBER_REQUESTS 1
|
||||
// Number of cycles to complete stage 1 (read from memory)
|
||||
`define ISTAGE_1_CYCLES 2
|
||||
|
||||
// Number of words in a bank line
|
||||
`define IBANK_LINE_SIZE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES)
|
||||
`define IBANK_LINE_SIZE_RNG `IBANK_LINE_SIZE_WORDS-1:0
|
||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||
|
||||
// Core Request Queue Size
|
||||
`define IREQQ_SIZE `NW
|
||||
// Miss Reserve Queue Size
|
||||
// NOTE(review): sized from DREQQ_SIZE (defined outside this view; presumably the Dcache request-queue knob) rather than IREQQ_SIZE -- confirm this is intended for the Icache.
`define IMRVQ_SIZE `DREQQ_SIZE
|
||||
// Dram Fill Rsp Queue Size
|
||||
`define IDFPQ_SIZE 2
|
||||
// Snoop Req Queue
|
||||
`define ISNRQ_SIZE 8
|
||||
|
||||
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
|
||||
// Core Writeback Queue Size
|
||||
// NOTE(review): sized from DREQQ_SIZE (defined outside this view; presumably the Dcache request-queue knob) rather than IREQQ_SIZE -- confirm this is intended for the Icache.
`define ICWBQ_SIZE `DREQQ_SIZE
|
||||
// Dram Writeback Queue Size
|
||||
`define IDWBQ_SIZE 4
|
||||
// Dram Fill Req Queue Size
|
||||
// NOTE(review): sized from DREQQ_SIZE (defined outside this view; presumably the Dcache request-queue knob) rather than IREQQ_SIZE -- confirm this is intended for the Icache.
`define IDFQQ_SIZE `DREQQ_SIZE
|
||||
// Lower Level Cache Hit Queue Size
|
||||
// NOTE(review): 0 is not one of the {1, 2, 4, 8, ...} sizes listed for this knob group -- presumably it disables the queue; confirm VX_cache accepts LLVQ_SIZE == 0.
`define ILLVQ_SIZE 0
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`define IFILL_INVALIDAOR_SIZE 16
|
||||
|
||||
// Dram knobs
|
||||
`define ISIMULATED_DRAM_LATENCY_CYCLES 10
|
||||
|
||||
// ========================================= Icache Configurable Knobs =========================================
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,18 +3,22 @@
|
||||
module VX_dmem_controller (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Dcache
|
||||
|
||||
// Dram <-> Dcache
|
||||
VX_gpu_dcache_dram_req_inter VX_gpu_dcache_dram_req,
|
||||
VX_gpu_dcache_dram_res_inter VX_gpu_dcache_dram_res,
|
||||
|
||||
// Dram <-> Icache
|
||||
VX_gpu_dcache_dram_req_inter VX_gpu_icache_dram_req,
|
||||
VX_gpu_dcache_dram_res_inter VX_gpu_icache_dram_res,
|
||||
|
||||
// Core <-> Dcache
|
||||
VX_gpu_dcache_res_inter VX_dcache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_dcache_req,
|
||||
|
||||
|
||||
VX_dram_req_rsp_inter VX_dram_req_rsp_icache,
|
||||
// MEM-Processor
|
||||
VX_icache_request_inter VX_icache_req,
|
||||
VX_icache_response_inter VX_icache_rsp
|
||||
// Core <-> Icache
|
||||
VX_gpu_dcache_res_inter VX_icache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_icache_req
|
||||
);
|
||||
|
||||
wire to_shm = VX_dcache_req.core_req_addr[0][31:24] == 8'hFF;
|
||||
@@ -30,17 +34,6 @@ module VX_dmem_controller (
|
||||
wire[`NT_M1:0] cache_driver_out_valid; // Not used for now
|
||||
wire sm_delay;
|
||||
|
||||
// I_Cache Signals
|
||||
|
||||
wire[31:0] icache_instruction_out;
|
||||
wire icache_delay;
|
||||
wire icache_driver_in_valid = VX_icache_req.out_cache_driver_in_valid;
|
||||
wire[31:0] icache_driver_in_address = VX_icache_req.pc_address;
|
||||
wire[2:0] icache_driver_in_mem_read = !(|icache_driver_in_valid) ? `NO_MEM_READ : VX_icache_req.out_cache_driver_in_mem_read;
|
||||
wire[2:0] icache_driver_in_mem_write = !(|icache_driver_in_valid) ? `NO_MEM_WRITE : VX_icache_req.out_cache_driver_in_mem_write;
|
||||
wire[31:0] icache_driver_in_data = VX_icache_req.out_cache_driver_in_data;
|
||||
wire read_or_write_ic = (VX_icache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) && (|icache_driver_in_valid);
|
||||
|
||||
|
||||
VX_shared_memory #(
|
||||
.SM_SIZE (`SHARED_MEMORY_SIZE),
|
||||
@@ -159,56 +152,86 @@ module VX_dmem_controller (
|
||||
);
|
||||
|
||||
|
||||
VX_d_cache #(
|
||||
.CACHE_SIZE (`ICACHE_SIZE),
|
||||
.CACHE_WAYS (`ICACHE_WAYS),
|
||||
.CACHE_BLOCK (`ICACHE_BLOCK),
|
||||
.CACHE_BANKS (`ICACHE_BANKS),
|
||||
.LOG_NUM_BANKS (`ICACHE_LOG_NUM_BANKS),
|
||||
.NUM_REQ (`ICACHE_NUM_REQ),
|
||||
.LOG_NUM_REQ (`ICACHE_LOG_NUM_REQ),
|
||||
.NUM_IND (`ICACHE_NUM_IND),
|
||||
.CACHE_WAY_INDEX (`ICACHE_WAY_INDEX),
|
||||
.NUM_WORDS_PER_BLOCK (`ICACHE_NUM_WORDS_PER_BLOCK),
|
||||
.OFFSET_SIZE_START (`ICACHE_OFFSET_ST),
|
||||
.OFFSET_SIZE_END (`ICACHE_OFFSET_ED),
|
||||
.TAG_SIZE_START (`ICACHE_TAG_SIZE_START),
|
||||
.TAG_SIZE_END (`ICACHE_TAG_SIZE_END),
|
||||
.IND_SIZE_START (`ICACHE_IND_SIZE_START),
|
||||
.IND_SIZE_END (`ICACHE_IND_SIZE_END),
|
||||
.ADDR_TAG_START (`ICACHE_ADDR_TAG_START),
|
||||
.ADDR_TAG_END (`ICACHE_ADDR_TAG_END),
|
||||
.ADDR_OFFSET_START (`ICACHE_ADDR_OFFSET_ST),
|
||||
.ADDR_OFFSET_END (`ICACHE_ADDR_OFFSET_ED),
|
||||
.ADDR_IND_START (`ICACHE_IND_ST),
|
||||
.ADDR_IND_END (`ICACHE_IND_ED),
|
||||
.MEM_ADDR_REQ_MASK (`ICACHE_MEM_REQ_ADDR_MASK)
|
||||
) icache
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`ICACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (`IBANK_LINE_SIZE_BYTES),
|
||||
.NUMBER_BANKS (`INUMBER_BANKS),
|
||||
.WORD_SIZE_BYTES (`IWORD_SIZE_BYTES),
|
||||
.NUMBER_REQUESTS (`INUMBER_REQUESTS),
|
||||
.STAGE_1_CYCLES (`ISTAGE_1_CYCLES),
|
||||
.REQQ_SIZE (`IREQQ_SIZE),
|
||||
.MRVQ_SIZE (`IMRVQ_SIZE),
|
||||
.DFPQ_SIZE (`IDFPQ_SIZE),
|
||||
.SNRQ_SIZE (`ISNRQ_SIZE),
|
||||
.CWBQ_SIZE (`ICWBQ_SIZE),
|
||||
.DWBQ_SIZE (`IDWBQ_SIZE),
|
||||
.DFQQ_SIZE (`IDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`ILLVQ_SIZE),
|
||||
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
gpu_icache
|
||||
(
|
||||
.clk (clk),
|
||||
.rst (reset),
|
||||
.i_p_valid (icache_driver_in_valid),
|
||||
.i_p_addr (icache_driver_in_address),
|
||||
.i_p_writedata (icache_driver_in_data),
|
||||
.i_p_read_or_write (read_or_write_ic),
|
||||
.i_p_mem_read (icache_driver_in_mem_read),
|
||||
.i_p_mem_write (icache_driver_in_mem_write),
|
||||
.o_p_readdata (icache_instruction_out),
|
||||
.o_p_delay (icache_delay),
|
||||
.o_m_evict_addr (VX_dram_req_rsp_icache.o_m_evict_addr),
|
||||
.o_m_read_addr (VX_dram_req_rsp_icache.o_m_read_addr),
|
||||
.o_m_valid (VX_dram_req_rsp_icache.o_m_valid),
|
||||
.o_m_writedata (VX_dram_req_rsp_icache.o_m_writedata),
|
||||
.o_m_read_or_write (VX_dram_req_rsp_icache.o_m_read_or_write),
|
||||
.i_m_readdata (VX_dram_req_rsp_icache.i_m_readdata),
|
||||
.i_m_ready (VX_dram_req_rsp_icache.i_m_ready)
|
||||
);
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// assign VX_dcache_rsp.in_cache_driver_out_data = (to_shm && 0) ? sm_driver_out_data : cache_driver_out_data;
|
||||
// assign VX_dcache_rsp.delay = (sm_delay && 0) || cache_delay;
|
||||
// Core req
|
||||
.core_req_valid (VX_icache_req.core_req_valid),
|
||||
.core_req_addr (VX_icache_req.core_req_addr),
|
||||
.core_req_writedata(VX_icache_req.core_req_writedata),
|
||||
.core_req_mem_read (VX_icache_req.core_req_mem_read),
|
||||
.core_req_mem_write(VX_icache_req.core_req_mem_write),
|
||||
.core_req_rd (VX_icache_req.core_req_rd),
|
||||
.core_req_wb (VX_icache_req.core_req_wb),
|
||||
.core_req_warp_num (VX_icache_req.core_req_warp_num),
|
||||
.core_req_pc (VX_icache_req.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (VX_icache_rsp.delay_req),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (VX_icache_req.core_no_wb_slot),
|
||||
|
||||
// Cache CWB
|
||||
.core_wb_valid (VX_icache_rsp.core_wb_valid),
|
||||
.core_wb_req_rd (VX_icache_rsp.core_wb_req_rd),
|
||||
.core_wb_req_wb (VX_icache_rsp.core_wb_req_wb),
|
||||
.core_wb_warp_num (VX_icache_rsp.core_wb_warp_num),
|
||||
.core_wb_readdata (VX_icache_rsp.core_wb_readdata),
|
||||
.core_wb_pc (VX_icache_rsp.core_wb_pc),
|
||||
|
||||
// DRAM response
|
||||
.dram_fill_rsp (VX_gpu_icache_dram_res.dram_fill_rsp),
|
||||
.dram_fill_rsp_addr(VX_gpu_icache_dram_res.dram_fill_rsp_addr),
|
||||
.dram_fill_rsp_data(VX_gpu_icache_dram_res.dram_fill_rsp_data),
|
||||
|
||||
// DRAM accept response
|
||||
.dram_fill_accept (VX_gpu_icache_dram_req.dram_fill_accept),
|
||||
|
||||
// DRAM Req
|
||||
.dram_req (VX_gpu_icache_dram_req.dram_req),
|
||||
.dram_req_write (VX_gpu_icache_dram_req.dram_req_write),
|
||||
.dram_req_read (VX_gpu_icache_dram_req.dram_req_read),
|
||||
.dram_req_addr (VX_gpu_icache_dram_req.dram_req_addr),
|
||||
.dram_req_size (VX_gpu_icache_dram_req.dram_req_size),
|
||||
.dram_req_data (VX_gpu_icache_dram_req.dram_req_data),
|
||||
|
||||
// Snoop Response
|
||||
.dram_req_because_of_wb(VX_gpu_icache_dram_req.dram_because_of_snp),
|
||||
.dram_snp_full (VX_gpu_icache_dram_req.dram_snp_full),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req (0),
|
||||
.snp_req_addr (0),
|
||||
|
||||
// LLVQ stuff
|
||||
.llvq_pop (Dllvq_pop),
|
||||
.llvq_valid (Dllvq_valid),
|
||||
.llvq_res_addr (Dllvq_res_addr),
|
||||
.llvq_res_data (Dllvq_res_data)
|
||||
);
|
||||
|
||||
assign VX_icache_rsp.instruction = icache_instruction_out;
|
||||
assign VX_icache_rsp.delay = icache_delay;
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -8,8 +8,8 @@ module VX_front_end (
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl,
|
||||
|
||||
VX_icache_response_inter icache_response_fe,
|
||||
VX_icache_request_inter icache_request_fe,
|
||||
VX_gpu_dcache_res_inter VX_icache_rsp,
|
||||
VX_gpu_dcache_req_inter VX_icache_req,
|
||||
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
@@ -76,15 +76,16 @@ VX_f_d_reg vx_f_i_reg(
|
||||
);
|
||||
|
||||
VX_icache_stage VX_icache_stage(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.icache_stage_delay(icache_stage_delay),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.total_freeze (total_freeze),
|
||||
.icache_stage_delay (icache_stage_delay),
|
||||
.icache_stage_valids(icache_stage_valids),
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi2),
|
||||
.fe_inst_meta_id (fe_inst_meta_id),
|
||||
.icache_response (icache_response_fe),
|
||||
.icache_request (icache_request_fe)
|
||||
.icache_stage_wid (icache_stage_wid),
|
||||
.fe_inst_meta_fi (fe_inst_meta_fi2),
|
||||
.fe_inst_meta_id (fe_inst_meta_id),
|
||||
.VX_icache_rsp (VX_icache_rsp),
|
||||
.VX_icache_req (VX_icache_req)
|
||||
);
|
||||
|
||||
|
||||
|
||||
29
rtl/VX_lsu.v
29
rtl/VX_lsu.v
@@ -56,6 +56,10 @@ module VX_lsu (
|
||||
assign VX_dcache_req.core_req_warp_num = use_warp_num;
|
||||
assign VX_dcache_req.core_req_pc = use_pc;
|
||||
|
||||
// Core can't accept response
|
||||
assign VX_dcache_req.core_no_wb_slot = no_slot_mem;
|
||||
|
||||
|
||||
// Cache can't accept request
|
||||
assign out_delay = VX_dcache_rsp.delay_req;
|
||||
|
||||
@@ -67,33 +71,8 @@ module VX_lsu (
|
||||
assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata;
|
||||
assign VX_mem_wb.mem_wb_pc = VX_dcache_rsp.core_wb_pc[0];
|
||||
|
||||
// Core can't accept response
|
||||
assign VX_dcache_req.core_no_wb_slot = no_slot_mem;
|
||||
|
||||
// integer curr_t;
|
||||
// always @(negedge clk) begin
|
||||
// for (int curr_t = 0; curr_t < `NT; curr_t=curr_t+1)
|
||||
// if ((VX_dcache_req.out_cache_driver_in_valid[curr_t]) && !out_delay) begin
|
||||
// if (VX_dcache_req.out_cache_driver_in_mem_read != `NO_MEM_READ) begin
|
||||
// $display("Reading addr: %x val: %x", address[0], VX_mem_wb.loaded_data[0]);
|
||||
// end
|
||||
|
||||
// if (VX_dcache_req.out_cache_driver_in_mem_write != `NO_MEM_WRITE) begin
|
||||
// $display("Writing addr: %x val: %x", address[0], VX_dcache_req.out_cache_driver_in_data[0]);
|
||||
// end
|
||||
// end
|
||||
// end
|
||||
|
||||
// wire zero_temp = 0;
|
||||
// VX_generic_register #(.N(142)) register_wb_data
|
||||
// (
|
||||
// .clk (clk),
|
||||
// .reset(reset),
|
||||
// .stall(zero_temp),
|
||||
// .flush(out_delay),
|
||||
// .in ({VX_mem_wb_temp.loaded_data, VX_mem_wb_temp.rd, VX_mem_wb_temp.wb, VX_mem_wb_temp.wb_valid, VX_mem_wb_temp.wb_warp_num}),
|
||||
// .out ({VX_mem_wb.loaded_data , VX_mem_wb.rd , VX_mem_wb.wb , VX_mem_wb.wb_valid , VX_mem_wb.wb_warp_num })
|
||||
// );
|
||||
|
||||
|
||||
endmodule // VX_lsu
|
||||
|
||||
100
rtl/Vortex.v
100
rtl/Vortex.v
@@ -27,16 +27,22 @@ module Vortex
|
||||
input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
|
||||
|
||||
|
||||
// Req I Mem
|
||||
output reg [31:0] o_m_read_addr_i,
|
||||
output reg [31:0] o_m_evict_addr_i,
|
||||
output reg o_m_valid_i,
|
||||
output reg [31:0] o_m_writedata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
|
||||
output reg o_m_read_or_write_i,
|
||||
// DRAM Icache Req
|
||||
output wire I_dram_req,
|
||||
output wire I_dram_req_write,
|
||||
output wire I_dram_req_read,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [31:0] I_dram_req_size,
|
||||
// NOTE(review): dimensioned with DBANK_LINE_SIZE_RNG, but the generate loop that drives this port iterates over IBANK_LINE_SIZE_WORDS -- confirm both line sizes match, or size the port with IBANK_LINE_SIZE_RNG.
output wire [31:0] I_dram_req_data[`DBANK_LINE_SIZE_RNG],
|
||||
output wire [31:0] I_dram_expected_lat,
|
||||
|
||||
// DRAM Icache Res
|
||||
output wire I_dram_fill_accept,
|
||||
input wire I_dram_fill_rsp,
|
||||
input wire [31:0] I_dram_fill_rsp_addr,
|
||||
// NOTE(review): dimensioned with DBANK_LINE_SIZE_RNG, but the generate loop that reads this port iterates over IBANK_LINE_SIZE_WORDS -- confirm both line sizes match, or size the port with IBANK_LINE_SIZE_RNG.
input wire [31:0] I_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
|
||||
|
||||
|
||||
// Rsp I Mem
|
||||
input wire [31:0] i_m_readdata_i[`ICACHE_BANKS - 1:0][`ICACHE_NUM_WORDS_PER_BLOCK-1:0],
|
||||
input wire i_m_ready_i,
|
||||
output wire out_ebreak
|
||||
);
|
||||
|
||||
@@ -46,19 +52,6 @@ module Vortex
|
||||
assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1);
|
||||
|
||||
|
||||
reg[31:0] icache_banks = `ICACHE_BANKS;
|
||||
reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK;
|
||||
reg[31:0] number_threads = `NT;
|
||||
reg[31:0] number_warps = `NW;
|
||||
|
||||
always @(posedge clk) begin
|
||||
icache_banks <= icache_banks;
|
||||
icache_num_words_per_block <= icache_num_words_per_block;
|
||||
|
||||
number_threads <= number_threads;
|
||||
number_warps <= number_warps;
|
||||
end
|
||||
|
||||
wire memory_delay;
|
||||
wire exec_delay;
|
||||
wire gpr_stage_delay;
|
||||
@@ -110,30 +103,32 @@ module Vortex
|
||||
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
|
||||
|
||||
|
||||
VX_icache_response_inter icache_response_fe();
|
||||
VX_icache_request_inter icache_request_fe();
|
||||
VX_dram_req_rsp_inter #(
|
||||
.NUMBER_BANKS(`ICACHE_BANKS),
|
||||
.NUM_WORDS_PER_BLOCK(`ICACHE_NUM_WORDS_PER_BLOCK)) VX_dram_req_rsp_icache();
|
||||
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req();
|
||||
|
||||
//assign icache_response_fe.instruction = icache_response_instruction;
|
||||
assign icache_request_pc_address = icache_request_fe.pc_address;
|
||||
|
||||
assign o_m_valid_i = VX_dram_req_rsp_icache.o_m_valid;
|
||||
assign o_m_read_addr_i = VX_dram_req_rsp_icache.o_m_read_addr;
|
||||
assign o_m_evict_addr_i = VX_dram_req_rsp_icache.o_m_evict_addr;
|
||||
assign o_m_read_or_write_i = VX_dram_req_rsp_icache.o_m_read_or_write;
|
||||
assign VX_dram_req_rsp_icache.i_m_ready = i_m_ready_i;
|
||||
genvar curr_bank;
|
||||
genvar curr_word;
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_res();
|
||||
|
||||
|
||||
for (curr_bank = 0; curr_bank < `ICACHE_BANKS; curr_bank = curr_bank + 1) begin : icache_setup
|
||||
for (curr_word = 0; curr_word < `ICACHE_NUM_WORDS_PER_BLOCK; curr_word = curr_word + 1) begin : icache_banks_setup
|
||||
assign o_m_writedata_i[curr_bank][curr_word] = VX_dram_req_rsp_icache.o_m_writedata[curr_bank][curr_word];
|
||||
assign VX_dram_req_rsp_icache.i_m_readdata[curr_bank][curr_word] = i_m_readdata_i[curr_bank][curr_word]; // fixed
|
||||
end
|
||||
end
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp;
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp_addr = I_dram_fill_rsp_addr;
|
||||
|
||||
assign I_dram_req = VX_gpu_icache_dram_req.dram_req;
|
||||
assign I_dram_req_write = VX_gpu_icache_dram_req.dram_req_write;
|
||||
assign I_dram_req_read = VX_gpu_icache_dram_req.dram_req_read;
|
||||
assign I_dram_req_addr = VX_gpu_icache_dram_req.dram_req_addr;
|
||||
assign I_dram_req_size = VX_gpu_icache_dram_req.dram_req_size;
|
||||
assign I_dram_expected_lat = `ISIMULATED_DRAM_LATENCY_CYCLES;
|
||||
assign I_dram_fill_accept = VX_gpu_icache_dram_req.dram_fill_accept;
|
||||
|
||||
genvar iwordy;
|
||||
generate
|
||||
for (iwordy = 0; iwordy < `IBANK_LINE_SIZE_WORDS; iwordy=iwordy+1) begin
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[iwordy] = I_dram_fill_rsp_data[iwordy];
|
||||
assign I_dram_req_data[iwordy] = VX_gpu_icache_dram_req.dram_req_data[iwordy];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -158,8 +153,8 @@ VX_front_end vx_front_end(
|
||||
.VX_warp_ctl (VX_warp_ctl),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_response_fe (icache_response_fe),
|
||||
.icache_request_fe (icache_request_fe),
|
||||
.VX_icache_rsp (VX_icache_rsp),
|
||||
.VX_icache_req (VX_icache_req),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.fetch_ebreak (out_ebreak_unqual)
|
||||
@@ -197,11 +192,20 @@ VX_back_end vx_back_end(
|
||||
VX_dmem_controller VX_dmem_controller(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Dram <-> Dcache
|
||||
.VX_gpu_dcache_dram_req (VX_gpu_dcache_dram_req),
|
||||
.VX_gpu_dcache_dram_res (VX_gpu_dcache_dram_res),
|
||||
.VX_dram_req_rsp_icache (VX_dram_req_rsp_icache),
|
||||
.VX_icache_req (icache_request_fe),
|
||||
.VX_icache_rsp (icache_response_fe),
|
||||
|
||||
// Dram <-> Icache
|
||||
.VX_gpu_icache_dram_req (VX_gpu_icache_dram_req),
|
||||
.VX_gpu_icache_dram_res (VX_gpu_icache_dram_res),
|
||||
|
||||
// Core <-> Icache
|
||||
.VX_icache_req (VX_icache_req),
|
||||
.VX_icache_rsp (VX_icache_rsp),
|
||||
|
||||
// Core <-> Dcache
|
||||
.VX_dcache_req (VX_dcache_req_qual),
|
||||
.VX_dcache_rsp (VX_dcache_rsp)
|
||||
);
|
||||
|
||||
@@ -78,6 +78,7 @@ class Vortex
|
||||
int debug_debugAddr;
|
||||
double stats_sim_time;
|
||||
std::vector<dram_req_t> dram_req_vec;
|
||||
std::vector<dram_req_t> I_dram_req_vec;
|
||||
#ifdef VCD_OUTPUT
|
||||
VerilatedVcdC *m_trace;
|
||||
#endif
|
||||
@@ -165,78 +166,100 @@ void Vortex::print_stats(bool cycle_test)
|
||||
bool Vortex::ibus_driver()
|
||||
{
|
||||
|
||||
vortex->i_m_ready_i = false;
|
||||
|
||||
// Iterate through each element, and get pop index
|
||||
int dequeue_index = -1;
|
||||
bool dequeue_valid = false;
|
||||
for (int i = 0; i < this->I_dram_req_vec.size(); i++)
|
||||
{
|
||||
|
||||
// int dcache_num_words_per_block
|
||||
|
||||
if (refill_i)
|
||||
if (this->I_dram_req_vec[i].cycles_left > 0)
|
||||
{
|
||||
refill_i = false;
|
||||
vortex->i_m_ready_i = true;
|
||||
|
||||
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++)
|
||||
{
|
||||
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++)
|
||||
{
|
||||
unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank;
|
||||
unsigned curr_addr = refill_addr_i + (4*curr_index);
|
||||
|
||||
unsigned curr_value;
|
||||
ram.getWord(curr_addr, &curr_value);
|
||||
|
||||
vortex->i_m_readdata_i[curr_bank][curr_word] = curr_value;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (vortex->o_m_valid_i)
|
||||
{
|
||||
|
||||
if (vortex->o_m_read_or_write_i)
|
||||
{
|
||||
// fprintf(stderr, "++++++++++++++++++++++++++++++++\n");
|
||||
unsigned base_addr = vortex->o_m_evict_addr_i;
|
||||
|
||||
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++)
|
||||
{
|
||||
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++)
|
||||
{
|
||||
unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank;
|
||||
unsigned curr_addr = base_addr + (4*curr_index);
|
||||
|
||||
unsigned curr_value = vortex->o_m_writedata_i[curr_bank][curr_word];
|
||||
|
||||
ram.writeWord( curr_addr, &curr_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Respond next cycle
|
||||
refill_i = true;
|
||||
refill_addr_i = vortex->o_m_read_addr_i;
|
||||
}
|
||||
this->I_dram_req_vec[i].cycles_left -= 1;
|
||||
}
|
||||
|
||||
if ((this->I_dram_req_vec[i].cycles_left == 0) && (!dequeue_valid))
|
||||
{
|
||||
dequeue_index = i;
|
||||
dequeue_valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (vortex->I_dram_req)
|
||||
{
|
||||
// std::cout << "Icache Dram Request received!\n";
|
||||
if (vortex->I_dram_req_read)
|
||||
{
|
||||
// std::cout << "Icache Dram Request is read!\n";
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = vortex->I_dram_expected_lat;
|
||||
dram_req.data_length = vortex->I_dram_req_size / 4;
|
||||
dram_req.base_addr = vortex->I_dram_req_addr;
|
||||
dram_req.data = (unsigned *) malloc(dram_req.data_length * sizeof(unsigned));
|
||||
|
||||
for (int i = 0; i < dram_req.data_length; i++)
|
||||
{
|
||||
unsigned curr_addr = dram_req.base_addr + (i*4);
|
||||
unsigned data_rd;
|
||||
ram.getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
// std::cout << "Fill Req -> Addr: " << std::hex << dram_req.base_addr << std::dec << "\n";
|
||||
this->I_dram_req_vec.push_back(dram_req);
|
||||
}
|
||||
|
||||
if (vortex->I_dram_req_write)
|
||||
{
|
||||
unsigned base_addr = vortex->I_dram_req_addr;
|
||||
unsigned data_length = vortex->I_dram_req_size / 4;
|
||||
|
||||
for (int i = 0; i < data_length; i++)
|
||||
{
|
||||
unsigned curr_addr = base_addr + (i*4);
|
||||
unsigned data_wr = vortex->I_dram_req_data[i];
|
||||
ram.writeWord(curr_addr, &data_wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex->I_dram_fill_accept && dequeue_valid)
|
||||
{
|
||||
// std::cout << "Icache Dram Response Sending...!\n";
|
||||
|
||||
vortex->I_dram_fill_rsp = 1;
|
||||
vortex->I_dram_fill_rsp_addr = this->I_dram_req_vec[dequeue_index].base_addr;
|
||||
// std::cout << "Fill Rsp -> Addr: " << std::hex << (this->I_dram_req_vec[dequeue_index].base_addr) << std::dec << "\n";
|
||||
|
||||
for (int i = 0; i < this->I_dram_req_vec[dequeue_index].data_length; i++)
|
||||
{
|
||||
vortex->I_dram_fill_rsp_data[i] = this->I_dram_req_vec[dequeue_index].data[i];
|
||||
}
|
||||
free(this->I_dram_req_vec[dequeue_index].data);
|
||||
|
||||
this->I_dram_req_vec.erase(this->I_dram_req_vec.begin() + dequeue_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
vortex->I_dram_fill_rsp = 0;
|
||||
vortex->I_dram_fill_rsp_addr = 0;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
void Vortex::io_handler()
|
||||
{
|
||||
// std::cout << "Checking\n";
|
||||
if (vortex->io_valid)
|
||||
{
|
||||
uint32_t data_write = (uint32_t) vortex->io_data;
|
||||
|
||||
// std::cout << "IO VALID!\n";
|
||||
char c = (char) data_write;
|
||||
std::cerr << c;
|
||||
// std::cout << c;
|
||||
|
||||
std::cout << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,6 +303,7 @@ bool Vortex::dbus_driver()
|
||||
ram.getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
// std::cout << "Fill Req -> Addr: " << std::hex << dram_req.base_addr << std::dec << "\n";
|
||||
this->dram_req_vec.push_back(dram_req);
|
||||
}
|
||||
|
||||
@@ -301,6 +325,8 @@ bool Vortex::dbus_driver()
|
||||
{
|
||||
vortex->dram_fill_rsp = 1;
|
||||
vortex->dram_fill_rsp_addr = this->dram_req_vec[dequeue_index].base_addr;
|
||||
// std::cout << "Fill Rsp -> Addr: " << std::hex << (this->dram_req_vec[dequeue_index].base_addr) << std::dec << "\n";
|
||||
|
||||
for (int i = 0; i < this->dram_req_vec[dequeue_index].data_length; i++)
|
||||
{
|
||||
vortex->dram_fill_rsp_data[i] = this->dram_req_vec[dequeue_index].data[i];
|
||||
|
||||
Reference in New Issue
Block a user