From ec1aad1591c49b611915ae1bb68afcf6156c7dff Mon Sep 17 00:00:00 2001
From: felsabbagh3
Date: Sun, 8 Mar 2020 14:04:55 -0700
Subject: [PATCH] Icache stage mods + removed shared memory

---
Review notes (free-form text placed between the "---" line and the first
"diff --git" line; it is not part of the commit message or of the diff).

NOTE(review): this copy of the patch had its line breaks collapsed. Line
breaks, tab indentation and the positions of blank context lines below were
restored so that they agree with the hunk line counts. They are a best-effort
reconstruction -- confirm against blobs 19b31caf and 54233e1b before applying.

rtl/VX_dmem_controller.v
  * Comments out (does not delete) the sm_driver_* wires, sm_delay and the
    VX_shared_memory instance.
  * Also comments out the declarations of cache_driver_out_data and
    cache_driver_out_valid.
    NOTE(review): confirm nothing outside this hunk still uses them.
  * cache_driver_in_valid is still masked with ~to_shm.
    NOTE(review): with the shared-memory instance disabled, nothing in this
    hunk services a request that has to_shm set -- confirm that is intended.

rtl/VX_icache_stage.v
  * Port-list change: adds input total_freeze and replaces icache_response /
    icache_request with VX_icache_rsp / VX_icache_req. No instantiation of
    VX_icache_stage is touched by this patch (only two files change).
  * core_req_valid is high only when at least one thread is valid and
    total_freeze is low; total_freeze is also driven onto core_no_wb_slot.
  * fe_inst_meta_id.instruction, .inst_pc and .warp_num now come from the
    response; instruction and inst_pc use element [0] only.
  * threads_active[warp] stores fe_inst_meta_fi.valid and is used as the
    valid mask when a response for that warp arrives.
    NOTE(review): it is written on valid_inst && !icache_stage_delay, which
    does not include !total_freeze, so it can be updated in a cycle where
    core_req_valid is low -- confirm this is harmless.
  * NOTE(review): icache_stage_valids gates the response-side valid mask with
    delay_req, while fe_inst_meta_id.valid is not gated -- confirm intended.

 rtl/VX_dmem_controller.v | 72 ++++++++++++++++++++--------------------
 rtl/VX_icache_stage.v    | 52 +++++++++++++++++++++--------
 2 files changed, 74 insertions(+), 50 deletions(-)

diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v
index 19b31caf..00a6aa37 100644
--- a/rtl/VX_dmem_controller.v
+++ b/rtl/VX_dmem_controller.v
@@ -25,45 +25,45 @@ module VX_dmem_controller (
 
 	wire[`NT_M1:0] cache_driver_in_valid = VX_dcache_req.core_req_valid & {`NT{~to_shm}};
 
-	wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}};
-	wire[2:0] sm_driver_in_mem_read = !(|sm_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.core_req_mem_read;
-	wire[2:0] sm_driver_in_mem_write = !(|sm_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.core_req_mem_write;
+	// wire[`NT_M1:0] sm_driver_in_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}};
+	// wire[2:0] sm_driver_in_mem_read = !(|sm_driver_in_valid) ? `NO_MEM_READ : VX_dcache_req.core_req_mem_read;
+	// wire[2:0] sm_driver_in_mem_write = !(|sm_driver_in_valid) ? `NO_MEM_WRITE : VX_dcache_req.core_req_mem_write;
 
-	wire[`NT_M1:0][31:0] cache_driver_out_data;
-	wire[`NT_M1:0][31:0] sm_driver_out_data;
-	wire[`NT_M1:0] cache_driver_out_valid; // Not used for now
-	wire sm_delay;
+	// wire[`NT_M1:0][31:0] cache_driver_out_data;
+	// wire[`NT_M1:0][31:0] sm_driver_out_data;
+	// wire[`NT_M1:0] cache_driver_out_valid; // Not used for now
+	// wire sm_delay;
 
 
-	VX_shared_memory #(
-		.SM_SIZE (`SHARED_MEMORY_SIZE),
-		.SM_BANKS (`SHARED_MEMORY_BANKS),
-		.SM_BYTES_PER_READ (`SHARED_MEMORY_BYTES_PER_READ),
-		.SM_WORDS_PER_READ (`SHARED_MEMORY_WORDS_PER_READ),
-		.SM_LOG_WORDS_PER_READ (`SHARED_MEMORY_LOG_WORDS_PER_READ),
-		.SM_BANK_OFFSET_START (`SHARED_MEMORY_BANK_OFFSET_ST),
-		.SM_BANK_OFFSET_END (`SHARED_MEMORY_BANK_OFFSET_ED),
-		.SM_BLOCK_OFFSET_START (`SHARED_MEMORY_BLOCK_OFFSET_ST),
-		.SM_BLOCK_OFFSET_END (`SHARED_MEMORY_BLOCK_OFFSET_ED),
-		.SM_INDEX_START (`SHARED_MEMORY_INDEX_OFFSET_ST),
-		.SM_INDEX_END (`SHARED_MEMORY_INDEX_OFFSET_ED),
-		.SM_HEIGHT (`SHARED_MEMORY_HEIGHT),
-		.NUM_REQ (`SHARED_MEMORY_NUM_REQ),
-		.BITS_PER_BANK (`SHARED_MEMORY_BITS_PER_BANK)
-	)
-	shared_memory
-	(
-		.clk (clk),
-		.reset (reset),
-		.in_valid (sm_driver_in_valid),
-		.in_address(VX_dcache_req.core_req_addr),
-		.in_data (VX_dcache_req.core_req_writedata),
-		.mem_read (sm_driver_in_mem_read),
-		.mem_write (sm_driver_in_mem_write),
-		.out_valid (cache_driver_out_valid),
-		.out_data (sm_driver_out_data),
-		.stall (sm_delay)
-	);
+	// VX_shared_memory #(
+	// 	.SM_SIZE (`SHARED_MEMORY_SIZE),
+	// 	.SM_BANKS (`SHARED_MEMORY_BANKS),
+	// 	.SM_BYTES_PER_READ (`SHARED_MEMORY_BYTES_PER_READ),
+	// 	.SM_WORDS_PER_READ (`SHARED_MEMORY_WORDS_PER_READ),
+	// 	.SM_LOG_WORDS_PER_READ (`SHARED_MEMORY_LOG_WORDS_PER_READ),
+	// 	.SM_BANK_OFFSET_START (`SHARED_MEMORY_BANK_OFFSET_ST),
+	// 	.SM_BANK_OFFSET_END (`SHARED_MEMORY_BANK_OFFSET_ED),
+	// 	.SM_BLOCK_OFFSET_START (`SHARED_MEMORY_BLOCK_OFFSET_ST),
+	// 	.SM_BLOCK_OFFSET_END (`SHARED_MEMORY_BLOCK_OFFSET_ED),
+	// 	.SM_INDEX_START (`SHARED_MEMORY_INDEX_OFFSET_ST),
+	// 	.SM_INDEX_END (`SHARED_MEMORY_INDEX_OFFSET_ED),
+	// 	.SM_HEIGHT (`SHARED_MEMORY_HEIGHT),
+	// 	.NUM_REQ (`SHARED_MEMORY_NUM_REQ),
+	// 	.BITS_PER_BANK (`SHARED_MEMORY_BITS_PER_BANK)
+	// )
+	// shared_memory
+	// (
+	// 	.clk (clk),
+	// 	.reset (reset),
+	// 	.in_valid (sm_driver_in_valid),
+	// 	.in_address(VX_dcache_req.core_req_addr),
+	// 	.in_data (VX_dcache_req.core_req_writedata),
+	// 	.mem_read (sm_driver_in_mem_read),
+	// 	.mem_write (sm_driver_in_mem_write),
+	// 	.out_valid (cache_driver_out_valid),
+	// 	.out_data (sm_driver_out_data),
+	// 	.stall (sm_delay)
+	// );
 
 	wire Dllvq_pop;
 
diff --git a/rtl/VX_icache_stage.v b/rtl/VX_icache_stage.v
index 54233e1b..373c11f7 100644
--- a/rtl/VX_icache_stage.v
+++ b/rtl/VX_icache_stage.v
@@ -3,34 +3,58 @@
 module VX_icache_stage (
 	input wire clk,
 	input wire reset,
+	input wire total_freeze,
 
 	output wire icache_stage_delay,
 	output wire[`NW_M1:0] icache_stage_wid,
 	output wire[`NT-1:0] icache_stage_valids,
 
 	VX_inst_meta_inter fe_inst_meta_fi,
 	VX_inst_meta_inter fe_inst_meta_id,
-	VX_icache_response_inter icache_response,
-	VX_icache_request_inter icache_request
+
+	VX_gpu_dcache_res_inter VX_icache_rsp,
+	VX_gpu_dcache_req_inter VX_icache_req
 );
 
+	reg[`NT-1:0] threads_active[`NW-1:0];
 	wire valid_inst = (|fe_inst_meta_fi.valid);
 
-	assign icache_request.pc_address = fe_inst_meta_fi.inst_pc;
-	assign icache_request.out_cache_driver_in_valid = fe_inst_meta_fi.valid != 0;
-	assign icache_request.out_cache_driver_in_mem_read = `LW_MEM_READ;
-	assign icache_request.out_cache_driver_in_mem_write = `NO_MEM_WRITE;
-	assign icache_request.out_cache_driver_in_data = 32'b0;
+	// Icache Request
+	assign VX_icache_req.core_req_valid = valid_inst && !total_freeze;
+	assign VX_icache_req.core_req_addr = fe_inst_meta_fi.inst_pc;
+	assign VX_icache_req.core_req_writedata = 32'b0;
+	assign VX_icache_req.core_req_mem_read = `LW_MEM_READ;
+	assign VX_icache_req.core_req_mem_write = `NO_MEM_WRITE;
+	assign VX_icache_req.core_req_rd = 5'b0;
+	assign VX_icache_req.core_req_wb = 2'b0;
+	assign VX_icache_req.core_req_warp_num = fe_inst_meta_fi.warp_num;
+	assign VX_icache_req.core_req_pc = fe_inst_meta_fi.inst_pc;
 
+	assign fe_inst_meta_id.instruction = VX_icache_rsp.core_wb_readdata[0][31:0];
+	assign fe_inst_meta_id.inst_pc = VX_icache_rsp.core_wb_pc[0];
+	assign fe_inst_meta_id.warp_num = VX_icache_rsp.core_wb_warp_num;
+	assign fe_inst_meta_id.valid = VX_icache_rsp.core_wb_valid ? threads_active[VX_icache_rsp.core_wb_warp_num] : 0;
 
-	assign icache_stage_delay = icache_response.delay;
+	assign icache_stage_wid = fe_inst_meta_id.warp_num;
+	assign icache_stage_valids = fe_inst_meta_id.valid & {`NT{!icache_stage_delay}};
 
-	assign fe_inst_meta_id.instruction = (!valid_inst || icache_response.delay) ? 32'b0 : icache_response.instruction;
-	assign fe_inst_meta_id.inst_pc = fe_inst_meta_fi.inst_pc;
-	assign fe_inst_meta_id.warp_num = fe_inst_meta_fi.warp_num;
-	assign fe_inst_meta_id.valid = fe_inst_meta_fi.valid & {`NT{!icache_stage_delay}};
+	// Cache can't accept request
+	assign icache_stage_delay = VX_icache_rsp.delay_req;
+
+	// Core can't accept response
+	assign VX_icache_req.core_no_wb_slot = total_freeze;
+
+	integer curr_w;
+	always @(posedge clk) begin
+		if (reset) begin
+			for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) threads_active[curr_w] <= 0;
+		end else begin
+			if (valid_inst && !icache_stage_delay) begin
+				threads_active[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
+			end
+		end
+	end
+
 
-	assign icache_stage_wid = fe_inst_meta_fi.warp_num;
-	assign icache_stage_valids = fe_inst_meta_fi.valid & {`NT{!icache_stage_delay}};
 
 endmodule
\ No newline at end of file