diff --git a/rtl/Makefile b/rtl/Makefile index c9b2a3fb..5fb2eb5b 100644 --- a/rtl/Makefile +++ b/rtl/Makefile @@ -46,5 +46,8 @@ debug: compdebug w: VERILATORnoWarnings $(MAKECPP) +run: w + (cd obj_dir && ./VVortex) + clean: rm obj_dir/* diff --git a/rtl/VX_cache/VX_bank.v b/rtl/VX_cache/VX_bank.v index 6e1468f6..f12018da 100644 --- a/rtl/VX_cache/VX_bank.v +++ b/rtl/VX_cache/VX_bank.v @@ -50,11 +50,12 @@ module VX_bank // Input Core Request input wire delay_req, - input wire [NUMBER_REQUESTS-1:0] bank_valids, - input wire [NUMBER_REQUESTS-1:0][31:0] bank_addr, - input wire [NUMBER_REQUESTS-1:0][31:0] bank_writedata, + input wire [NUMBER_REQUESTS-1:0] bank_valids, + input wire [NUMBER_REQUESTS-1:0][31:0] bank_addr, + input wire [NUMBER_REQUESTS-1:0][31:0] bank_writedata, input wire [4:0] bank_rd, input wire [1:0] bank_wb, + input wire [31:0] bank_pc, input wire [`NW_M1:0] bank_warp_num, input wire [2:0] bank_mem_read, input wire [2:0] bank_mem_write, @@ -63,11 +64,12 @@ module VX_bank // Output Core WB input wire bank_wb_pop, output wire bank_wb_valid, - output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid, + output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid, output wire [4:0] bank_wb_rd, output wire [1:0] bank_wb_wb, output wire [`NW_M1:0] bank_wb_warp_num, output wire [31:0] bank_wb_data, + output wire [31:0] bank_wb_pc, // Dram Fill Requests output wire dram_fill_req, @@ -158,6 +160,7 @@ module VX_bank wire [2:0] reqq_req_mem_read_st0; wire [2:0] reqq_req_mem_write_st0; reg reqq_hazard_st0; + wire [31:0] reqq_req_pc_st0; assign reqq_push = !delay_req && (|bank_valids); @@ -189,6 +192,7 @@ module VX_bank .bank_addr (bank_addr), .bank_writedata (bank_writedata), .bank_rd (bank_rd), + .bank_pc (bank_pc), .bank_wb (bank_wb), .bank_warp_num (bank_warp_num), .bank_mem_read (bank_mem_read), @@ -205,6 +209,7 @@ module VX_bank .reqq_req_warp_num_st0 (reqq_req_warp_num_st0), .reqq_req_mem_read_st0 (reqq_req_mem_read_st0), .reqq_req_mem_write_st0(reqq_req_mem_write_st0), + .reqq_req_pc_st0 (reqq_req_pc_st0), .reqq_empty (reqq_empty), .reqq_full (reqq_full) ); @@ -217,6 +222,7 @@ module VX_bank wire [31:0] mrvq_writeword_st0; wire [4:0] mrvq_rd_st0; wire [1:0] mrvq_wb_st0; + wire [31:0] miss_resrv_pc_st0; wire [`NW_M1:0] mrvq_warp_num_st0; wire [2:0] mrvq_mem_read_st0; wire [2:0] mrvq_mem_write_st0; @@ -225,14 +231,16 @@ module VX_bank wire miss_add; wire[31:0] miss_add_addr; wire[31:0] miss_add_data; - wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid; + wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid; wire[4:0] miss_add_rd; wire[1:0] miss_add_wb; wire[`NW_M1:0] miss_add_warp_num; wire[2:0] miss_add_mem_read; wire[2:0] miss_add_mem_write; - VX_cache_miss_resrv #( + wire[31:0] miss_add_pc; + + VX_cache_miss_resrv #( .CACHE_SIZE_BYTES (CACHE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES), .NUMBER_BANKS (NUMBER_BANKS), @@ -264,6 +272,7 @@ module VX_bank .miss_add_warp_num (miss_add_warp_num), .miss_add_mem_read (miss_add_mem_read), .miss_add_mem_write (miss_add_mem_write), + .miss_add_pc (miss_add_pc), .miss_resrv_full (mrvq_full), // Broadcast @@ -278,6 +287,7 @@ module VX_bank .miss_resrv_tid_st0 (mrvq_tid_st0), .miss_resrv_rd_st0 (mrvq_rd_st0), .miss_resrv_wb_st0 (mrvq_wb_st0), + .miss_resrv_pc_st0 (miss_resrv_pc_st0), .miss_resrv_warp_num_st0 (mrvq_warp_num_st0), .miss_resrv_mem_read_st0 (mrvq_mem_read_st0), .miss_resrv_mem_write_st0(mrvq_mem_write_st0) @@ -320,6 +330,7 @@ module VX_bank wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0; wire qual_going_to_write_st0; wire qual_is_snp; + wire [31:0] qual_pc_st0; wire valid_st1 [STAGE_1_CYCLES-1:0]; wire going_to_write_st1[STAGE_1_CYCLES-1:0]; @@ -329,6 +340,7 @@ module VX_bank wire is_fill_st1 [STAGE_1_CYCLES-1:0]; wire [`BANK_LINE_SIZE_RNG][31:0] writedata_st1 [STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0]; + wire [31:0] pc_st1 [STAGE_1_CYCLES-1:0]; assign qual_is_fill_st0 = dfpq_pop; assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; @@ -355,27 +367,32 @@ module VX_bank (snrq_pop) ? 1 : 0; + assign qual_pc_st0 = (reqq_pop) ? reqq_req_pc_st0 : + (mrvq_pop) ? miss_resrv_pc_st0 : + (dfpq_pop) ? 32'hdeadbeef : + (snrq_pop) ? 32'hb00b0000 : + 32'h0; assign qual_is_snp = snrq_pop ? 1 : 0; - VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_c0 ( + VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1 + 32)) s0_1_c0 ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), .flush(0), - .in ({qual_is_snp , qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), - .out ({is_snp_st1[0], going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0]}) + .in ({qual_is_snp , qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0, qual_pc_st0 }), + .out ({is_snp_st1[0], going_to_write_st1[0] , valid_st1[0] , addr_st1[0] , writeword_st1[0] , inst_meta_st1[0] , is_fill_st1[0] , writedata_st1[0] , pc_st1[0]}) ); genvar curr_stage; generate for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin - VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1)) s0_1_cc ( + VX_generic_register #(.N( 1 + 1 + 1 + 32 + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*32) + 1 + 32)) s0_1_cc ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), .flush(0), - .in ({is_snp_st1[curr_stage-1], going_to_write_st1[curr_stage-1], valid_st1[curr_stage-1], addr_st1[curr_stage-1], writeword_st1[curr_stage-1], inst_meta_st1[curr_stage-1], is_fill_st1[curr_stage-1] , writedata_st1[curr_stage-1]}), - .out ({is_snp_st1[curr_stage] , going_to_write_st1[curr_stage] , valid_st1[curr_stage] , addr_st1[curr_stage] , writeword_st1[curr_stage] , inst_meta_st1[curr_stage] , is_fill_st1[curr_stage] , writedata_st1[curr_stage] }) + .in ({is_snp_st1[curr_stage-1], going_to_write_st1[curr_stage-1], valid_st1[curr_stage-1], addr_st1[curr_stage-1], writeword_st1[curr_stage-1], inst_meta_st1[curr_stage-1], is_fill_st1[curr_stage-1] , writedata_st1[curr_stage-1], pc_st1[curr_stage-1]}), + .out ({is_snp_st1[curr_stage] , going_to_write_st1[curr_stage] , valid_st1[curr_stage] , addr_st1[curr_stage] , writeword_st1[curr_stage] , inst_meta_st1[curr_stage] , is_fill_st1[curr_stage] , writedata_st1[curr_stage] , pc_st1[curr_stage]}) ); end endgenerate @@ -386,6 +403,7 @@ module VX_bank wire[`TAG_SELECT_SIZE_RNG] readtag_st1e; wire miss_st1e; wire dirty_st1e; + wire[31:0] pc_st1e; wire [4:0] rd_st1e; @@ -398,7 +416,7 @@ module VX_bank wire is_snp_st1e; assign is_snp_st1e = is_snp_st1[STAGE_1_CYCLES-1]; - + assign pc_st1e = pc_st1[STAGE_1_CYCLES-1]; assign {rd_st1e, wb_st1e, warp_num_st1e, mem_read_st1e, mem_write_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; @@ -464,14 +482,15 @@ module VX_bank wire is_fill_st2; wire fill_saw_dirty_st2; wire is_snp_st2; + wire [31:0] pc_st2; - VX_generic_register #(.N( 1 + 1 + 1 + 1 + 32 + 32 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1 + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS)) st_1e_2 ( + VX_generic_register #(.N( 1 + 1 + 1 + 1 + 32 + 32 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1 + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32)) st_1e_2 ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), .flush(0), - .in ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), - .out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 }) + .in ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, pc_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), + .out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , pc_st2 , inst_meta_st2 }) ); @@ -485,23 +504,24 @@ module VX_bank // Enqueue to CWB Queue wire cwbq_push = (valid_st2 && !miss_st2); wire [31:0] cwbq_data = readword_st2; - wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid; + wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid; wire [4:0] cwbq_rd = miss_add_rd; wire [1:0] cwbq_wb = miss_add_wb; wire [`NW_M1:0] cwbq_warp_num = miss_add_warp_num; + wire [31:0] cwbq_pc = pc_st2; wire cwbq_full; wire cwbq_empty; assign bank_wb_valid = !cwbq_empty; - VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1+1) + 32), .SIZE(CWBQ_SIZE)) cwb_queue( + VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1+1) + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue( .clk (clk), .reset (reset), .push (cwbq_push), - .in_data ({cwbq_tid, cwbq_rd, cwbq_wb, cwbq_warp_num, cwbq_data}), + .in_data ({cwbq_tid, cwbq_rd, cwbq_wb, cwbq_warp_num, cwbq_data, cwbq_pc}), .pop (bank_wb_pop), - .out_data({bank_wb_tid, bank_wb_rd, bank_wb_wb, bank_wb_warp_num, bank_wb_data}), + .out_data({bank_wb_tid, bank_wb_rd, bank_wb_wb, bank_wb_warp_num, bank_wb_data, bank_wb_pc}), .empty (cwbq_empty), .full (cwbq_full) ); @@ -552,7 +572,7 @@ module VX_bank assign dram_fill_req_addr = addr_st2; assign dram_wb_req = !dwbq_empty; - VX_generic_queue_ll #(.DATAW( 1 + 32 + (`BANK_LINE_SIZE_WORDS * 32) + 1 + 1), .SIZE(DWBQ_SIZE)) dwb_queue( + VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_SIZE_WORDS * 32)), .SIZE(DWBQ_SIZE)) dwb_queue( .clk (clk), .reset (reset), @@ -571,7 +591,7 @@ module VX_bank wire llvq_full; wire llvq_push = valid_st2 && !miss_st2; wire[`BANK_LINE_SIZE_RNG][31:0] llvq_push_data = readdata_st2; - wire llvq_addr = addr_st2; + wire[31:0] llvq_addr = addr_st2; wire[`vx_clog2(NUMBER_REQUESTS)-1:0] llvq_tid = miss_add_tid; assign llvq_valid = !llvq_empty; diff --git a/rtl/VX_cache/VX_cache.v b/rtl/VX_cache/VX_cache.v index e00680ef..c8f5d352 100644 --- a/rtl/VX_cache/VX_cache.v +++ b/rtl/VX_cache/VX_cache.v @@ -59,15 +59,17 @@ module VX_cache input wire [4:0] core_req_rd, input wire [1:0] core_req_wb, input wire [`NW_M1:0] core_req_warp_num, + input wire [31:0] core_req_pc, output wire delay_req, // Core Writeback input wire core_no_wb_slot, - output wire [NUMBER_REQUESTS-1:0] core_wb_valid, + output wire [NUMBER_REQUESTS-1:0] core_wb_valid, output wire [4:0] core_wb_req_rd, output wire [1:0] core_wb_req_wb, output wire [`NW_M1:0] core_wb_warp_num, - output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata, + output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata, + output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc, // Dram Fill Response @@ -93,24 +95,25 @@ module VX_cache // Lower Level Cache input wire llvq_pop, - output wire[NUMBER_REQUESTS-1:0] llvq_valid, - output wire[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr, + output wire[NUMBER_REQUESTS-1:0] llvq_valid, + output wire[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr, output wire[NUMBER_REQUESTS-1:0][`BANK_LINE_SIZE_RNG][31:0] llvq_res_data ); - wire [NUMBER_BANKS-1:0][NUMBER_REQUESTS-1:0] per_bank_valids; + wire [NUMBER_BANKS-1:0][NUMBER_REQUESTS-1:0] per_bank_valids; wire [NUMBER_BANKS-1:0] per_bank_wb_pop; wire [NUMBER_BANKS-1:0] per_bank_wb_valid; - wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid; + wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid; wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd; wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb; wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num; wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_data; + wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc; - wire dfqq_full; + wire dfqq_full; wire[NUMBER_BANKS-1:0] per_bank_dram_fill_req; wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_fill_req_addr; wire[NUMBER_BANKS-1:0] per_bank_dram_fill_accept; @@ -128,7 +131,7 @@ module VX_cache wire[NUMBER_BANKS-1:0] per_bank_llvq_valid; wire[NUMBER_BANKS-1:0][31:0] per_bank_llvq_res_addr; wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_llvq_res_data; - wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid; + wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid; assign delay_req = (|per_bank_reqq_full); @@ -256,6 +259,7 @@ module VX_cache .per_bank_wb_valid (per_bank_wb_valid), .per_bank_wb_tid (per_bank_wb_tid), .per_bank_wb_rd (per_bank_wb_rd), + .per_bank_wb_pc (per_bank_wb_pc), .per_bank_wb_wb (per_bank_wb_wb), .per_bank_wb_warp_num(per_bank_wb_warp_num), .per_bank_wb_data (per_bank_wb_data), @@ -266,7 +270,8 @@ module VX_cache .core_wb_req_rd (core_wb_req_rd), .core_wb_req_wb (core_wb_req_wb), .core_wb_warp_num (core_wb_warp_num), - .core_wb_readdata (core_wb_readdata) + .core_wb_readdata (core_wb_readdata), + .core_wb_pc (core_wb_pc) ); genvar curr_bank; @@ -280,10 +285,12 @@ module VX_cache wire [`NW_M1:0] curr_bank_warp_num; wire [2:0] curr_bank_mem_read; wire [2:0] curr_bank_mem_write; + wire [31:0] curr_bank_pc; wire curr_bank_wb_pop; wire curr_bank_wb_valid; - wire [`vx_clog2(NUMBER_REQUESTS)-1:0] curr_bank_wb_tid; + wire [`vx_clog2(NUMBER_REQUESTS)-1:0] curr_bank_wb_tid; + wire [31:0] curr_bank_wb_pc; wire [4:0] curr_bank_wb_rd; wire [1:0] curr_bank_wb_wb; wire [`NW_M1:0] curr_bank_wb_warp_num; @@ -324,6 +331,7 @@ module VX_cache assign curr_bank_writedata = core_req_writedata; assign curr_bank_rd = core_req_rd; assign curr_bank_wb = core_req_wb; + assign curr_bank_pc = core_req_pc; assign curr_bank_warp_num = core_req_warp_num; assign curr_bank_mem_read = core_req_mem_read; assign curr_bank_mem_write = core_req_mem_write; @@ -337,6 +345,7 @@ module VX_cache assign per_bank_wb_wb [curr_bank] = curr_bank_wb_wb; assign per_bank_wb_warp_num[curr_bank] = curr_bank_wb_warp_num; assign per_bank_wb_data [curr_bank] = curr_bank_wb_data; + assign per_bank_wb_pc [curr_bank] = curr_bank_wb_pc; // Dram fill request assign curr_bank_dfqq_full = dfqq_full; @@ -397,6 +406,7 @@ module VX_cache .bank_writedata (curr_bank_writedata), .bank_rd (curr_bank_rd), .bank_wb (curr_bank_wb), + .bank_pc (curr_bank_pc), .bank_warp_num (curr_bank_warp_num), .bank_mem_read (curr_bank_mem_read), .bank_mem_write (curr_bank_mem_write), @@ -410,6 +420,7 @@ module VX_cache .bank_wb_wb (curr_bank_wb_wb), .bank_wb_warp_num (curr_bank_wb_warp_num), .bank_wb_data (curr_bank_wb_data), + .bank_wb_pc (curr_bank_wb_pc), // Dram fill req .dram_fill_req (curr_bank_dram_fill_req), diff --git a/rtl/VX_cache/VX_cache_config.v b/rtl/VX_cache/VX_cache_config.v index 0757d15a..5dba0c7e 100644 --- a/rtl/VX_cache/VX_cache_config.v +++ b/rtl/VX_cache/VX_cache_config.v @@ -46,12 +46,14 @@ // `vx_clog2_h(value, 31) : \ // 0 - +// 128 `define BANK_SIZE_BYTES CACHE_SIZE_BYTES/NUMBER_BANKS - +// 8 `define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES) -`define BANK_LINE_SIZE_WORDS (BANK_LINE_SIZE_BYTES / NUMBER_BANKS) +// 4 +`define BANK_LINE_SIZE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES) +// 3:0 `define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0 // Offset is fixed @@ -62,31 +64,54 @@ `define OFFSET_ADDR_RNG `OFFSET_ADDR_END:`OFFSET_ADDR_START `define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0 -`define WORD_SELECT_NUM_BITS $clog2(`BANK_LINE_SIZE_WORDS) -`define WORD_SELECT_SIZE_END `WORD_SELECT_NUM_BITS -`define WORD_SELECT_ADDR_START 1+`OFFSET_ADDR_END -`define WORD_SELECT_ADDR_END `WORD_SELECT_SIZE_END+`OFFSET_ADDR_END +// 2 +`define WORD_SELECT_NUM_BITS ($clog2(`BANK_LINE_SIZE_WORDS)) +// 2 +`define WORD_SELECT_SIZE_END (`WORD_SELECT_NUM_BITS) +// 2 +`define WORD_SELECT_ADDR_START (1+`OFFSET_ADDR_END) +// 3 +`define WORD_SELECT_ADDR_END (`WORD_SELECT_SIZE_END+`OFFSET_ADDR_END) +// 3:2 `define WORD_SELECT_ADDR_RNG `WORD_SELECT_ADDR_END:`WORD_SELECT_ADDR_START `define WORD_SELECT_SIZE_RNG `WORD_SELECT_SIZE_END-1:0 -`define BANK_SELECT_NUM_BITS $clog2(NUMBER_BANKS) -`define BANK_SELECT_SIZE_END `BANK_SELECT_NUM_BITS -`define BANK_SELECT_ADDR_START 1+`WORD_SELECT_ADDR_END -`define BANK_SELECT_ADDR_END `BANK_SELECT_SIZE_END+`BANK_SELECT_ADDR_START +// 3 +`define BANK_SELECT_NUM_BITS ($clog2(NUMBER_BANKS)) +// 3 +`define BANK_SELECT_SIZE_END (`BANK_SELECT_NUM_BITS) +// 4 +`define BANK_SELECT_ADDR_START (1+`WORD_SELECT_ADDR_END) +// 6 +`define BANK_SELECT_ADDR_END (`BANK_SELECT_SIZE_END+`BANK_SELECT_ADDR_START-1) +// 6:4 `define BANK_SELECT_ADDR_RNG `BANK_SELECT_ADDR_END:`BANK_SELECT_ADDR_START +// 2:0 `define BANK_SELECT_SIZE_RNG `BANK_SELECT_SIZE_END-1:0 -`define LINE_SELECT_NUM_BITS $clog2(`BANK_LINE_COUNT) -`define LINE_SELECT_SIZE_END `LINE_SELECT_NUM_BITS -`define LINE_SELECT_ADDR_START 1+`BANK_SELECT_ADDR_END -`define LINE_SELECT_ADDR_END `LINE_SELECT_SIZE_END+`LINE_SELECT_ADDR_START +// 3 +`define LINE_SELECT_NUM_BITS ($clog2(`BANK_LINE_COUNT)) +// 3 +`define LINE_SELECT_SIZE_END (`LINE_SELECT_NUM_BITS) +// 7 +`define LINE_SELECT_ADDR_START (1+`BANK_SELECT_ADDR_END) +// 9 +`define LINE_SELECT_ADDR_END (`LINE_SELECT_SIZE_END+`LINE_SELECT_ADDR_START-1) +// 9:7 `define LINE_SELECT_ADDR_RNG `LINE_SELECT_ADDR_END:`LINE_SELECT_ADDR_START +// 2:0 `define LINE_SELECT_SIZE_RNG `LINE_SELECT_SIZE_END-1:0 -`define TAG_SELECT_NUM_BITS 32-(`OFFSET_ADDR_NUM_BITS + `WORD_SELECT_NUM_BITS + `BANK_SELECT_NUM_BITS + `LINE_SELECT_NUM_BITS) -`define TAG_SELECT_SIZE_END `TAG_SELECT_NUM_BITS -`define TAG_SELECT_ADDR_START 1+`LINE_SELECT_ADDR_END + +// 10 +`define TAG_SELECT_ADDR_START (1+`LINE_SELECT_ADDR_END) +// 31:10 `define TAG_SELECT_ADDR_RNG 31:`TAG_SELECT_ADDR_START +// 22 +`define TAG_SELECT_NUM_BITS (32-`TAG_SELECT_ADDR_START) +// 22 +`define TAG_SELECT_SIZE_END (`TAG_SELECT_NUM_BITS) +// 21:0 `define TAG_SELECT_SIZE_RNG `TAG_SELECT_SIZE_END-1:0 diff --git a/rtl/VX_cache/VX_cache_miss_resrv.v b/rtl/VX_cache/VX_cache_miss_resrv.v index 9c2be799..d175aeeb 100644 --- a/rtl/VX_cache/VX_cache_miss_resrv.v +++ b/rtl/VX_cache/VX_cache_miss_resrv.v @@ -53,12 +53,13 @@ module VX_cache_miss_resrv input wire miss_add, input wire[31:0] miss_add_addr, input wire[31:0] miss_add_data, - input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid, + input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid, input wire[4:0] miss_add_rd, input wire[1:0] miss_add_wb, input wire[`NW_M1:0] miss_add_warp_num, input wire[2:0] miss_add_mem_read, input wire[2:0] miss_add_mem_write, + input wire[31:0] miss_add_pc, output wire miss_resrv_full, // Broadcast Fill @@ -75,17 +76,19 @@ module VX_cache_miss_resrv output wire[1:0] miss_resrv_wb_st0, output wire[`NW_M1:0] miss_resrv_warp_num_st0, output wire[2:0] miss_resrv_mem_read_st0, + output wire[31:0] miss_resrv_pc_st0, output wire[2:0] miss_resrv_mem_write_st0 ); // Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0]; - reg[MRVQ_SIZE-1:0][31:0] addr_table; - reg[MRVQ_SIZE-1:0] valid_table; - reg[MRVQ_SIZE-1:0] ready_table; - reg[`vx_clog2(MRVQ_SIZE)-1:0] head_ptr; - reg[`vx_clog2(MRVQ_SIZE)-1:0] tail_ptr; + reg[MRVQ_SIZE-1:0][31:0] addr_table; + reg[MRVQ_SIZE-1:0][31:0] pc_table; + reg[MRVQ_SIZE-1:0] valid_table; + reg[MRVQ_SIZE-1:0] ready_table; + reg[`vx_clog2(MRVQ_SIZE)-1:0] head_ptr; + reg[`vx_clog2(MRVQ_SIZE)-1:0] tail_ptr; assign miss_resrv_full = (MRVQ_SIZE != 2) && (tail_ptr+1) == head_ptr; @@ -108,7 +111,7 @@ module VX_cache_miss_resrv wire[`vx_clog2(MRVQ_SIZE)-1:0] dequeue_index = head_ptr; assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible; - + assign miss_resrv_pc_st0 = pc_table[dequeue_index]; assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_rd_st0, miss_resrv_wb_st0, miss_resrv_warp_num_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0} = metadata_table[dequeue_index]; @@ -120,10 +123,12 @@ module VX_cache_miss_resrv valid_table <= 0; ready_table <= 0; addr_table <= 0; + pc_table <= 0; end else begin if (miss_add && enqueue_possible && (MRVQ_SIZE != 2)) begin valid_table[enqueue_index] <= 1; ready_table[enqueue_index] <= 0; + pc_table[enqueue_index] <= miss_add_pc; addr_table[enqueue_index] <= miss_add_addr; metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write}; tail_ptr <= tail_ptr + 1; @@ -138,6 +143,7 @@ module VX_cache_miss_resrv ready_table[dequeue_index] <= 0; addr_table[dequeue_index] <= 0; metadata_table[dequeue_index] <= 0; + pc_table[dequeue_index] <= 0; head_ptr <= head_ptr + 1; end diff --git a/rtl/VX_cache/VX_cache_req_queue.v b/rtl/VX_cache/VX_cache_req_queue.v index cbc4ce67..d9c71294 100644 --- a/rtl/VX_cache/VX_cache_req_queue.v +++ b/rtl/VX_cache/VX_cache_req_queue.v @@ -58,6 +58,7 @@ module VX_cache_req_queue input wire [`NW_M1:0] bank_warp_num, input wire [2:0] bank_mem_read, input wire [2:0] bank_mem_write, + input wire [31:0] bank_pc, // Dequeue Data input wire reqq_pop, @@ -70,6 +71,7 @@ module VX_cache_req_queue output wire [`NW_M1:0] reqq_req_warp_num_st0, output wire [2:0] reqq_req_mem_read_st0, output wire [2:0] reqq_req_mem_write_st0, + output wire [31:0] reqq_req_pc_st0, // State Data output wire reqq_empty, @@ -84,6 +86,7 @@ module VX_cache_req_queue wire [`NW_M1:0] out_per_warp_num; wire [2:0] out_per_mem_read; wire [2:0] out_per_mem_write; + wire [31:0] out_per_pc; reg [NUMBER_REQUESTS-1:0] use_per_valids; @@ -91,6 +94,7 @@ module VX_cache_req_queue reg [NUMBER_REQUESTS-1:0][31:0] use_per_writedata; reg [4:0] use_per_rd; reg [1:0] use_per_wb; + reg [31:0] use_per_pc; reg [`NW_M1:0] use_per_warp_num; reg [2:0] use_per_mem_read; reg [2:0] use_per_mem_write; @@ -104,6 +108,7 @@ module VX_cache_req_queue wire [`NW_M1:0] qual_warp_num; wire [2:0] qual_mem_read; wire [2:0] qual_mem_write; + wire [31:0] qual_pc; wire[NUMBER_REQUESTS-1:0] updated_valids; @@ -115,13 +120,13 @@ module VX_cache_req_queue wire push_qual = reqq_push && !reqq_full; wire pop_qual = reqq_pop && use_empty && !out_empty; - VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+32)) + 5 + 2 + (`NW_M1+1) + 3 + 3 ), .SIZE(REQQ_SIZE)) reqq_queue( + VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+32)) + 5 + 2 + (`NW_M1+1) + 3 + 3 + 32 ), .SIZE(REQQ_SIZE)) reqq_queue( .clk (clk), .reset (reset), .push (push_qual), - .in_data ({bank_valids , bank_addr , bank_writedata , bank_rd , bank_wb , bank_warp_num , bank_mem_read , bank_mem_write}), + .in_data ({bank_valids , bank_addr , bank_writedata , bank_rd , bank_wb , bank_warp_num , bank_mem_read , bank_mem_write , bank_pc}), .pop (pop_qual), - .out_data({out_per_valids, out_per_addr, out_per_writedata, out_per_rd, out_per_wb, out_per_warp_num, out_per_mem_read, out_per_mem_write}), + .out_data({out_per_valids, out_per_addr, out_per_writedata, out_per_rd, out_per_wb, out_per_warp_num, out_per_mem_read, out_per_mem_write, out_per_pc}), .empty (o_empty), .full (reqq_full) ); @@ -137,6 +142,7 @@ module VX_cache_req_queue assign qual_warp_num = use_empty ? out_per_warp_num : use_per_warp_num; assign qual_mem_read = use_empty ? out_per_mem_read : use_per_mem_read; assign qual_mem_write = use_empty ? out_per_mem_write : use_per_mem_write; + assign qual_pc = use_empty ? out_per_pc : use_per_pc; wire[`vx_clog2(NUMBER_REQUESTS)-1:0] qual_request_index; wire qual_has_request; @@ -156,6 +162,7 @@ module VX_cache_req_queue assign reqq_req_warp_num_st0 = qual_warp_num; assign reqq_req_mem_read_st0 = qual_mem_read; assign reqq_req_mem_write_st0 = qual_mem_write; + assign reqq_req_pc_st0 = qual_pc; assign updated_valids = qual_valids & (~(1 << qual_request_index)); @@ -169,6 +176,7 @@ module VX_cache_req_queue use_per_warp_num <= 0; use_per_mem_read <= 0; use_per_mem_write <= 0; + use_per_pc <= 0; end else begin if (reqq_pop && qual_has_request) begin use_per_valids <= updated_valids; @@ -179,6 +187,7 @@ module VX_cache_req_queue use_per_warp_num <= qual_warp_num; use_per_mem_read <= qual_mem_read; use_per_mem_write <= qual_mem_write; + use_per_pc <= qual_pc; end // else if (reqq_pop) begin // use_per_valids[qual_request_index] <= updated_valids; diff --git a/rtl/VX_cache/VX_cache_wb_sel_merge.v b/rtl/VX_cache/VX_cache_wb_sel_merge.v index 99048f0a..99c5d815 100644 --- a/rtl/VX_cache/VX_cache_wb_sel_merge.v +++ b/rtl/VX_cache/VX_cache_wb_sel_merge.v @@ -48,18 +48,20 @@ module VX_cache_wb_sel_merge // Per Bank WB input wire [NUMBER_BANKS-1:0] per_bank_wb_valid, - input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid, + input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid, input wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd, input wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb, input wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num, input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_data, + input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc, output wire [NUMBER_BANKS-1:0] per_bank_wb_pop, // Core Writeback input wire core_no_wb_slot, - output reg [NUMBER_REQUESTS-1:0] core_wb_valid, - output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata, + output reg [NUMBER_REQUESTS-1:0] core_wb_valid, + output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata, + output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_pc, output wire [4:0] core_wb_req_rd, output wire [1:0] core_wb_req_wb, output wire [`NW_M1:0] core_wb_warp_num @@ -96,10 +98,12 @@ module VX_cache_wb_sel_merge always @(*) begin core_wb_valid = 0; core_wb_readdata = 0; + core_wb_pc = 0; for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin if (found_bank && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin core_wb_valid[per_bank_wb_tid[this_bank]] = 1; core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank]; + core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank]; per_bank_wb_pop_unqual[this_bank] = 1; end else begin per_bank_wb_pop_unqual[this_bank] = 0; diff --git a/rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v b/rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v index 61dec330..ef0863e8 100644 --- a/rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v +++ b/rtl/VX_cache/VX_dcache_llv_resp_bank_sel.v @@ -52,8 +52,8 @@ module VX_dcache_llv_resp_bank_sel input wire[NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid, input wire llvq_pop, - output reg[NUMBER_REQUESTS-1:0] llvq_valid, - output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr, + output reg[NUMBER_REQUESTS-1:0] llvq_valid, + output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr, output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_SIZE_RNG][31:0] llvq_res_data @@ -75,10 +75,10 @@ module VX_dcache_llv_resp_bank_sel llvq_res_data = 0; per_bank_llvq_pop = 0; if (found_bank && llvq_pop) begin - llvq_valid [per_bank_llvq_res_tid] = 1; - llvq_res_addr[per_bank_llvq_res_tid] = per_bank_llvq_res_addr[main_bank_index]; - llvq_res_data[per_bank_llvq_res_tid] = per_bank_llvq_res_data[main_bank_index]; - per_bank_llvq_pop[main_bank_index] = 1; + llvq_valid [per_bank_llvq_res_tid[main_bank_index]] = 1'b1; + llvq_res_addr[per_bank_llvq_res_tid[main_bank_index]] = per_bank_llvq_res_addr[main_bank_index]; + llvq_res_data[per_bank_llvq_res_tid[main_bank_index]] = per_bank_llvq_res_data[main_bank_index]; + per_bank_llvq_pop[main_bank_index] = 1'b1; end end diff --git a/rtl/VX_cache/VX_fill_invalidator.v b/rtl/VX_cache/VX_fill_invalidator.v index decdfb15..33e6eead 100644 --- a/rtl/VX_cache/VX_fill_invalidator.v +++ b/rtl/VX_cache/VX_fill_invalidator.v @@ -69,7 +69,7 @@ module VX_fill_invalidator reg success_found; - reg[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] success_index; + reg[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0] success_index; integer curr_fill; always @(*) begin @@ -85,7 +85,7 @@ module VX_fill_invalidator if (success_fill) begin success_found = 1; - success_index = curr_fill; + success_index = curr_fill[(`vx_clog2(FILL_INVALIDAOR_SIZE))-1:0]; end end end diff --git a/rtl/VX_cache/VX_tag_data_structure.v b/rtl/VX_cache/VX_tag_data_structure.v index c817698d..d869f35e 100644 --- a/rtl/VX_cache/VX_tag_data_structure.v +++ b/rtl/VX_cache/VX_tag_data_structure.v @@ -69,6 +69,8 @@ module VX_tag_data_structure reg dirty[`BANK_LINE_COUNT-1:0]; + wire[`TAG_SELECT_SIZE_RNG] kkkkkk = write_addr[`TAG_SELECT_ADDR_RNG]; + assign read_valid = valid[read_addr[`LINE_SELECT_ADDR_RNG]]; assign read_dirty = dirty[read_addr[`LINE_SELECT_ADDR_RNG]]; assign read_tag = tag [read_addr[`LINE_SELECT_ADDR_RNG]]; diff --git a/rtl/VX_define.v b/rtl/VX_define.v index b2938dcf..ab496034 100644 --- a/rtl/VX_define.v +++ b/rtl/VX_define.v @@ -230,7 +230,7 @@ `define DSTAGE_1_CYCLES 2 // Bank Number of words in a line - `define DBANK_LINE_SIZE_WORDS (`DBANK_LINE_SIZE_BYTES / `DNUMBER_BANKS) + `define DBANK_LINE_SIZE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES) `define DBANK_LINE_SIZE_RNG `DBANK_LINE_SIZE_WORDS-1:0 // Queues feeding into banks Knobs {1, 2, 4, 8, ...} diff --git a/rtl/VX_dmem_controller.v b/rtl/VX_dmem_controller.v index 0a3cecd4..451f65d4 100644 --- a/rtl/VX_dmem_controller.v +++ b/rtl/VX_dmem_controller.v @@ -54,9 +54,9 @@ module VX_dmem_controller ( .SM_BLOCK_OFFSET_END (`SHARED_MEMORY_BLOCK_OFFSET_ED), .SM_INDEX_START (`SHARED_MEMORY_INDEX_OFFSET_ST), .SM_INDEX_END (`SHARED_MEMORY_INDEX_OFFSET_ED), - .SM_HEIGHT (`SHARED_MEMORY_HEIGHT), + .SM_HEIGHT (`SHARED_MEMORY_HEIGHT), .NUM_REQ (`SHARED_MEMORY_NUM_REQ), - .BITS_PER_BANK (`SHARED_MEMORY_BITS_PER_BANK) + .BITS_PER_BANK (`SHARED_MEMORY_BITS_PER_BANK) ) shared_memory ( @@ -73,6 +73,12 @@ module VX_dmem_controller ( ); + wire Dllvq_pop; + wire[`DNUMBER_REQUESTS-1:0] Dllvq_valid; + wire[`DNUMBER_REQUESTS-1:0][31:0] Dllvq_res_addr; + wire[`DNUMBER_REQUESTS-1:0][`DBANK_LINE_SIZE_RNG][31:0] Dllvq_res_data; + + assign Dllvq_pop = 0; VX_cache #( .CACHE_SIZE_BYTES (`DCACHE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (`DBANK_LINE_SIZE_BYTES), @@ -105,6 +111,7 @@ module VX_dmem_controller ( .core_req_rd (VX_dcache_req.core_req_rd), .core_req_wb (VX_dcache_req.core_req_wb), .core_req_warp_num (VX_dcache_req.core_req_warp_num), + .core_req_pc (VX_dcache_req.core_req_pc), // Delay Core Req .delay_req (VX_dcache_rsp.delay_req), @@ -118,6 +125,7 @@ module VX_dmem_controller ( .core_wb_req_wb (VX_dcache_rsp.core_wb_req_wb), .core_wb_warp_num (VX_dcache_rsp.core_wb_warp_num), .core_wb_readdata (VX_dcache_rsp.core_wb_readdata), + .core_wb_pc (VX_dcache_rsp.core_wb_pc), // DRAM response .dram_fill_rsp (VX_gpu_dcache_dram_res.dram_fill_rsp), @@ -141,58 +149,16 @@ module VX_dmem_controller ( // Snoop Request .snp_req (0), - .snp_req_addr (0) + .snp_req_addr (0), + + // LLVQ stuff + .llvq_pop (Dllvq_pop), + .llvq_valid (Dllvq_valid), + .llvq_res_addr (Dllvq_res_addr), + .llvq_res_data (Dllvq_res_data) ); - - // VX_d_cache#( - // .CACHE_SIZE (`DCACHE_SIZE), - // .CACHE_WAYS (`DCACHE_WAYS), - // .CACHE_BLOCK (`DCACHE_BLOCK), - // .CACHE_BANKS (`DCACHE_BANKS), - // .LOG_NUM_BANKS (`DCACHE_LOG_NUM_BANKS), - // .NUM_REQ (`DCACHE_NUM_REQ), - // .LOG_NUM_REQ (`DCACHE_LOG_NUM_REQ), - // .NUM_IND (`DCACHE_NUM_IND), - // .CACHE_WAY_INDEX (`DCACHE_WAY_INDEX), - // .NUM_WORDS_PER_BLOCK (`DCACHE_NUM_WORDS_PER_BLOCK), - // .OFFSET_SIZE_START (`DCACHE_OFFSET_ST), - // .OFFSET_SIZE_END (`DCACHE_OFFSET_ED), - // .TAG_SIZE_START (`DCACHE_TAG_SIZE_START), - // .TAG_SIZE_END (`DCACHE_TAG_SIZE_END), - // .IND_SIZE_START (`DCACHE_IND_SIZE_START), - // .IND_SIZE_END (`DCACHE_IND_SIZE_END), - // .ADDR_TAG_START (`DCACHE_ADDR_TAG_START), - // .ADDR_TAG_END (`DCACHE_ADDR_TAG_END), - // .ADDR_OFFSET_START (`DCACHE_ADDR_OFFSET_ST), - // .ADDR_OFFSET_END (`DCACHE_ADDR_OFFSET_ED), - // .ADDR_IND_START (`DCACHE_IND_ST), - // .ADDR_IND_END (`DCACHE_IND_ED), - // .MEM_ADDR_REQ_MASK (`DCACHE_MEM_REQ_ADDR_MASK) - // ) - // dcache - // ( - // .clk (clk), - // .rst (reset), - // .i_p_valid (cache_driver_in_valid), - // .i_p_addr (cache_driver_in_address), - // .i_p_writedata (cache_driver_in_data), - // .i_p_read_or_write (read_or_write), - // .i_p_mem_read (cache_driver_in_mem_read), - // .i_p_mem_write (cache_driver_in_mem_write), - // .o_p_readdata (cache_driver_out_data), - // .o_p_delay (cache_delay), - // .o_m_evict_addr (VX_dram_req_rsp.o_m_evict_addr), - // .o_m_read_addr (VX_dram_req_rsp.o_m_read_addr), - // .o_m_valid (VX_dram_req_rsp.o_m_valid), - // .o_m_writedata (VX_dram_req_rsp.o_m_writedata), - // .o_m_read_or_write (VX_dram_req_rsp.o_m_read_or_write), - // .i_m_readdata (VX_dram_req_rsp.i_m_readdata), - // .i_m_ready (VX_dram_req_rsp.i_m_ready) - // ); - - VX_d_cache #( .CACHE_SIZE (`ICACHE_SIZE), .CACHE_WAYS (`ICACHE_WAYS), diff --git a/rtl/VX_generic_queue_ll.v b/rtl/VX_generic_queue_ll.v index a54a2403..4ffe34e6 100644 --- a/rtl/VX_generic_queue_ll.v +++ b/rtl/VX_generic_queue_ll.v @@ -81,6 +81,12 @@ module VX_generic_queue_ll end end end + + always @(posedge clk) begin + if (writing) begin + data[wr_ctr_r] <= in_data; + end + end always @(posedge clk) begin if (reset) begin @@ -98,19 +104,17 @@ module VX_generic_queue_ll end end - if (!(!reading && bypass_r)) begin - bypass_r <= writing && (empty_r || (1 == size_r && reading)); - curr_r <= in_data; - end + bypass_r <= writing && (empty_r || (1 == size_r) && reading); + curr_r <= in_data; head_r <= data[reading ? rd_next_ptr_r : rd_ptr_r]; end end - + assign out_data = bypass_r ? curr_r : head_r; assign empty = empty_r; assign full = full_r; end - + end diff --git a/rtl/VX_lsu.v b/rtl/VX_lsu.v index c177480e..0081dba8 100644 --- a/rtl/VX_lsu.v +++ b/rtl/VX_lsu.v @@ -54,6 +54,7 @@ module VX_lsu ( assign VX_dcache_req.core_req_rd = use_rd; assign VX_dcache_req.core_req_wb = use_wb; assign VX_dcache_req.core_req_warp_num = use_warp_num; + assign VX_dcache_req.core_req_pc = use_pc; // Cache can't accept request assign out_delay = VX_dcache_rsp.delay_req; @@ -64,7 +65,7 @@ module VX_lsu ( assign VX_mem_wb.wb_valid = VX_dcache_rsp.core_wb_valid; assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num; assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata; - assign VX_mem_wb.mem_wb_pc = 32'hdeadbeff; + assign VX_mem_wb.mem_wb_pc = VX_dcache_rsp.core_wb_pc[0]; // Core can't accept response assign VX_dcache_req.core_no_wb_slot = no_slot_mem; diff --git a/rtl/interfaces/VX_gpu_dcache_req_inter.v b/rtl/interfaces/VX_gpu_dcache_req_inter.v index 003ea4c1..2c37f355 100644 --- a/rtl/interfaces/VX_gpu_dcache_req_inter.v +++ b/rtl/interfaces/VX_gpu_dcache_req_inter.v @@ -21,6 +21,7 @@ interface VX_gpu_dcache_req_inter wire [4:0] core_req_rd; wire [1:0] core_req_wb; wire [`NW_M1:0] core_req_warp_num; + wire [31:0] core_req_pc; // Can't WB wire core_no_wb_slot; diff --git a/rtl/interfaces/VX_gpu_dcache_res_inter.v b/rtl/interfaces/VX_gpu_dcache_res_inter.v index bd32e801..59f5168e 100644 --- a/rtl/interfaces/VX_gpu_dcache_res_inter.v +++ b/rtl/interfaces/VX_gpu_dcache_res_inter.v @@ -13,11 +13,12 @@ interface VX_gpu_dcache_res_inter (); // Cache WB - wire [NUMBER_REQUESTS-1:0] core_wb_valid; + wire [NUMBER_REQUESTS-1:0] core_wb_valid; wire [4:0] core_wb_req_rd; wire [1:0] core_wb_req_wb; wire [`NW_M1:0] core_wb_warp_num; - wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata; + wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata; + wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc; // Cache Full wire delay_req; diff --git a/rtl/unit_tests/generic_queue/testbench.v b/rtl/unit_tests/generic_queue/testbench.v index a406abd5..b2dc8720 100644 --- a/rtl/unit_tests/generic_queue/testbench.v +++ b/rtl/unit_tests/generic_queue/testbench.v @@ -10,14 +10,9 @@ module testbench(); reg[3:0] in_data; reg push; reg pop; - wire io_enq_ready; wire[3:0] out_data; - wire io_deq_valid; - - wire full, empty; - - assign io_enq_ready = !full; - assign io_deq_valid = !empty; + wire full; + wire empty; VX_generic_queue_ll #(.DATAW(4), .SIZE(4)) dut ( .clk(clk), @@ -34,40 +29,28 @@ module testbench(); end initial begin - $monitor ("%d: clk=%b rst=%b push=%b, pop=%b, din=%h, empty=%b, full=%b, dout=%h", $time, clk, reset, push, pop, in_data, empty, full, out_data); - #0 clk=0; reset=1; in_data=4'hd; push=1; pop=1; - #1 `check(io_enq_ready, 1); `check(out_data, 4'hd); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hx); `check(io_deq_valid, 0); - #0 reset=0; in_data=4'ha; pop=0; - #1 `check(io_enq_ready, 1); `check(out_data, 4'hx); `check(io_deq_valid, 0); - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); + $monitor ("%d: clk=%b rst=%b push=%b, pop=%b, din=%h, empty=%b, full=%b, dout=%h", + $time, clk, reset, push, pop, in_data, empty, full, out_data); + #0 clk=0; reset=1; pop=0; push=0; + #2 reset=0; in_data=4'ha; pop=0; push=1; + #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0); #0 in_data=4'hb; - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); + #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0); #0 in_data=4'hc; - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); + #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 0); #0 in_data=4'hd; - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 0); `check(out_data, 4'ha); `check(io_deq_valid, 1); + #2 `check(full, 1); `check(out_data, 4'ha); `check(empty, 0); #0 push=0; pop=1; - #1 `check(io_enq_ready, 0); `check(out_data, 4'ha); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hb); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hb); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hc); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hc); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hd); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hd); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 0); - #0 in_data=4'ha; push=1; pop=0; - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 0); - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); - #0 in_data=4'hb; pop=1; - #1 `check(io_enq_ready, 1); `check(out_data, 4'ha); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hb); `check(io_deq_valid, 1); + #2 `check(full, 0); `check(out_data, 4'hb); `check(empty, 0); + #2 `check(full, 0); `check(out_data, 4'hc); `check(empty, 0); + #2 `check(full, 0); `check(out_data, 4'hd); `check(empty, 0); + #2 `check(full, 0); `check(out_data, 4'ha); `check(empty, 1); + #0 in_data=4'he; push=1; pop=0; + #2 `check(full, 0); `check(out_data, 4'he); `check(empty, 0); + #0 in_data=4'hf; pop=1; + #2 `check(full, 0); `check(out_data, 4'hf); `check(empty, 0); #0 push=0; - #1 `check(io_enq_ready, 1); `check(out_data, 4'hb); `check(io_deq_valid, 1); - #1 `check(io_enq_ready, 1); `check(out_data, 4'hc); `check(io_deq_valid, 0); + #2 `check(full, 0); `check(out_data, 4'hc); `check(empty, 1); #1 $finish; end