diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index d31e0ceb..0db28ba0 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -67,7 +67,60 @@ inline bool is_aligned(size_t addr, size_t alignment) { /////////////////////////////////////////////////////////////////////////////// -static int vx_scope_start(vx_device_h hdevice) { +struct scope_signal_t { + int width; + const char* name; +}; + +static const scope_signal_t scope_signals[] = { + { 32, "icache_req_addr" }, + { 2 , "icache_req_tag" }, + { 32, "icache_rsp_data" }, + { 2 , "icache_rsp_tag" }, + { 32, "dcache_req_addr" }, + { 2 , "dcache_req_tag" }, + { 32, "dcache_rsp_data" }, + { 2 , "dcache_rsp_tag" }, + { 29, "dram_req_tag" }, + { 29, "dram_rsp_tag" }, + { 2 , "icache_req_warp_num" }, + { 2 , "dcache_req_warp_num" }, + { 32, "decode_curr_PC" }, + { 5 , "execute_rd" }, + { 2 , "execute_warp_num" }, + { 32, "execute_a" }, + { 32, "execute_b" }, + { 5 , "writeback_rd" }, + { 2 , "writeback_warp_num" }, + { 32, "writeback_data" }, + { 2 , "decode_warp_num" }, + { 1 , "decode_is_jal" }, + { 5 , "decode_rs1" }, + { 5 , "decode_rs2" }, + { 2 , "writeback_wb" }, + + { 1, "icache_req_valid" }, + { 1, "icache_req_ready" }, + { 1, "icache_rsp_valid" }, + { 1, "icache_rsp_ready" }, + { 4, "dcache_req_valid" }, + { 1, "dcache_req_ready" }, + { 4, "dcache_rsp_valid" }, + { 1, "dcache_rsp_ready" }, + { 1, "dram_req_valid" }, + { 1, "dram_req_ready" }, + { 1, "dram_rsp_valid" }, + { 1, "dram_rsp_ready" }, + { 4, "decode_valid" }, + { 4, "execute_valid" }, + { 4, "writeback_valid" }, + { 1, "schedule_delay" }, + { 1, "memory_delay" }, + { 1, "exec_delay" }, + { 1, "gpr_stage_delay" }, +}; + +static int vx_scope_start(vx_device_h hdevice) { if (nullptr == hdevice) return -1; @@ -80,48 +133,19 @@ static int vx_scope_start(vx_device_h hdevice) { // start execution CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN)); + const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t); + std::ofstream ofs("vx_scope.vcd"); ofs << "$timescale 1 ns $end" << std::endl; - int fwidth = 0; - ofs << "$var reg 1 0 clk $end" << std::endl; - fwidth += 1; - - ofs << "$var reg 1 1 icache_req_valid $end" << std::endl; - ofs << "$var reg 1 2 icache_req_ready $end" << std::endl; - ofs << "$var reg 1 3 icache_rsp_valid $end" << std::endl; - ofs << "$var reg 1 4 icache_rsp_ready $end" << std::endl; - ofs << "$var reg 4 5 dcache_req_valid $end" << std::endl; - ofs << "$var reg 1 6 dcache_req_ready $end" << std::endl; - ofs << "$var reg 4 7 dcache_rsp_valid $end" << std::endl; - ofs << "$var reg 1 8 dcache_rsp_ready $end" << std::endl; - ofs << "$var reg 1 9 dram_req_valid $end" << std::endl; - ofs << "$var reg 1 10 dram_req_ready $end" << std::endl; - ofs << "$var reg 1 11 dram_rsp_valid $end" << std::endl; - ofs << "$var reg 1 12 dram_rsp_ready $end" << std::endl; - ofs << "$var reg 1 13 schedule_delay $end" << std::endl; - - fwidth += 19; - - ofs << "$var reg 32 14 icache_req_addr $end" << std::endl; - ofs << "$var reg 2 15 icache_req_tag $end" << std::endl; - ofs << "$var reg 32 16 icache_rsp_data $end" << std::endl; - ofs << "$var reg 2 17 icache_rsp_tag $end" << std::endl; - ofs << "$var reg 32 18 dcache_req_addr $end" << std::endl; - ofs << "$var reg 2 19 dcache_req_tag $end" << std::endl; - ofs << "$var reg 32 20 dcache_rsp_data $end" << std::endl; - ofs << "$var reg 2 21 dcache_rsp_tag $end" << std::endl; - ofs << "$var reg 29 22 dram_req_tag $end" << std::endl; - ofs << "$var reg 29 23 dram_rsp_tag $end" << std::endl; - ofs << "$var reg 2 24 icache_req_warp $end" << std::endl; - ofs << "$var reg 2 25 dcache_req_warp $end" << std::endl; - - fwidth += 198; - - const int num_signals = 26; + int fwidth = 0; + for (int i = 0; i < num_signals; ++i) { + ofs << "$var reg " << scope_signals[i].width << " " << (i+1) << " " << scope_signals[i].name << " $end" << std::endl; + fwidth += scope_signals[i].width; + } uint64_t frame_width, max_frames, data_valid; @@ -141,7 +165,7 @@ static int vx_scope_start(vx_device_h hdevice) { CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &frame_width)); std::cout << "scope::frame_width=" << frame_width << std::endl; - assert((fwidth-1)== (int)frame_width); + assert(fwidth == (int)frame_width); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 3)); CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &max_frames)); @@ -149,7 +173,7 @@ static int vx_scope_start(vx_device_h hdevice) { CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_CMD, 1)); - std::vector signa_data(frame_width+1); + std::vector signal_data(frame_width+1); uint64_t frame_offset = 0, frame_no = 0, timestamp = 0; @@ -174,34 +198,7 @@ static int vx_scope_start(vx_device_h hdevice) { --delta; } - signal_id = 1; - }; - - auto print_signal = [&] (uint64_t word, int signal_width) { - - int word_offset = frame_offset % 64; - - signa_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0'; - - ++signal_offset; - ++frame_offset; - - if (signal_offset == signal_width) { - signa_data[signal_width] = 0; // string null termination - ofs << 'b' << signa_data.data() << ' ' << (num_signals - signal_id) << std::endl; - signal_offset = 0; - ++signal_id; - } - - if (frame_offset == frame_width) { - assert(0 == signal_offset); - signal_id = 0; - frame_offset = 0; - ++frame_no; - if (frame_no != max_frames) { - print_header(); - } - } + signal_id = num_signals; }; print_header(); @@ -218,34 +215,30 @@ static int vx_scope_start(vx_device_h hdevice) { uint64_t word; CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_SCOPE_DATA, &word)); - do { - switch (num_signals - signal_id) { - default: - print_signal(word, 1); - break; - case 15: - case 17: - case 19: - case 21: - case 24: - case 25: - print_signal(word, 2); - break; - case 5: - case 7: - print_signal(word, 4); - break; - case 22: - case 23: - print_signal(word, 29); - break; - case 14: - case 16: - case 18: - case 20: - print_signal(word, 32); - break; - } + do { + int signal_width = scope_signals[signal_id-1].width; + int word_offset = frame_offset % 64; + + signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0'; + + ++signal_offset; + ++frame_offset; + + if (signal_offset == signal_width) { + signal_data[signal_width] = 0; // string null termination + ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl; + signal_offset = 0; + --signal_id; + } + + if (frame_offset == frame_width) { + assert(0 == signal_offset); + frame_offset = 0; + ++frame_no; + if (frame_no != max_frames) { + print_header(); + } + } } while ((frame_offset % 64) != 0); } while (frame_no != max_frames); diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 658e089f..146880f5 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -10,6 +10,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ -DDBG_PRINT_CACHE_SNP \ -DDBG_PRINT_CACHE_MSRQ \ -DDBG_PRINT_DRAM \ + -DDBG_PRINT_WB \ -DDBG_PRINT_OPAE #DBG_PRINT=$(DBG_PRINT_FLAGS) @@ -19,7 +20,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 #DEBUG=1 -#AFU=1 +AFU=1 CFLAGS += -fPIC diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin old mode 100755 new mode 100644 index 6efd60e5..803639e3 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 8a07c21e..277659e0 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -9,6 +9,7 @@ vortex_afu.json #+define+DBG_PRINT_CACHE_SNP #+define+DBG_PRINT_CACHE_MSRQ #+define+DBG_PRINT_DRAM +#+define+DBG_PRINT_WB #+define+DBG_PRINT_OPAE #+define+DBG_PRINT_SCOPE @@ -87,12 +88,11 @@ vortex_afu.json ../rtl/VX_writeback.v ../rtl/VX_csr_pipe.v ../rtl/VX_csr_data.v -../rtl/VX_csr_wrapper.v ../rtl/VX_warp_sched.v ../rtl/VX_gpr.v ../rtl/VX_gpr_ram.v ../rtl/VX_gpr_stage.v -../rtl/VX_dmem_ctrl.v +../rtl/VX_mem_ctrl.v ../rtl/VX_alu_unit.v ../rtl/VX_lsu_unit.v ../rtl/VX_decode.v diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 5216e359..8aa0a4fb 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -804,7 +804,7 @@ end `SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag); `SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready); -`STATIC_ASSERT($bits({`SCOPE_SIGNALS_LIST}) == 217, "oops!") +`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 389, "oops!") wire force_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready) @@ -814,17 +814,17 @@ wire force_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_dram_rsp_valid && scope_dram_rsp_ready); VX_scope #( - .DATAW ($bits({`SCOPE_SIGNALS_LIST})), + .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST})), .BUSW (64), - .SIZE (8192), - .IDW (19) + .SIZE (4096), + .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) ) scope ( .clk (clk), .reset (SoftReset), .start (vx_reset), .stop (cmd_run_done), .changed (force_changed), - .data_in ({`SCOPE_SIGNALS_LIST}), + .data_in ({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}), .bus_in (csr_scope_cmd), .bus_out (csr_scope_data), .bus_read (csr_scope_read), @@ -841,7 +841,6 @@ Vortex_Socket #() vx_socket ( `SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_FE_ATTACH `SCOPE_SIGNALS_BE_ATTACH .clk (clk), @@ -865,6 +864,7 @@ Vortex_Socket #() vx_socket ( // Snoop request .snp_req_valid (vx_snp_req_valid), .snp_req_addr (vx_snp_req_addr), + .snp_req_invalidate(0), .snp_req_tag (vx_snp_req_tag), .snp_req_ready (vx_snp_req_ready), diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index b082cc2b..bc02a785 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -1,17 +1,17 @@ `include "VX_define.vh" module VX_alu_unit ( - input wire clk, - input wire reset, - input wire[31:0] src_a, - input wire[31:0] src_b, - input wire src_rs2, - input wire[31:0] itype_immed, - input wire[19:0] upper_immed, - input wire[4:0] alu_op, - input wire[31:0] curr_PC, - output reg[31:0] alu_result, - output reg alu_stall + input wire clk, + input wire reset, + input wire [31:0] src_a, + input wire [31:0] src_b, + input wire src_rs2, + input wire [31:0] itype_immed, + input wire [19:0] upper_immed, + input wire [4:0] alu_op, + input wire [31:0] curr_PC, + output reg [31:0] alu_result, + output reg alu_stall ); localparam div_pipeline_len = 20; localparam mul_pipeline_len = 8; @@ -85,7 +85,7 @@ module VX_alu_unit ( reg [15:0] inst_delay; reg inst_was_stalling; - wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0; + wire inst_delay_stall = inst_was_stalling ? (inst_delay != 0) : (curr_inst_delay != 0); assign alu_stall = inst_delay_stall; always @(*) begin @@ -127,7 +127,7 @@ module VX_alu_unit ( wire which_in2; wire[31:0] upper_immed; - assign which_in2 = src_rs2 == `RS2_IMMED; + assign which_in2 = (src_rs2 == `RS2_IMMED); assign ALU_in1 = src_a; assign ALU_in2 = which_in2 ? itype_immed : src_b; @@ -167,7 +167,7 @@ module VX_alu_unit ( wire which_in2; wire[31:0] upper_immed_s; - assign which_in2 = src_rs2 == `RS2_IMMED; + assign which_in2 = (src_rs2 == `RS2_IMMED); assign ALU_in1 = src_a; diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index 16adde01..e70e4e10 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -3,6 +3,7 @@ module VX_back_end #( parameter CORE_ID = 0 ) ( + `SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_BE_IO input wire clk, @@ -70,7 +71,7 @@ module VX_back_end #( VX_lsu_unit #( .CORE_ID(CORE_ID) ) lsu_unit ( - `SCOPE_SIGNALS_BE_ATTACH + `SCOPE_SIGNALS_DCACHE_ATTACH .clk (clk), .reset (reset), @@ -123,4 +124,23 @@ module VX_back_end #( .no_slot_csr (no_slot_csr) ); + `SCOPE_ASSIGN(scope_decode_valid, bckE_req_if.valid); + `SCOPE_ASSIGN(scope_decode_warp_num, bckE_req_if.warp_num); + `SCOPE_ASSIGN(scope_decode_curr_PC, bckE_req_if.curr_PC); + `SCOPE_ASSIGN(scope_decode_is_jal, bckE_req_if.is_jal); + `SCOPE_ASSIGN(scope_decode_rs1, bckE_req_if.rs1); + `SCOPE_ASSIGN(scope_decode_rs2, bckE_req_if.rs2); + + `SCOPE_ASSIGN(scope_execute_valid, exec_unit_req_if.valid); + `SCOPE_ASSIGN(scope_execute_warp_num, exec_unit_req_if.warp_num); + `SCOPE_ASSIGN(scope_execute_rd, exec_unit_req_if.rd); + `SCOPE_ASSIGN(scope_execute_a, exec_unit_req_if.a_reg_data[0]); + `SCOPE_ASSIGN(scope_execute_b, exec_unit_req_if.b_reg_data[0]); + + `SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid); + `SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb); + `SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num); + `SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd); + `SCOPE_ASSIGN(scope_writeback_data, writeback_if.data[0]); + endmodule \ No newline at end of file diff --git a/hw/rtl/VX_csr_wrapper.v b/hw/rtl/VX_csr_wrapper.v deleted file mode 100644 index 87e24a23..00000000 --- a/hw/rtl/VX_csr_wrapper.v +++ /dev/null @@ -1,37 +0,0 @@ - -`include "VX_define.vh" - -module VX_csr_wrapper ( - VX_csr_req_if csr_req_if, - VX_wb_if csr_wb_if -); - - wire[`NUM_THREADS-1:0][31:0] thread_ids; - wire[`NUM_THREADS-1:0][31:0] warp_ids; - - genvar i; - generate - for (i = 0; i < `NUM_THREADS; i++) begin : thread_ids_init - assign thread_ids[i] = i; - end - - for (i = 0; i < `NUM_THREADS; i++) begin : warp_ids_init - assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num}; - end - endgenerate - - - assign csr_wb_if.valid = csr_req_if.valid; - assign csr_wb_if.warp_num = csr_req_if.warp_num; - assign csr_wb_if.rd = csr_req_if.rd; - assign csr_wb_if.wb = csr_req_if.wb; - - - wire thread_select = csr_req_if.csr_address == 12'h20; - wire warp_select = csr_req_if.csr_address == 12'h21; - - assign csr_wb_if.csr_result = thread_select ? thread_ids : - warp_select ? warp_ids : - 0; - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 6766b81c..ad4d823e 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -91,7 +91,7 @@ module VX_decode( assign func7 = in_instruction[31:25]; assign u_12 = in_instruction[31:20]; - assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4; + assign frE_to_bckE_req_if.next_PC = in_curr_PC + 32'h4; // Write Back sigal assign is_rtype = (curr_opcode == `INST_R); @@ -169,12 +169,12 @@ module VX_decode( case (curr_opcode) `INST_JAL: begin - temp_jal = 1'b1 && in_valid; + temp_jal = in_valid; temp_jal_offset = jal_1_offset; end `INST_JALR: begin - temp_jal = 1'b1 && in_valid; + temp_jal = in_valid; temp_jal_offset = jal_2_offset; end `INST_SYS: @@ -185,13 +185,13 @@ module VX_decode( end default: begin - temp_jal = 1'b0 && in_valid; - temp_jal_offset = 32'hdeadbeef; + temp_jal = 1'b0; + temp_jal_offset = 32'hdeadbeef; end endcase end - assign frE_to_bckE_req_if.jalQual = is_jal; + assign frE_to_bckE_req_if.is_jal = is_jal; assign frE_to_bckE_req_if.jal = temp_jal; assign frE_to_bckE_req_if.jal_offset = temp_jal_offset; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 384f7157..1a6f54e9 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -283,7 +283,34 @@ /////////////////////////////////////////////////////////////////////////////// `ifdef SCOPE - `define SCOPE_SIGNALS_LIST \ + `define SCOPE_SIGNALS_DATA_LIST \ + scope_icache_req_addr, \ + scope_icache_req_tag, \ + scope_icache_rsp_data, \ + scope_icache_rsp_tag, \ + scope_dcache_req_addr, \ + scope_dcache_req_tag, \ + scope_dcache_rsp_data, \ + scope_dcache_rsp_tag, \ + scope_dram_req_tag, \ + scope_dram_rsp_tag, \ + scope_icache_req_warp_num, \ + scope_dcache_req_warp_num, \ + scope_decode_curr_PC, \ + scope_execute_rd, \ + scope_execute_warp_num, \ + scope_execute_a, \ + scope_execute_b, \ + scope_writeback_rd, \ + scope_writeback_warp_num, \ + scope_writeback_data, \ + scope_decode_warp_num, \ + scope_decode_is_jal, \ + scope_decode_rs1, \ + scope_decode_rs2, \ + scope_writeback_wb, + + `define SCOPE_SIGNALS_UPD_LIST \ scope_icache_req_valid, \ scope_icache_req_ready, \ scope_icache_rsp_valid, \ @@ -296,23 +323,18 @@ scope_dram_req_ready, \ scope_dram_rsp_valid, \ scope_dram_rsp_ready, \ + scope_decode_valid, \ + scope_execute_valid, \ + scope_writeback_valid, \ scope_schedule_delay, \ - scope_icache_req_addr, \ - scope_icache_req_tag, \ - scope_icache_rsp_data, \ - scope_icache_rsp_tag, \ - scope_dcache_req_addr, \ - scope_dcache_req_tag, \ - scope_dcache_rsp_data, \ - scope_dcache_rsp_tag, \ - scope_dram_req_tag, \ - scope_dram_rsp_tag, \ - scope_icache_req_warp, \ - scope_dcache_req_warp + scope_memory_delay, \ + scope_exec_delay, \ + scope_gpr_stage_delay `define SCOPE_SIGNALS_DECL \ wire scope_icache_req_valid; \ wire [31:0] scope_icache_req_addr; \ + wire [1:0] scope_icache_req_warp_num; \ wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \ wire scope_icache_req_ready; \ wire scope_icache_rsp_valid; \ @@ -321,6 +343,7 @@ wire scope_icache_rsp_ready; \ wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \ wire [31:0] scope_dcache_req_addr; \ + wire [1:0] scope_dcache_req_warp_num; \ wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \ wire scope_dcache_req_ready; \ wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \ @@ -334,13 +357,31 @@ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ wire scope_dram_rsp_ready; \ wire scope_schedule_delay; \ - wire [1:0] scope_icache_req_warp; \ - wire [1:0] scope_dcache_req_warp; + wire scope_memory_delay; \ + wire scope_exec_delay; \ + wire scope_gpr_stage_delay; \ + wire [3:0] scope_decode_valid; \ + wire [1:0] scope_decode_warp_num; \ + wire [31:0] scope_decode_curr_PC; \ + wire scope_decode_is_jal; \ + wire [4:0] scope_decode_rs1; \ + wire [4:0] scope_decode_rs2; \ + wire [3:0] scope_execute_valid; \ + wire [1:0] scope_execute_warp_num; \ + wire [4:0] scope_execute_rd; \ + wire [31:0] scope_execute_a; \ + wire [31:0] scope_execute_b; \ + wire [3:0] scope_writeback_valid; \ + wire [1:0] scope_writeback_warp_num; \ + wire [1:0] scope_writeback_wb; \ + wire [4:0] scope_writeback_rd; \ + wire [31:0] scope_writeback_data; `define SCOPE_SIGNALS_ICACHE_IO \ /* verilator lint_off UNDRIVEN */ \ output wire scope_icache_req_valid, \ output wire [31:0] scope_icache_req_addr, \ + output wire [1:0] scope_icache_req_warp_num, \ output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \ output wire scope_icache_req_ready, \ output wire scope_icache_rsp_valid, \ @@ -353,6 +394,7 @@ /* verilator lint_off UNDRIVEN */ \ output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \ output wire [31:0] scope_dcache_req_addr, \ + output wire [1:0] scope_dcache_req_warp_num, \ output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \ output wire scope_dcache_req_ready, \ output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \ @@ -374,21 +416,35 @@ `define SCOPE_SIGNALS_CORE_IO \ /* verilator lint_off UNDRIVEN */ \ output wire scope_schedule_delay, \ - /* verilator lint_on UNDRIVEN */ - - `define SCOPE_SIGNALS_FE_IO \ - /* verilator lint_off UNDRIVEN */ \ - output wire [1:0] scope_icache_req_warp, \ + output wire scope_memory_delay, \ + output wire scope_exec_delay, \ + output wire scope_gpr_stage_delay, \ /* verilator lint_on UNDRIVEN */ `define SCOPE_SIGNALS_BE_IO \ /* verilator lint_off UNDRIVEN */ \ - output wire [1:0] scope_dcache_req_warp, \ + output wire [3:0] scope_decode_valid, \ + output wire [1:0] scope_decode_warp_num, \ + output wire [31:0] scope_decode_curr_PC, \ + output wire scope_decode_is_jal, \ + output wire [4:0] scope_decode_rs1, \ + output wire [4:0] scope_decode_rs2, \ + output wire [3:0] scope_execute_valid, \ + output wire [1:0] scope_execute_warp_num, \ + output wire [4:0] scope_execute_rd, \ + output wire [31:0] scope_execute_a, \ + output wire [31:0] scope_execute_b, \ + output wire [3:0] scope_writeback_valid, \ + output wire [1:0] scope_writeback_warp_num, \ + output wire [1:0] scope_writeback_wb, \ + output wire [4:0] scope_writeback_rd, \ + output wire [31:0] scope_writeback_data, \ /* verilator lint_on UNDRIVEN */ `define SCOPE_SIGNALS_ICACHE_ATTACH \ .scope_icache_req_valid (scope_icache_req_valid), \ .scope_icache_req_addr (scope_icache_req_addr), \ + .scope_icache_req_warp_num (scope_icache_req_warp_num), \ .scope_icache_req_tag (scope_icache_req_tag), \ .scope_icache_req_ready (scope_icache_req_ready), \ .scope_icache_rsp_valid (scope_icache_rsp_valid), \ @@ -399,6 +455,7 @@ `define SCOPE_SIGNALS_DCACHE_ATTACH \ .scope_dcache_req_valid (scope_dcache_req_valid), \ .scope_dcache_req_addr (scope_dcache_req_addr), \ + .scope_dcache_req_warp_num (scope_dcache_req_warp_num), \ .scope_dcache_req_tag (scope_dcache_req_tag), \ .scope_dcache_req_ready (scope_dcache_req_ready), \ .scope_dcache_rsp_valid (scope_dcache_rsp_valid), \ @@ -415,13 +472,28 @@ .scope_dram_rsp_ready (scope_dram_rsp_ready), `define SCOPE_SIGNALS_CORE_ATTACH \ - .scope_schedule_delay (scope_schedule_delay), - - `define SCOPE_SIGNALS_FE_ATTACH \ - .scope_icache_req_warp (scope_icache_req_warp), + .scope_schedule_delay (scope_schedule_delay), \ + .scope_memory_delay (scope_memory_delay), \ + .scope_exec_delay (scope_exec_delay), \ + .scope_gpr_stage_delay (scope_gpr_stage_delay), `define SCOPE_SIGNALS_BE_ATTACH \ - .scope_dcache_req_warp (scope_dcache_req_warp), + .scope_decode_valid (scope_decode_valid), \ + .scope_decode_warp_num (scope_decode_warp_num), \ + .scope_decode_curr_PC (scope_decode_curr_PC), \ + .scope_decode_is_jal (scope_decode_is_jal), \ + .scope_decode_rs1 (scope_decode_rs1), \ + .scope_decode_rs2 (scope_decode_rs2), \ + .scope_execute_valid (scope_execute_valid), \ + .scope_execute_warp_num (scope_execute_warp_num), \ + .scope_execute_rd (scope_execute_rd), \ + .scope_execute_a (scope_execute_a), \ + .scope_execute_b (scope_execute_b), \ + .scope_writeback_valid (scope_writeback_valid), \ + .scope_writeback_warp_num (scope_writeback_warp_num), \ + .scope_writeback_wb (scope_writeback_wb), \ + .scope_writeback_rd (scope_writeback_rd), \ + .scope_writeback_data (scope_writeback_data), `define SCOPE_ASSIGN(d,s) assign d = s `else @@ -429,14 +501,12 @@ `define SCOPE_SIGNALS_DCACHE_IO `define SCOPE_SIGNALS_DRAM_IO `define SCOPE_SIGNALS_CORE_IO - `define SCOPE_SIGNALS_FE_IO `define SCOPE_SIGNALS_BE_IO `define SCOPE_SIGNALS_ICACHE_ATTACH `define SCOPE_SIGNALS_DCACHE_ATTACH `define SCOPE_SIGNALS_DRAM_ATTACH `define SCOPE_SIGNALS_CORE_ATTACH - `define SCOPE_SIGNALS_FE_ATTACH `define SCOPE_SIGNALS_BE_ATTACH `define SCOPE_ASSIGN(d,s) diff --git a/hw/rtl/VX_exec_unit.v b/hw/rtl/VX_exec_unit.v index a43de1b7..fbe504c4 100644 --- a/hw/rtl/VX_exec_unit.v +++ b/hw/rtl/VX_exec_unit.v @@ -7,11 +7,8 @@ module VX_exec_unit ( VX_exec_unit_req_if exec_unit_req_if, // Output - // Writeback - VX_wb_if inst_exec_wb_if, - // JAL Response + VX_wb_if inst_exec_wb_if, VX_jal_rsp_if jal_rsp_if, - // Branch Response VX_branch_rsp_if branch_rsp_if, input wire no_slot_exec, @@ -71,7 +68,7 @@ module VX_exec_unit ( `DEBUG_BEGIN wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index; - wire jal_branch_found_valid; + wire jal_branch_found_valid; `DEBUG_END VX_generic_priority_encoder #( @@ -103,16 +100,12 @@ module VX_exec_unit ( generate for (i = 0; i < `NUM_THREADS; i++) begin - assign duplicate_PC_data[i] = exec_unit_req_if.PC_next; + assign duplicate_PC_data[i] = exec_unit_req_if.next_PC; end endgenerate - - - // VX_wb_if inst_exec_wb_temp_if(); - // JAL Response - VX_jal_rsp_if jal_rsp_temp_if(); - // Branch Response - VX_branch_rsp_if branch_rsp_temp_if(); + + VX_jal_rsp_if jal_rsp_temp_if(); + VX_branch_rsp_if branch_rsp_temp_if(); // Actual Writeback assign inst_exec_wb_if.rd = exec_unit_req_if.rd; @@ -120,7 +113,7 @@ module VX_exec_unit ( assign inst_exec_wb_if.valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}}; assign inst_exec_wb_if.warp_num = exec_unit_req_if.warp_num; assign inst_exec_wb_if.data = exec_unit_req_if.jal ? duplicate_PC_data : alu_result; - assign inst_exec_wb_if.pc = in_curr_PC; + assign inst_exec_wb_if.curr_PC = in_curr_PC; // Jal rsp assign jal_rsp_temp_if.jal = in_jal; @@ -133,50 +126,26 @@ module VX_exec_unit ( assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num; assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset - wire zero = 0; - - // VX_generic_register #(.N(174)) exec_reg( - // .clk (clk), - // .reset(reset), - // .stall(zero), - // .flush(zero), - // .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}), - // .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc }) - // ); - VX_generic_register #( .N(33 + `NW_BITS-1 + 1) ) jal_reg ( - .clk (clk), - .reset(reset), - .stall(zero), - .flush(zero), - .in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}), - .out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num}) + .clk (clk), + .reset (reset), + .stall (1'b0), + .flush (1'b0), + .in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}), + .out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num}) ); VX_generic_register #( .N(34 + `NW_BITS-1 + 1) ) branch_reg ( - .clk (clk), - .reset(reset), - .stall(zero), - .flush(zero), - .in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}), - .out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest }) + .clk (clk), + .reset (reset), + .stall (1'b0), + .flush (1'b0), + .in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}), + .out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest }) ); - // always @(*) begin - // case (in_alu_op) - // `ALU_CSR_RW: out_csr_result = in_csr_mask; - // `ALU_CSR_RS: out_csr_result = in_csr_data | in_csr_mask; - // `ALU_CSR_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask); - // default: out_csr_result = 32'hdeadbeef; - // endcase - - // end - - // assign out_is_csr = exec_unit_req_if.is_csr; - // assign out_csr_address = exec_unit_req_if.csr_address; - endmodule : VX_exec_unit \ No newline at end of file diff --git a/hw/rtl/VX_front_end.v b/hw/rtl/VX_front_end.v index 83eb45cc..a24f5e89 100644 --- a/hw/rtl/VX_front_end.v +++ b/hw/rtl/VX_front_end.v @@ -3,7 +3,7 @@ module VX_front_end #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_FE_IO + `SCOPE_SIGNALS_ICACHE_IO input wire clk, input wire reset, @@ -65,7 +65,7 @@ module VX_front_end #( VX_icache_stage #( .CORE_ID(CORE_ID) ) icache_stage ( - `SCOPE_SIGNALS_FE_ATTACH + `SCOPE_SIGNALS_ICACHE_ATTACH .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index 4737a591..da579017 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -3,40 +3,40 @@ module VX_gpr ( input wire clk, input wire reset, - input wire valid_write_request, + input wire write_ce, VX_gpr_read_if gpr_read_if, VX_wb_if writeback_if, - output wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data, - output wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data + output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data, + output wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data ); - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_uqual; - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_uqual; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data_uqual; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data_uqual; assign a_reg_data = (gpr_read_if.rs1 != 0) ? a_reg_data_uqual : 0; assign b_reg_data = (gpr_read_if.rs2 != 0) ? b_reg_data_uqual : 0; - wire write_enable = valid_write_request && ((writeback_if.wb != 0)); + wire write_enable = write_ce && ((writeback_if.wb != 0)); `ifndef ASIC VX_gpr_ram gpr_ram ( - .we (write_enable), - .clk (clk), - .reset (reset), - .waddr (writeback_if.rd), - .raddr1(gpr_read_if.rs1), - .raddr2(gpr_read_if.rs2), - .be (writeback_if.valid), - .wdata (writeback_if.data), - .q1 (a_reg_data_uqual), - .q2 (b_reg_data_uqual) + .we (write_enable), + .clk (clk), + .reset (reset), + .waddr (writeback_if.rd), + .raddr1 (gpr_read_if.rs1), + .raddr2 (gpr_read_if.rs2), + .be (writeback_if.valid), + .wdata (writeback_if.data), + .q1 (a_reg_data_uqual), + .q2 (b_reg_data_uqual) ); `else wire going_to_write = write_enable & (| writeback_if.wb_valid); - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; genvar i; for (i = 0; i < `NUM_THREADS; i++) begin @@ -44,31 +44,27 @@ module VX_gpr ( assign write_bit_mask[i] = {`NUM_GPRS{~local_write}}; end - // wire cenb = !going_to_write; - wire cenb = 0; + wire cenb = 0; + wire cena_1 = 0; + wire cena_2 = 0; - // wire cena_1 = (gpr_read_if.rs1 == 0); - // wire cena_2 = (gpr_read_if.rs2 == 0); - wire cena_1 = 0; - wire cena_2 = 0; - - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_a; - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_a; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_b; `ifndef SYN genvar j; for (i = 0; i < `NUM_THREADS; i++) begin for (j = 0; j < `NUM_GPRS; j++) begin - assign a_reg_data_uqual[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j]; - assign b_reg_data_uqual[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j]; + assign a_reg_data_uqual[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j]; + assign b_reg_data_uqual[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j]; end end `else - assign a_reg_data_uqual = temp_a; - assign b_reg_data_uqual = temp_b; + assign a_reg_data_uqual = tmp_a; + assign b_reg_data_uqual = tmp_b; `endif - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data; + wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = writeback_if.write_data; for (i = 0; i < 'NT; i=i+4) begin @@ -79,7 +75,7 @@ module VX_gpr ( .CENYB(), .WENYB(), .AYB(), - .QA(temp_a[(i+3):(i)]), + .QA(tmp_a[(i+3):(i)]), .SOA(), .SOB(), .CLKA(clk), @@ -116,7 +112,7 @@ module VX_gpr ( .CENYB(), .WENYB(), .AYB(), - .QA(temp_b[(i+3):(i)]), + .QA(tmp_b[(i+3):(i)]), .SOA(), .SOB(), .CLKA(clk), diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 36ba9941..13694d19 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -10,8 +10,7 @@ module VX_gpr_stage ( input wire stall_gpr_csr, output wire gpr_stage_delay, - // inputs - // Instruction Information + // decodee inputs VX_frE_to_bckE_req_if bckE_req_if, // WriteBack inputs @@ -24,11 +23,11 @@ module VX_gpr_stage ( VX_csr_req_if csr_req_if ); `DEBUG_BEGIN - wire[31:0] curr_PC = bckE_req_if.curr_PC; + wire[31:0] curr_PC = bckE_req_if.curr_PC; wire[2:0] branchType = bckE_req_if.branch_type; - wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); - wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); - wire jalQual = bckE_req_if.jalQual; + wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO); + wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO); + wire is_jal = bckE_req_if.is_jal; `DEBUG_END VX_gpr_read_if gpr_read_if(); @@ -38,15 +37,15 @@ module VX_gpr_stage ( `ifndef ASIC VX_gpr_jal_if gpr_jal_if(); - assign gpr_jal_if.is_jal = bckE_req_if.jalQual; + assign gpr_jal_if.is_jal = bckE_req_if.is_jal; assign gpr_jal_if.curr_PC = bckE_req_if.curr_PC; `else VX_gpr_jal_if gpr_jal_if(); - assign gpr_jal_if.is_jal = exec_unit_req_if.jalQual; + assign gpr_jal_if.is_jal = exec_unit_req_if.is_jal; assign gpr_jal_if.curr_PC = exec_unit_req_if.curr_PC; `endif - VX_gpr_data_if gpr_datf_if(); + VX_gpr_data_if gpr_datf_if(); VX_gpr_wrapper grp_wrapper ( .clk (clk), @@ -73,6 +72,7 @@ module VX_gpr_stage ( .gpu_inst_req_if (gpu_inst_req_temp_if), .csr_req_if (csr_req_temp_if) ); + `DEBUG_BEGIN wire is_lsu = (| lsu_req_temp_if.valid); `DEBUG_END @@ -104,11 +104,11 @@ module VX_gpr_stage ( `UNUSED_PIN (size) ); - wire[`NUM_THREADS-1:0][31:0] temp_store_data; - wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data + wire [`NUM_THREADS-1:0][31:0] temp_store_data; + wire [`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data - wire[`NUM_THREADS-1:0][31:0] real_store_data; - wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data + wire [`NUM_THREADS-1:0][31:0] real_store_data; + wire [`NUM_THREADS-1:0][31:0] real_base_address; // A reg data wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu; @@ -132,12 +132,12 @@ module VX_gpr_stage ( VX_generic_register #( .N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS)) ) lsu_reg ( - .clk (clk), - .reset(reset), - .stall(stall_lsu), - .flush(flush_lsu), - .in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}), - .out ({lsu_req_if.valid , lsu_req_if.lsu_pc ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb }) + .clk (clk), + .reset (reset), + .stall (stall_lsu), + .flush (flush_lsu), + .in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}), + .out ({lsu_req_if.valid , lsu_req_if.curr_PC ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb }) ); VX_generic_register #( @@ -147,8 +147,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_exec), .flush (flush_exec), - .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), - .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) + .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), + .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) ); assign exec_unit_req_if.a_reg_data = real_base_address; @@ -161,8 +161,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_rest), .flush (flush_rest), - .in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next}), - .out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next }) + .in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC}), + .out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC }) ); assign gpu_inst_req_if.a_reg_data = real_base_address; @@ -189,8 +189,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_lsu), .flush (flush_lsu), - .in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}), - .out ({lsu_req_if.valid , lsu_req_if.lsu_pc , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb }) + .in ({lsu_req_temp_if.valid, lsu_req_temp_if.curr_PC, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}), + .out ({lsu_req_if.valid , lsu_req_if.curr_PC , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb }) ); VX_generic_register #( @@ -200,8 +200,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_exec), .flush (flush_exec), - .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), - .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) + .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.next_PC, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.is_jal, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), + .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.next_PC , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.is_jal , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) ); VX_generic_register #( @@ -211,8 +211,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_rest), .flush (flush_rest), - .in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}), - .out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 }) + .in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.next_PC, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}), + .out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.next_PC , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 }) ); VX_generic_register #( diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index 54c8804f..ec0f41a0 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -1,20 +1,19 @@ `include "VX_define.vh" module VX_gpr_wrapper ( - input wire clk, - input wire reset, - VX_gpr_read_if gpr_read_if, - VX_wb_if writeback_if, - VX_gpr_jal_if gpr_jal_if, + input wire clk, + input wire reset, + VX_gpr_read_if gpr_read_if, + VX_wb_if writeback_if, + VX_gpr_jal_if gpr_jal_if, - output wire[`NUM_THREADS-1:0][31:0] a_reg_data, - output wire[`NUM_THREADS-1:0][31:0] b_reg_data -); + output wire [`NUM_THREADS-1:0][31:0] a_reg_data, + output wire [`NUM_THREADS-1:0][31:0] b_reg_data +); + wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_a_reg_data; + wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_b_reg_data; + wire [`NUM_THREADS-1:0][31:0] jal_data; - wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data; - wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data; - - wire[`NUM_THREADS-1:0][31:0] jal_data; genvar i; generate for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign @@ -23,49 +22,42 @@ module VX_gpr_wrapper ( endgenerate `ifndef ASIC - assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num])); - assign b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]); + assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (tmp_a_reg_data[gpr_read_if.warp_num])); + assign b_reg_data = (tmp_b_reg_data[gpr_read_if.warp_num]); `else - wire zer = 0; - - wire[`NW_BITS-1:0] old_warp_num; + wire [`NW_BITS-1:0] old_warp_num; + VX_generic_register #( .N(`NW_BITS-1+1) ) store_wn ( - .clk (clk), - .reset(reset), - .stall(zer), - .flush(zer), - .in (gpr_read_if.warp_num), - .out (old_warp_num) + .clk (clk), + .reset (reset), + .stall (1'b0), + .flush (1'b0), + .in (gpr_read_if.warp_num), + .out (old_warp_num) ); - assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num])); - assign b_reg_data = (temp_b_reg_data[old_warp_num]); + assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (tmp_a_reg_data[old_warp_num])); + assign b_reg_data = (tmp_b_reg_data[old_warp_num]); `endif generate for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs - wire valid_write_request = i == writeback_if.warp_num; + wire write_ce = (i == writeback_if.warp_num); VX_gpr gpr( - .clk (clk), - .reset (reset), - .valid_write_request (valid_write_request), - .gpr_read_if (gpr_read_if), - .writeback_if (writeback_if), - .a_reg_data (temp_a_reg_data[i]), - .b_reg_data (temp_b_reg_data[i]) + .clk (clk), + .reset (reset), + .write_ce (write_ce), + .gpr_read_if (gpr_read_if), + .writeback_if (writeback_if), + .a_reg_data (tmp_a_reg_data[i]), + .b_reg_data (tmp_b_reg_data[i]) ); - - /*always_ff @(posedge clk) begin - if (valid_write_request && ((writeback_if.wb != 0))) begin - $display("%t: GPR%01d$: wid=%0d, rd=%0d, data=%0h", $time, 0, writeback_if.warp_num, writeback_if.rd, writeback_if.data); - end - end*/ end - endgenerate + endgenerate endmodule diff --git a/hw/rtl/VX_gpu_inst.v b/hw/rtl/VX_gpu_inst.v index 5eb836e6..5dbf3e57 100644 --- a/hw/rtl/VX_gpu_inst.v +++ b/hw/rtl/VX_gpu_inst.v @@ -78,7 +78,7 @@ module VX_gpu_inst ( assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}})); assign warp_ctl_if.split_new_mask = split_new_use_mask; assign warp_ctl_if.split_later_mask = split_new_later_mask; - assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next; + assign warp_ctl_if.split_save_pc = gpu_inst_req_if.next_PC; assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num; // gpu_inst_req_if.is_wspawn diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 07fee468..936d3354 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -3,7 +3,7 @@ module VX_icache_stage #( parameter CORE_ID = 0 ) ( - `SCOPE_SIGNALS_FE_IO + `SCOPE_SIGNALS_ICACHE_IO input wire clk, input wire reset, @@ -45,8 +45,6 @@ module VX_icache_stage #( .read_data ({dbg_mrq_write_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num}) ); - `SCOPE_ASSIGN(scope_icache_req_warp, fe_inst_meta_fi.warp_num); - always @(posedge clk) begin if (reset) begin //-- @@ -76,7 +74,7 @@ module VX_icache_stage #( assign icache_req_if.core_req_tag = mrq_write_addr; `endif - assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0]; + assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_valid ? icache_rsp_if.core_rsp_data[0] : 0; assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0; assign icache_stage_response = mrq_pop; @@ -85,6 +83,16 @@ module VX_icache_stage #( // Can't accept new response assign icache_rsp_if.core_rsp_ready = ~total_freeze; + `SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.core_req_valid); + `SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.core_req_addr, 2'b0}); + `SCOPE_ASSIGN(scope_icache_req_warp_num, fe_inst_meta_fi.warp_num); + `SCOPE_ASSIGN(scope_icache_req_tag, icache_req_if.core_req_tag); + `SCOPE_ASSIGN(scope_icache_req_ready, icache_req_if.core_req_ready); + `SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_if.core_rsp_valid); + `SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_if.core_rsp_data); + `SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_if.core_rsp_tag); + `SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready); + `ifdef DBG_PRINT_CORE_ICACHE always_ff @(posedge clk) begin if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin diff --git a/hw/rtl/VX_inst_multiplex.v b/hw/rtl/VX_inst_multiplex.v index f1ef35db..b13e8c2f 100644 --- a/hw/rtl/VX_inst_multiplex.v +++ b/hw/rtl/VX_inst_multiplex.v @@ -42,14 +42,13 @@ module VX_inst_multiplex ( assign lsu_req_if.mem_write = bckE_req_if.mem_write; assign lsu_req_if.rd = bckE_req_if.rd; assign lsu_req_if.wb = bckE_req_if.wb; - assign lsu_req_if.lsu_pc = bckE_req_if.curr_PC; - + assign lsu_req_if.curr_PC = bckE_req_if.curr_PC; // Execute Unit assign exec_unit_req_if.valid = bckE_req_if.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask); assign exec_unit_req_if.warp_num = bckE_req_if.warp_num; assign exec_unit_req_if.curr_PC = bckE_req_if.curr_PC; - assign exec_unit_req_if.PC_next = bckE_req_if.PC_next; + assign exec_unit_req_if.next_PC = bckE_req_if.next_PC; assign exec_unit_req_if.rd = bckE_req_if.rd; assign exec_unit_req_if.wb = bckE_req_if.wb; assign exec_unit_req_if.a_reg_data = gpr_data_if.a_reg_data; @@ -61,12 +60,11 @@ module VX_inst_multiplex ( assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed; assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed; assign exec_unit_req_if.branch_type = bckE_req_if.branch_type; - assign exec_unit_req_if.jalQual = bckE_req_if.jalQual; + assign exec_unit_req_if.is_jal = bckE_req_if.is_jal; assign exec_unit_req_if.jal = bckE_req_if.jal; assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset; assign exec_unit_req_if.is_etype = bckE_req_if.is_etype; - // GPR Req assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask; assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num; @@ -76,8 +74,7 @@ module VX_inst_multiplex ( assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier; assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data; assign gpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0]; - assign gpu_inst_req_if.pc_next = bckE_req_if.PC_next; - + assign gpu_inst_req_if.next_PC = bckE_req_if.next_PC; // CSR Req assign csr_req_if.valid = bckE_req_if.valid & is_csr_mask; @@ -90,8 +87,4 @@ module VX_inst_multiplex ( assign csr_req_if.csr_immed = bckE_req_if.csr_immed; assign csr_req_if.csr_mask = bckE_req_if.csr_mask; -endmodule - - - - +endmodule \ No newline at end of file diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index b199b32b..fc0ef23f 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -2,9 +2,8 @@ module VX_lsu_unit #( parameter CORE_ID = 0 -) ( - - `SCOPE_SIGNALS_BE_IO +) ( + `SCOPE_SIGNALS_DCACHE_IO input wire clk, input wire reset, @@ -44,16 +43,14 @@ module VX_lsu_unit #( VX_generic_register #( .N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65) ) lsu_buffer ( - .clk (clk), - .reset(reset), - .stall(delay), - .flush(1'b0), - .in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}), - .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) + .clk (clk), + .reset (reset), + .stall (delay), + .flush (1'b0), + .in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.curr_PC}), + .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) ); - `SCOPE_ASSIGN(scope_dcache_req_warp, use_warp_num); - wire core_req_rw = (use_mem_write != `BYTE_EN_NO); wire [`NUM_THREADS-1:0][4:0] mem_req_offset; @@ -108,7 +105,7 @@ module VX_lsu_unit #( .full (mrq_full), .pop (mrq_pop), .read_addr (mrq_read_addr), - .read_data ({dbg_mrq_write_addr, mem_wb_if.pc, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num}) + .read_data ({dbg_mrq_write_addr, mem_wb_if.curr_PC, mem_wb_if.wb, mem_rsp_offset, core_rsp_mem_read, mem_wb_if.rd, mem_wb_if.warp_num}) ); always @(posedge clk) begin @@ -165,6 +162,16 @@ module VX_lsu_unit #( // Can't accept new response assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem; + + `SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.core_req_valid); + `SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_if.core_req_addr[0], 2'b0}); + `SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num); + `SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.core_req_tag); + `SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.core_req_ready); + `SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.core_rsp_valid); + `SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.core_rsp_data[0]); + `SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.core_rsp_tag); + `SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready); `ifdef DBG_PRINT_CORE_DCACHE always_ff @(posedge clk) begin @@ -172,7 +179,7 @@ module VX_lsu_unit #( $display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h", $time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data); end if ((| dcache_rsp_if.core_rsp_valid) && dcache_rsp_if.core_rsp_ready) begin - $display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); + $display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.curr_PC, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); end end `endif diff --git a/hw/rtl/VX_dmem_ctrl.v b/hw/rtl/VX_mem_ctrl.v similarity index 94% rename from hw/rtl/VX_dmem_ctrl.v rename to hw/rtl/VX_mem_ctrl.v index cdaa0cdd..8b0db7b8 100644 --- a/hw/rtl/VX_dmem_ctrl.v +++ b/hw/rtl/VX_mem_ctrl.v @@ -1,6 +1,6 @@ `include "VX_define.vh" -module VX_dmem_ctrl # ( +module VX_mem_ctrl # ( parameter CORE_ID = 0 ) ( input wire clk, @@ -111,6 +111,7 @@ module VX_dmem_ctrl # ( // Snoop request .snp_req_valid (0), .snp_req_addr (0), + .snp_req_invalidate (0), .snp_req_tag (0), `UNUSED_PIN (snp_req_ready), @@ -122,6 +123,7 @@ module VX_dmem_ctrl # ( // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), `UNUSED_PIN (snp_fwdout_addr), + `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), .snp_fwdout_ready (0), @@ -192,6 +194,7 @@ module VX_dmem_ctrl # ( // Snoop request .snp_req_valid (dcache_snp_req_if.snp_req_valid), .snp_req_addr (dcache_snp_req_if.snp_req_addr), + .snp_req_invalidate (dcache_snp_req_if.snp_req_invalidate), .snp_req_tag (dcache_snp_req_if.snp_req_tag), .snp_req_ready (dcache_snp_req_if.snp_req_ready), @@ -203,6 +206,7 @@ module VX_dmem_ctrl # ( // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), `UNUSED_PIN (snp_fwdout_addr), + `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), .snp_fwdout_ready (0), @@ -272,6 +276,7 @@ module VX_dmem_ctrl # ( // Snoop request .snp_req_valid (0), .snp_req_addr (0), + .snp_req_invalidate (0), .snp_req_tag (0), `UNUSED_PIN (snp_req_ready), @@ -282,7 +287,8 @@ module VX_dmem_ctrl # ( // Snoop forward out `UNUSED_PIN (snp_fwdout_valid), - `UNUSED_PIN (snp_fwdout_addr), + `UNUSED_PIN (snp_fwdout_addr), + `UNUSED_PIN (snp_fwdout_invalidate), `UNUSED_PIN (snp_fwdout_tag), .snp_fwdout_ready (0), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 78c76af3..e8052cff 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -6,7 +6,6 @@ module VX_pipeline #( `SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_FE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -57,26 +56,6 @@ module VX_pipeline #( wire gpr_stage_delay; wire schedule_delay; - `SCOPE_ASSIGN(scope_icache_req_valid, icache_req_valid); - `SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_addr, 2'b0}); - `SCOPE_ASSIGN(scope_icache_req_tag, icache_req_tag); - `SCOPE_ASSIGN(scope_icache_req_ready, icache_req_ready); - `SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_valid); - `SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_data); - `SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_tag); - `SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_ready); - - `SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_valid); - `SCOPE_ASSIGN(scope_dcache_req_addr, {dcache_req_addr[0], 2'b0}); - `SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_tag); - `SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_ready); - `SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_valid); - `SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_data[0]); - `SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_tag); - `SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_ready); - - `SCOPE_ASSIGN(scope_schedule_delay, schedule_delay); - // Dcache VX_cache_core_req_if #( .NUM_REQUESTS(`NUM_THREADS), @@ -121,7 +100,8 @@ module VX_pipeline #( VX_front_end #( .CORE_ID(CORE_ID) ) front_end ( - `SCOPE_SIGNALS_FE_ATTACH + `SCOPE_SIGNALS_ICACHE_ATTACH + .clk (clk), .reset (reset), .warp_ctl_if (warp_ctl_if), @@ -149,7 +129,9 @@ module VX_pipeline #( VX_back_end #( .CORE_ID(CORE_ID) ) back_end ( + `SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_BE_ATTACH + .clk (clk), .reset (reset), .schedule_delay (schedule_delay), @@ -192,4 +174,17 @@ module VX_pipeline #( assign core_icache_rsp_if.core_rsp_tag = icache_rsp_tag; assign icache_rsp_ready = core_icache_rsp_if.core_rsp_ready; + `SCOPE_ASSIGN(scope_schedule_delay, schedule_delay); + `SCOPE_ASSIGN(scope_memory_delay, memory_delay); + `SCOPE_ASSIGN(scope_exec_delay, exec_delay); + `SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay); + +`ifdef DBG_PRINT_WB + always_ff @(posedge clk) begin + if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin + $display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data); + end + end +`endif + endmodule // Vortex \ No newline at end of file diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 7ffb2765..4baddbfb 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -55,13 +55,11 @@ module VX_writeback ( mem_wb ? mem_wb_if.warp_num : 0; - assign writeback_tmp_if.pc = exec_wb ? inst_exec_wb_if.pc : - csr_wb ? 32'hdeadbeef : - mem_wb ? mem_wb_if.pc : + assign writeback_tmp_if.curr_PC = exec_wb ? inst_exec_wb_if.curr_PC : + csr_wb ? 32'hdeadbeef : + mem_wb ? mem_wb_if.curr_PC : 32'hdeadbeef; - wire zero = 0; - wire [`NUM_THREADS-1:0][31:0] use_wb_data; VX_generic_register #( @@ -69,10 +67,10 @@ module VX_writeback ( ) wb_register ( .clk (clk), .reset(reset), - .stall(zero), - .flush(zero), - .in ({writeback_tmp_if.data, writeback_tmp_if.valid, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.warp_num, writeback_tmp_if.pc}), - .out ({use_wb_data, writeback_if.valid, writeback_if.rd, writeback_if.wb, writeback_if.warp_num, writeback_if.pc}) + .stall(1'b0), + .flush(1'b0), + .in ({writeback_tmp_if.data, writeback_tmp_if.valid, writeback_tmp_if.rd, writeback_tmp_if.wb, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC}), + .out ({use_wb_data, writeback_if.valid, writeback_if.rd, writeback_if.wb, writeback_if.warp_num, writeback_if.curr_PC}) ); reg [31:0] last_data_wb /* verilator public */; diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 2eddfc02..5e2f46c2 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -6,7 +6,6 @@ module Vortex #( `SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_FE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -46,6 +45,7 @@ module Vortex #( // Snoop request input wire snp_req_valid, input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -172,7 +172,6 @@ module Vortex #( `SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_FE_ATTACH `SCOPE_SIGNALS_BE_ATTACH .clk(clk), @@ -223,18 +222,19 @@ module Vortex #( .SNP_TAG_WIDTH(`DSNP_TAG_WIDTH) ) dcache_snp_rsp_if(); - assign dcache_snp_req_if.snp_req_valid = snp_req_valid; - assign dcache_snp_req_if.snp_req_addr = snp_req_addr; - assign dcache_snp_req_if.snp_req_tag = snp_req_tag; - assign snp_req_ready = dcache_snp_req_if.snp_req_ready; + assign dcache_snp_req_if.snp_req_valid = snp_req_valid; + assign dcache_snp_req_if.snp_req_addr = snp_req_addr; + assign dcache_snp_req_if.snp_req_invalidate = snp_req_invalidate; + assign dcache_snp_req_if.snp_req_tag = snp_req_tag; + assign snp_req_ready = dcache_snp_req_if.snp_req_ready; assign snp_rsp_valid = dcache_snp_rsp_if.snp_rsp_valid; assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag; assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready; - VX_dmem_ctrl #( + VX_mem_ctrl #( .CORE_ID(CORE_ID) - ) dmem_ctrl ( + ) mem_ctrl ( .clk (clk), .reset (reset), diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index 6fe35980..37d0d21a 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -6,7 +6,6 @@ module Vortex_Cluster #( `SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_FE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -31,6 +30,7 @@ module Vortex_Cluster #( // Snoop request input wire snp_req_valid, input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, input wire[`L2SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -84,8 +84,9 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready; - wire[`NUM_CORES-1:0] per_core_snp_req_valid; + wire[`NUM_CORES-1:0] per_core_snp_req_valid; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr; + wire[`NUM_CORES-1:0] per_core_snp_req_invalidate; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag; wire[`NUM_CORES-1:0] per_core_snp_req_ready; @@ -115,7 +116,6 @@ module Vortex_Cluster #( `SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_FE_ATTACH `SCOPE_SIGNALS_BE_ATTACH .clk (clk), @@ -146,6 +146,7 @@ module Vortex_Cluster #( .snp_req_valid (per_core_snp_req_valid [i]), .snp_req_addr (per_core_snp_req_addr [i]), + .snp_req_invalidate (per_core_snp_req_invalidate[i]), .snp_req_tag (per_core_snp_req_tag [i]), .snp_req_ready (per_core_snp_req_ready [i]), @@ -203,6 +204,7 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0] l2_snp_fwdout_valid; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr; + wire[`NUM_CORES-1:0] l2_snp_fwdout_invalidate; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag; wire[`NUM_CORES-1:0] l2_snp_fwdout_ready; @@ -241,10 +243,11 @@ module Vortex_Cluster #( assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i]; assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1]; - assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)]; - assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)]; - assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)]; - assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; + assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)]; + assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)]; + assign per_core_snp_req_invalidate [(i/2)] = l2_snp_fwdout_invalidate [(i/2)]; + assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)]; + assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; @@ -316,6 +319,7 @@ module Vortex_Cluster #( // Snoop request .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_invalidate (snp_req_invalidate), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), @@ -327,6 +331,7 @@ module Vortex_Cluster #( // Snoop forwarding out .snp_fwdout_valid (l2_snp_fwdout_valid), .snp_fwdout_addr (l2_snp_fwdout_addr), + .snp_fwdout_invalidate(l2_snp_fwdout_invalidate), .snp_fwdout_tag (l2_snp_fwdout_tag), .snp_fwdout_ready (l2_snp_fwdout_ready), @@ -353,6 +358,7 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0] arb_snp_fwdout_valid; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr; + wire[`NUM_CORES-1:0] arb_snp_fwdout_invalidate; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag; wire[`NUM_CORES-1:0] arb_snp_fwdout_ready; @@ -394,10 +400,11 @@ module Vortex_Cluster #( assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; - assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)]; - assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)]; - assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)]; - assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; + assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)]; + assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)]; + assign per_core_snp_req_invalidate [(i/2)] = arb_snp_fwdout_invalidate [(i/2)]; + assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)]; + assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)]; assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)]; @@ -417,16 +424,19 @@ module Vortex_Cluster #( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_invalidate (snp_req_invalidate), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), .snp_rsp_valid (snp_rsp_valid), `UNUSED_PIN (snp_rsp_addr), + `UNUSED_PIN (snp_rsp_invalidate), .snp_rsp_tag (snp_rsp_tag), .snp_rsp_ready (snp_rsp_ready), .snp_fwdout_valid (arb_snp_fwdout_valid), .snp_fwdout_addr (arb_snp_fwdout_addr), + .snp_fwdout_invalidate(arb_snp_fwdout_invalidate), .snp_fwdout_tag (arb_snp_fwdout_tag), .snp_fwdout_ready (arb_snp_fwdout_ready), @@ -435,10 +445,11 @@ module Vortex_Cluster #( .snp_fwdin_ready (arb_snp_fwdin_ready) ); end else begin - assign arb_snp_fwdout_valid = snp_req_valid; - assign arb_snp_fwdout_addr = snp_req_addr; - assign arb_snp_fwdout_tag = snp_req_tag; - assign snp_req_ready = arb_snp_fwdout_ready; + assign arb_snp_fwdout_valid = snp_req_valid; + assign arb_snp_fwdout_addr = snp_req_addr; + assign arb_snp_fwdout_invalidate = snp_req_invalidate; + assign arb_snp_fwdout_tag = snp_req_tag; + assign snp_req_ready = arb_snp_fwdout_ready; assign snp_rsp_valid = arb_snp_fwdin_valid; assign snp_rsp_tag = arb_snp_fwdin_tag; diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index fe9e6b08..afb88ad2 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -4,7 +4,6 @@ module Vortex_Socket ( `SCOPE_SIGNALS_ICACHE_IO `SCOPE_SIGNALS_DCACHE_IO `SCOPE_SIGNALS_CORE_IO - `SCOPE_SIGNALS_FE_IO `SCOPE_SIGNALS_BE_IO // Clock @@ -29,6 +28,7 @@ module Vortex_Socket ( // Snoop request input wire snp_req_valid, input wire[`VX_DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, input wire[`VX_SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -64,7 +64,6 @@ module Vortex_Socket ( `SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_FE_ATTACH `SCOPE_SIGNALS_BE_ATTACH .clk (clk), @@ -85,6 +84,7 @@ module Vortex_Socket ( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_invalidate (snp_req_invalidate), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), @@ -126,6 +126,7 @@ module Vortex_Socket ( wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; + wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate; wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; @@ -155,7 +156,6 @@ module Vortex_Socket ( `SCOPE_SIGNALS_ICACHE_ATTACH `SCOPE_SIGNALS_DCACHE_ATTACH `SCOPE_SIGNALS_CORE_ATTACH - `SCOPE_SIGNALS_FE_ATTACH `SCOPE_SIGNALS_BE_ATTACH .clk (clk), @@ -176,6 +176,7 @@ module Vortex_Socket ( .snp_req_valid (per_cluster_snp_req_valid [i]), .snp_req_addr (per_cluster_snp_req_addr [i]), + .snp_req_invalidate (per_cluster_snp_req_invalidate[i]), .snp_req_tag (per_cluster_snp_req_tag [i]), .snp_req_ready (per_cluster_snp_req_ready [i]), @@ -229,6 +230,7 @@ module Vortex_Socket ( wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid; wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr; + wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate; wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag; wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready; @@ -251,10 +253,11 @@ module Vortex_Socket ( assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i]; // Snoop Forwarding out - assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i]; - assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i]; - assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i]; - assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i]; + assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i]; + assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i]; + assign per_cluster_snp_req_invalidate [i] = l3_snp_fwdout_invalidate[i]; + assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i]; + assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i]; // Snoop Forwarding in assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i]; @@ -327,6 +330,7 @@ module Vortex_Socket ( // Snoop request .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_invalidate (snp_req_invalidate), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), @@ -338,6 +342,7 @@ module Vortex_Socket ( // Snoop forwarding out .snp_fwdout_valid (l3_snp_fwdout_valid), .snp_fwdout_addr (l3_snp_fwdout_addr), + .snp_fwdout_invalidate(l3_snp_fwdout_invalidate), .snp_fwdout_tag (l3_snp_fwdout_tag), .snp_fwdout_ready (l3_snp_fwdout_ready), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index b952a64b..2c5c175e 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -92,6 +92,7 @@ module VX_bank #( // Snp Request input wire snp_req_valid, input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -134,18 +135,19 @@ module VX_bank #( wire snrq_full; wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0; + wire snrq_invalidate_st0; wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0; VX_generic_queue #( - .DATAW(`LINE_ADDR_WIDTH + SNP_REQ_TAG_WIDTH), + .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_REQ_TAG_WIDTH), .SIZE(SNRQ_SIZE) ) snp_req_queue ( .clk (clk), .reset (reset), .push (snp_req_valid), - .data_in ({snp_req_addr, snp_req_tag}), + .data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}), .pop (snrq_pop), - .data_out({snrq_addr_st0, snrq_tag_st0}), + .data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}), .empty (snrq_empty), .full (snrq_full), `UNUSED_PIN (size) @@ -236,6 +238,7 @@ module VX_bank #( wire mrvq_rw_st0; wire [WORD_SIZE-1:0] mrvq_byteen_st0; wire mrvq_is_snp_st0; + wire mrvq_snp_invalidate_st0; wire mrvq_pending_hazard_st1e; wire st2_pending_hazard_st1e; @@ -295,6 +298,8 @@ module VX_bank #( wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0; wire qual_going_to_write_st0; wire qual_is_snp_st0; + wire qual_snp_invalidate_st0; + wire valid_st1 [STAGE_1_CYCLES-1:0]; wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; @@ -303,6 +308,7 @@ module VX_bank #( wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0]; + wire snp_invalidate_st1 [STAGE_1_CYCLES-1:0]; wire from_mrvq_st1 [STAGE_1_CYCLES-1:0]; assign qual_is_fill_st0 = dfpq_pop_unqual; @@ -339,6 +345,10 @@ module VX_bank #( snrq_pop_unqual ? 1 : 0; + assign qual_snp_invalidate_st0 = mrvq_pop_unqual ? mrvq_snp_invalidate_st0 : + snrq_pop_unqual ? snrq_invalidate_st0 : + 0; + assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 : reqq_pop_unqual ? reqq_req_writeword_st0 : 0; @@ -352,27 +362,27 @@ module VX_bank #( ) VX_generic_register #( - .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) ) s0_1_c0 ( .clk (clk), .reset (reset), .stall (stall_bank_pipe), .flush (1'b0), - .in ({qual_from_mrvq_st0, qual_is_snp_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), - .out ({from_mrvq_st1[0] , is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) + .in ({qual_from_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), + .out ({from_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) ); genvar i; for (i = 1; i < STAGE_1_CYCLES; i++) begin VX_generic_register #( - .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) + .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) ) s0_1_cc ( - .clk (clk), - .reset(reset), - .stall(stall_bank_pipe), - .flush(1'b0), - .in ({from_mrvq_st1[i-1], is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), - .out ({from_mrvq_st1[i] , is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) + .clk (clk), + .reset (reset), + .stall (stall_bank_pipe), + .flush (1'b0), + .in ({from_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), + .out ({from_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) ); end @@ -390,6 +400,7 @@ module VX_bank #( wire [WORD_SIZE-1:0] mem_byteen_st1e; wire fill_saw_dirty_st1e; wire is_snp_st1e; + wire snp_invalidate_st1e; wire snp_to_mrvq_st1e; wire mrvq_init_ready_state_st1e; wire miss_add_because_miss; @@ -398,8 +409,9 @@ module VX_bank #( wire mrvq_recover_ready_state_st1e; assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1]; - assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; - assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; + assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; + assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; + assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1]; assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; @@ -440,6 +452,7 @@ module VX_bank #( .mem_byteen_st1e (mem_byteen_st1e), .is_snp_st1e (is_snp_st1e), + .snp_invalidate_st1e (snp_invalidate_st1e), // Read Data .readword_st1e (readword_st1e), @@ -473,6 +486,7 @@ module VX_bank #( wire [`TAG_SELECT_BITS-1:0] readtag_st2; wire fill_saw_dirty_st2; wire is_snp_st2; + wire snp_invalidate_st2; wire snp_to_mrvq_st2; wire from_mrvq_st2; wire mrvq_init_ready_state_st2; @@ -482,14 +496,14 @@ module VX_bank #( wire mrvq_init_ready_state_hazard_st1e_st1; VX_generic_register #( - .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) + .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) ) st_1e_2 ( - .clk (clk), - .reset(reset), - .stall(stall_bank_pipe), - .flush(1'b0), - .in ({mrvq_recover_ready_state_st1e, from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), - .out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 }) + .clk (clk), + .reset (reset), + .stall (stall_bank_pipe), + .flush (1'b0), + .in ({mrvq_recover_ready_state_st1e, from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), + .out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 }) ); `DEBUG_BLOCK( @@ -517,7 +531,8 @@ module VX_bank #( wire [`UP(`WORD_SELECT_WIDTH)-1:0] miss_add_wsel = wsel_st2; wire [`WORD_WIDTH-1:0] miss_add_data = writeword_st2; assign {miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_tid} = inst_meta_st2; - wire miss_add_is_snp = is_snp_st2; + wire miss_add_is_snp = is_snp_st2; + wire miss_add_snp_invalidate = snp_invalidate_st2; wire miss_add_from_mrvq = valid_st2 && from_mrvq_st2 && !stall_bank_pipe; @@ -551,6 +566,7 @@ module VX_bank #( .miss_add_rw (miss_add_rw), .miss_add_byteen (miss_add_byteen), .miss_add_is_snp (miss_add_is_snp), + .miss_add_snp_invalidate (miss_add_snp_invalidate), .miss_resrv_full (mrvq_full), .miss_resrv_stop (mrvq_stop), .mrvq_init_ready_state (mrvq_init_ready_state_st2), @@ -570,7 +586,8 @@ module VX_bank #( .miss_resrv_tag_st0 (mrvq_tag_st0), .miss_resrv_rw_st0 (mrvq_rw_st0), .miss_resrv_byteen_st0 (mrvq_byteen_st0), - .miss_resrv_is_snp_st0 (mrvq_is_snp_st0) + .miss_resrv_is_snp_st0 (mrvq_is_snp_st0), + .miss_resrv_snp_invalidate_st0 (mrvq_snp_invalidate_st0) ); // Enqueue core response @@ -760,7 +777,7 @@ module VX_bank #( $display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data); end if (snp_req_valid && snp_req_ready) begin - $display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag); + $display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag); end if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 3d1e48b9..c52bf86a 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -101,6 +101,7 @@ module VX_cache #( // Snoop request input wire snp_req_valid, input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -112,6 +113,7 @@ module VX_cache #( // Snoop Forwarding out output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid, output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr, + output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_invalidate, output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag, `IGNORE_WARNINGS_BEGIN input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready, @@ -164,6 +166,7 @@ module VX_cache #( wire snp_req_valid_qual; wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual; + wire snp_req_invalidate_qual; wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual; wire snp_req_ready_qual; @@ -180,16 +183,19 @@ module VX_cache #( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), + .snp_req_invalidate (snp_req_invalidate), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), .snp_rsp_valid (snp_req_valid_qual), .snp_rsp_addr (snp_req_addr_qual), + .snp_rsp_invalidate (snp_req_invalidate_qual), .snp_rsp_tag (snp_req_tag_qual), .snp_rsp_ready (snp_req_ready_qual), .snp_fwdout_valid (snp_fwdout_valid), .snp_fwdout_addr (snp_fwdout_addr), + .snp_fwdout_invalidate(snp_fwdout_invalidate), .snp_fwdout_tag (snp_fwdout_tag), .snp_fwdout_ready (snp_fwdout_ready), @@ -200,14 +206,16 @@ module VX_cache #( end else begin assign snp_fwdout_valid = 0; assign snp_fwdout_addr = 0; + assign snp_fwdout_invalidate = 0; assign snp_fwdout_tag = 0; assign snp_fwdin_ready = 0; - assign snp_req_valid_qual = snp_req_valid; - assign snp_req_addr_qual = snp_req_addr; - assign snp_req_tag_qual = snp_req_tag; - assign snp_req_ready = snp_req_ready_qual; + assign snp_req_valid_qual = snp_req_valid; + assign snp_req_addr_qual = snp_req_addr; + assign snp_req_invalidate_qual = snp_req_invalidate; + assign snp_req_tag_qual = snp_req_tag; + assign snp_req_ready = snp_req_ready_qual; end if (NUM_BANKS == 1) begin @@ -266,6 +274,7 @@ module VX_cache #( wire curr_bank_snp_req_valid; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; + wire curr_bank_snp_req_invalidate; wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag; wire curr_bank_snp_req_ready; @@ -330,8 +339,9 @@ module VX_cache #( assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i); assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual); end - assign curr_bank_snp_req_tag = snp_req_tag_qual; - assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; + assign curr_bank_snp_req_invalidate = snp_req_invalidate_qual; + assign curr_bank_snp_req_tag = snp_req_tag_qual; + assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; // Snoop response assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid; @@ -400,6 +410,7 @@ module VX_cache #( // Snoop request .snp_req_valid (curr_bank_snp_req_valid), .snp_req_addr (curr_bank_snp_req_addr), + .snp_req_invalidate (curr_bank_snp_req_invalidate), .snp_req_tag (curr_bank_snp_req_tag), .snp_req_ready (curr_bank_snp_req_ready), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 71452e96..f4462fd8 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -8,8 +8,8 @@ // tag rw byteen tid `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) -// data metadata word_sel is_snp -`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1) +// data metadata word_sel is_snp snp_invalidate +`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1) `define BANK_BITS `LOG2UP(NUM_BANKS) diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 53e11575..188747c3 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -33,6 +33,7 @@ module VX_cache_miss_resrv #( input wire[WORD_SIZE-1:0] miss_add_byteen, input wire mrvq_init_ready_state, input wire miss_add_is_snp, + input wire miss_add_snp_invalidate, output wire miss_resrv_full, output wire miss_resrv_stop, @@ -52,7 +53,8 @@ module VX_cache_miss_resrv #( output wire[`REQ_TAG_WIDTH-1:0] miss_resrv_tag_st0, output wire miss_resrv_rw_st0, output wire[WORD_SIZE-1:0] miss_resrv_byteen_st0, - output wire miss_resrv_is_snp_st0 + output wire miss_resrv_is_snp_st0, + output wire miss_resrv_snp_invalidate_st0 ); reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0]; reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; @@ -91,7 +93,7 @@ module VX_cache_miss_resrv #( assign miss_resrv_valid_st0 = dequeue_possible; assign miss_resrv_addr_st0 = addr_table[dequeue_index]; - assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_rw_st0, miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0} = metadata_table[dequeue_index]; + assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_rw_st0, miss_resrv_byteen_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0, miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index]; wire mrvq_push = miss_add && enqueue_possible && !from_mrvq; wire mrvq_pop = miss_resrv_pop && dequeue_possible; @@ -119,7 +121,7 @@ module VX_cache_miss_resrv #( valid_table[enqueue_index] <= 1; ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; - metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp}; + metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}; tail_ptr <= tail_ptr + 1; end else if (increment_head) begin valid_table[head_ptr] <= 0; diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 43b96f8c..c1e559b8 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -14,18 +14,21 @@ module VX_snp_forwarder #( // Snoop request input wire snp_req_valid, input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire snp_req_invalidate, input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, // Snoop response output wire snp_rsp_valid, output wire [`DRAM_ADDR_WIDTH-1:0] snp_rsp_addr, + output wire snp_rsp_invalidate, output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag, input wire snp_rsp_ready, // Snoop Forwarding out output wire [NUM_REQUESTS-1:0] snp_fwdout_valid, output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr, + output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate, output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag, input wire [NUM_REQUESTS-1:0] snp_fwdout_ready, @@ -58,18 +61,18 @@ module VX_snp_forwarder #( assign sfq_pop = snp_rsp_valid; VX_indexable_queue #( - .DATAW (`LOG2UP(SNRQ_SIZE) + `DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH), + .DATAW (`LOG2UP(SNRQ_SIZE) + 1 +`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH), .SIZE (SNRQ_SIZE) ) snp_fwd_queue ( .clk (clk), .reset (reset), - .write_data ({sfq_write_addr, snp_req_addr, snp_req_tag}), + .write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}), .write_addr (sfq_write_addr), .push (sfq_push), .full (sfq_full), .pop (sfq_pop), .read_addr (sfq_read_addr), - .read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_tag}) + .read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}) ); always @(posedge clk) begin @@ -89,9 +92,10 @@ module VX_snp_forwarder #( genvar i; for (i = 0; i < NUM_REQUESTS; i++) begin - assign snp_fwdout_valid[i] = snp_req_valid && !sfq_full; - assign snp_fwdout_addr[i] = snp_req_addr; - assign snp_fwdout_tag[i] = sfq_write_addr; + assign snp_fwdout_valid[i] = snp_req_valid && !sfq_full; + assign snp_fwdout_addr[i] = snp_req_addr; + assign snp_fwdout_invalidate[i] = snp_req_invalidate; + assign snp_fwdout_tag[i] = sfq_write_addr; end assign snp_req_ready = !sfq_full && fwdout_ready; @@ -114,16 +118,16 @@ module VX_snp_forwarder #( `ifdef DBG_PRINT_CACHE_SNP always_ff @(posedge clk) begin if (snp_req_valid && snp_req_ready) begin - $display("%t: cache%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_tag); + $display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag); end if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin - $display("%t: cache%0d snp fwd_out: addr=%0h, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_tag[0]); + $display("%t: cache%0d snp fwd_out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_invalidate[0], snp_fwdout_tag[0]); end if (fwdin_valid && fwdin_ready) begin $display("%t: cache%0d snp fwd_in[%01d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag); end if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: cache%0d snp rsp: addr=%0h, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_tag); + $display("%t: cache%0d snp rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag); end end `endif diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 00279c0f..e8cb20d9 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -23,6 +23,7 @@ module VX_tag_data_access #( input wire reset, input wire stall, input wire is_snp_st1e, + input wire snp_invalidate_st1e, input wire stall_bank_pipe, input wire force_request_miss_st1e, @@ -113,12 +114,12 @@ module VX_tag_data_access #( .N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH), .PassThru(1) ) s0_1_c0 ( - .clk (clk), - .reset(reset), - .stall(stall), - .flush(1'b0), - .in({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}), - .out({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}), + .out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]}) ); genvar i; @@ -126,12 +127,12 @@ module VX_tag_data_access #( VX_generic_register #( .N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH) ) s0_1_cc ( - .clk (clk), - .reset(reset), - .stall(stall), - .flush(1'b0), - .in({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}), - .out({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}), + .out ({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]}) ); end @@ -174,20 +175,19 @@ module VX_tag_data_access #( // use "case equality" to handle uninitialized tag when block entry is not valid assign tags_match = ((writetag_st1e == use_read_tag_st1e) === 1'b1); - wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e && !force_request_miss_st1e; + wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e; wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e; wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && !tags_match; wire real_miss = req_invalid || req_miss; wire force_core_miss = (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e && !real_miss); - assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e; // The second term is basically saying always make an entry ready if there's already antoher entry waiting, even if you yourself see a miss - assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e || (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e); - // assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e || force_core_miss; + assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e + || (force_request_miss_st1e && !is_snp_st1e && !writefill_st1e && valid_req_st1e); assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss; assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e; diff --git a/hw/rtl/interfaces/VX_cache_snp_req_if.v b/hw/rtl/interfaces/VX_cache_snp_req_if.v index 935bd3a7..fce768b8 100644 --- a/hw/rtl/interfaces/VX_cache_snp_req_if.v +++ b/hw/rtl/interfaces/VX_cache_snp_req_if.v @@ -9,7 +9,8 @@ interface VX_cache_snp_req_if #( ) (); wire snp_req_valid; - wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr; + wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr; + wire snp_req_invalidate; wire [SNP_TAG_WIDTH-1:0] snp_req_tag; wire snp_req_ready; diff --git a/hw/rtl/interfaces/VX_exec_unit_req_if.v b/hw/rtl/interfaces/VX_exec_unit_req_if.v index 29a55ebd..b87fe1b0 100644 --- a/hw/rtl/interfaces/VX_exec_unit_req_if.v +++ b/hw/rtl/interfaces/VX_exec_unit_req_if.v @@ -9,7 +9,7 @@ interface VX_exec_unit_req_if (); wire [`NUM_THREADS-1:0] valid; wire [`NW_BITS-1:0] warp_num; wire [31:0] curr_PC; - wire [31:0] PC_next; + wire [31:0] next_PC; // Write Back Info wire [4:0] rd; @@ -29,7 +29,7 @@ interface VX_exec_unit_req_if (); wire [2:0] branch_type; // Jal info - wire jalQual; + wire is_jal; wire jal; wire [31:0] jal_offset; diff --git a/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v b/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v index 6c6ecdfa..420dd47c 100644 --- a/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v +++ b/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v @@ -22,10 +22,10 @@ interface VX_frE_to_bckE_req_if (); wire [19:0] upper_immed; wire [31:0] curr_PC; wire is_etype; - wire jalQual; + wire is_jal; wire jal; wire [31:0] jal_offset; - wire [31:0] PC_next; + wire [31:0] next_PC; wire [`NUM_THREADS-1:0] valid; wire [`NW_BITS-1:0] warp_num; diff --git a/hw/rtl/interfaces/VX_gpu_inst_req_if.v b/hw/rtl/interfaces/VX_gpu_inst_req_if.v index 85a82f8d..a39800b0 100644 --- a/hw/rtl/interfaces/VX_gpu_inst_req_if.v +++ b/hw/rtl/interfaces/VX_gpu_inst_req_if.v @@ -13,7 +13,7 @@ interface VX_gpu_inst_req_if(); wire is_barrier; - wire[31:0] pc_next; + wire[31:0] next_PC; wire [`NUM_THREADS-1:0][31:0] a_reg_data; wire [31:0] rd2; diff --git a/hw/rtl/interfaces/VX_jal_rsp_if.v b/hw/rtl/interfaces/VX_jal_rsp_if.v index c251a82a..61e0d73b 100644 --- a/hw/rtl/interfaces/VX_jal_rsp_if.v +++ b/hw/rtl/interfaces/VX_jal_rsp_if.v @@ -6,7 +6,7 @@ interface VX_jal_rsp_if (); - wire jal; + wire jal; wire [31:0] jal_dest; wire [`NW_BITS-1:0] jal_warp_num; diff --git a/hw/rtl/interfaces/VX_lsu_req_if.v b/hw/rtl/interfaces/VX_lsu_req_if.v index f0650870..636b6afd 100644 --- a/hw/rtl/interfaces/VX_lsu_req_if.v +++ b/hw/rtl/interfaces/VX_lsu_req_if.v @@ -7,7 +7,7 @@ interface VX_lsu_req_if (); wire [`NUM_THREADS-1:0] valid; - wire [31:0] lsu_pc; + wire [31:0] curr_PC; wire [`NW_BITS-1:0] warp_num; wire [`NUM_THREADS-1:0][31:0] store_data; wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data diff --git a/hw/rtl/interfaces/VX_wb_if.v b/hw/rtl/interfaces/VX_wb_if.v index 78a6e17b..0d17e4c4 100644 --- a/hw/rtl/interfaces/VX_wb_if.v +++ b/hw/rtl/interfaces/VX_wb_if.v @@ -10,7 +10,7 @@ interface VX_wb_if (); wire [`NW_BITS-1:0] warp_num; wire [4:0] rd; wire [1:0] wb; - wire [31:0] pc; + wire [31:0] curr_PC; endinterface diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 534dc89c..3a725328 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -1,10 +1,11 @@ `include "VX_define.vh" module VX_scope #( - parameter DATAW = 64, - parameter BUSW = 64, - parameter SIZE = 256, - parameter IDW = 1 + parameter DATAW = 64, + parameter BUSW = 64, + parameter SIZE = 16, + parameter UPDW = 1, + parameter DELTAW = 16 ) ( input wire clk, input wire reset, @@ -17,7 +18,8 @@ module VX_scope #( input wire bus_write, input wire bus_read ); - localparam DELTA_ENABLE = (IDW != 0); + localparam DELTA_ENABLE = (UPDW != 0); + localparam MAX_DELTA = (1**DELTAW)-1; typedef enum logic[2:0] { CMD_GET_VALID, @@ -38,10 +40,9 @@ module VX_scope #( } cmd_get_t; reg [DATAW-1:0] data_store [SIZE-1:0]; - - reg [63:0] delta_store [SIZE-1:0]; - reg [IDW-1:0] prev_id; - reg [63:0] delta; + reg [DELTAW-1:0] delta_store [SIZE-1:0]; + reg [UPDW-1:0] prev_id; + reg [DELTAW-1:0] delta; reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end; @@ -57,7 +58,7 @@ module VX_scope #( wire [BUSW-4:0] cmd_data; assign {cmd_data, cmd_type} = bus_in; - wire [IDW-1:0] trigger_id = data_in[DATAW-1:DATAW-IDW]; + wire [UPDW-1:0] trigger_id = data_in[UPDW-1:0]; always @(posedge clk) begin if (reset) begin @@ -93,7 +94,7 @@ module VX_scope #( start_wait <= 0; recording <= 1; delay_cntr <= 0; - delta <= 0; + delta <= MAX_DELTA; end else begin start_wait <= 1; recording <= 0; @@ -106,22 +107,22 @@ module VX_scope #( if (1 == delay_cntr) begin start_wait <= 0; recording <= 1; - delta <= 0; + delta <= MAX_DELTA; end end if (recording) begin if (DELTA_ENABLE) begin if (changed - || (0 == waddr) + || (delta == MAX_DELTA) || (trigger_id != prev_id)) begin data_store[waddr] <= data_in; delta_store[waddr] <= delta; waddr <= waddr + 1; delta <= 0; - end else begin + end else begin delta <= delta + 1; - end + end prev_id <= trigger_id; end else begin data_store[waddr] <= data_in; @@ -171,7 +172,7 @@ module VX_scope #( GET_VALID : bus_out = BUSW'(data_valid); GET_WIDTH : bus_out = BUSW'(DATAW); GET_COUNT : bus_out = BUSW'(waddr) + BUSW'(1); - default : bus_out = read_delta ? (BUSW)'(delta_store[raddr]) : (BUSW)'(data_store[raddr] >> read_offset); + default : bus_out = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset); endcase end diff --git a/hw/rtl/pipe_regs/VX_d_e_reg.v b/hw/rtl/pipe_regs/VX_d_e_reg.v index 5b4aec63..01b90aa8 100644 --- a/hw/rtl/pipe_regs/VX_d_e_reg.v +++ b/hw/rtl/pipe_regs/VX_d_e_reg.v @@ -19,8 +19,8 @@ module VX_d_e_reg ( .reset (reset), .stall (stall), .flush (flush), - .in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}), - .out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier }) + .in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.is_jal, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.next_PC, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}), + .out ({bckE_req_if.csr_address , bckE_req_if.is_jal , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.next_PC , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier }) ); endmodule diff --git a/hw/rtl/pipe_regs/VX_f_d_reg.v b/hw/rtl/pipe_regs/VX_f_d_reg.v index 76066040..a2dfba0c 100644 --- a/hw/rtl/pipe_regs/VX_f_d_reg.v +++ b/hw/rtl/pipe_regs/VX_f_d_reg.v @@ -16,12 +16,12 @@ module VX_f_d_reg ( VX_generic_register #( .N(64+`NW_BITS-1+1+`NUM_THREADS) ) f_d_reg ( - .clk (clk), - .reset(reset), - .stall(stall), - .flush(flush), - .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), - .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (flush), + .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), + .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) ); endmodule \ No newline at end of file diff --git a/hw/rtl/pipe_regs/VX_i_d_reg.v b/hw/rtl/pipe_regs/VX_i_d_reg.v index 014de237..9963883f 100644 --- a/hw/rtl/pipe_regs/VX_i_d_reg.v +++ b/hw/rtl/pipe_regs/VX_i_d_reg.v @@ -12,17 +12,16 @@ module VX_i_d_reg ( wire flush = 1'b0; wire stall = freeze == 1'b1; - - + VX_generic_register #( .N(64 + `NW_BITS-1 + 1 + `NUM_THREADS) ) i_d_reg ( - .clk (clk), - .reset(reset), - .stall(stall), - .flush(flush), - .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), - .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) + .clk (clk), + .reset (reset), + .stall (stall), + .flush (flush), + .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), + .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) ); endmodule \ No newline at end of file diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index b66b082b..e312ee68 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -9,6 +9,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \ -DDBG_PRINT_CACHE_SNP \ -DDBG_PRINT_CACHE_MSRQ \ -DDBG_PRINT_DRAM \ + -DDBG_PRINT_WB \ -DDBG_PRINT_OPAE #DBG_PRINT=$(DBG_PRINT_FLAGS) diff --git a/hw/simulate/testbench.cpp b/hw/simulate/testbench.cpp index 68528a7c..707e9e0e 100644 --- a/hw/simulate/testbench.cpp +++ b/hw/simulate/testbench.cpp @@ -10,52 +10,52 @@ int main(int argc, char **argv) bool passed = true; std::string tests[] = { - "../../benchmarks/riscv_tests/rv32ui-p-add.hex", - "../../benchmarks/riscv_tests/rv32ui-p-addi.hex", - "../../benchmarks/riscv_tests/rv32ui-p-and.hex", - "../../benchmarks/riscv_tests/rv32ui-p-andi.hex", - "../../benchmarks/riscv_tests/rv32ui-p-auipc.hex", - "../../benchmarks/riscv_tests/rv32ui-p-beq.hex", - "../../benchmarks/riscv_tests/rv32ui-p-bge.hex", - "../../benchmarks/riscv_tests/rv32ui-p-bgeu.hex", - "../../benchmarks/riscv_tests/rv32ui-p-blt.hex", - "../../benchmarks/riscv_tests/rv32ui-p-bltu.hex", - "../../benchmarks/riscv_tests/rv32ui-p-bne.hex", - "../../benchmarks/riscv_tests/rv32ui-p-jal.hex", - "../../benchmarks/riscv_tests/rv32ui-p-jalr.hex", - "../../benchmarks/riscv_tests/rv32ui-p-lb.hex", - "../../benchmarks/riscv_tests/rv32ui-p-lbu.hex", - "../../benchmarks/riscv_tests/rv32ui-p-lh.hex", - "../../benchmarks/riscv_tests/rv32ui-p-lhu.hex", - "../../benchmarks/riscv_tests/rv32ui-p-lui.hex", - "../../benchmarks/riscv_tests/rv32ui-p-lw.hex", - "../../benchmarks/riscv_tests/rv32ui-p-or.hex", - "../../benchmarks/riscv_tests/rv32ui-p-ori.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sb.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sh.hex", - "../../benchmarks/riscv_tests/rv32ui-p-simple.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sll.hex", - "../../benchmarks/riscv_tests/rv32ui-p-slli.hex", - "../../benchmarks/riscv_tests/rv32ui-p-slt.hex", - "../../benchmarks/riscv_tests/rv32ui-p-slti.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sltiu.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sltu.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sra.hex", - "../../benchmarks/riscv_tests/rv32ui-p-srai.hex", - "../../benchmarks/riscv_tests/rv32ui-p-srl.hex", - "../../benchmarks/riscv_tests/rv32ui-p-srli.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sub.hex", - "../../benchmarks/riscv_tests/rv32ui-p-sw.hex", - "../../benchmarks/riscv_tests/rv32ui-p-xor.hex", - "../../benchmarks/riscv_tests/rv32ui-p-xori.hex", - "../../benchmarks/riscv_tests/rv32um-p-div.hex", - "../../benchmarks/riscv_tests/rv32um-p-divu.hex", - "../../benchmarks/riscv_tests/rv32um-p-mul.hex", - "../../benchmarks/riscv_tests/rv32um-p-mulh.hex", - "../../benchmarks/riscv_tests/rv32um-p-mulhsu.hex", - "../../benchmarks/riscv_tests/rv32um-p-mulhu.hex", - "../../benchmarks/riscv_tests/rv32um-p-rem.hex", - "../../benchmarks/riscv_tests/rv32um-p-remu.hex" + "../../../benchmarks/riscv_tests/rv32ui-p-add.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-addi.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-and.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-andi.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-auipc.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-beq.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-bge.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-bgeu.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-blt.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-bltu.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-bne.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-jal.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-jalr.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-lb.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-lbu.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-lh.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-lhu.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-lui.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-lw.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-or.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-ori.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sb.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sh.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-simple.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sll.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-slli.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-slt.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-slti.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sltiu.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sltu.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sra.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-srai.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-srl.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-srli.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sub.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-sw.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-xor.hex", + "../../../benchmarks/riscv_tests/rv32ui-p-xori.hex", + "../../../benchmarks/riscv_tests/rv32um-p-div.hex", + "../../../benchmarks/riscv_tests/rv32um-p-divu.hex", + "../../../benchmarks/riscv_tests/rv32um-p-mul.hex", + "../../../benchmarks/riscv_tests/rv32um-p-mulh.hex", + "../../../benchmarks/riscv_tests/rv32um-p-mulhsu.hex", + "../../../benchmarks/riscv_tests/rv32um-p-mulhu.hex", + "../../../benchmarks/riscv_tests/rv32um-p-rem.hex", + "../../../benchmarks/riscv_tests/rv32um-p-remu.hex" }; for (std::string test : tests) { @@ -84,10 +84,10 @@ int main(int argc, char **argv) #else - char test[] = "../../runtime/tests/simple/vx_simple_main.hex"; - //char test[] = "../../benchmarks/riscv_tests/rv32ui-p-lb.hex"; - //char test[] = "../../benchmarks/riscv_tests/rv32ui-p-lw.hex"; - //char test[] = "../../benchmarks/riscv_tests/rv32ui-p-sw.hex"; + char test[] = "../../../runtime/tests/simple/vx_simple_main.hex"; + //char test[] = "../../../benchmarks/riscv_tests/rv32ui-p-lb.hex"; + //char test[] = "../../../benchmarks/riscv_tests/rv32ui-p-lw.hex"; + //char test[] = "../../../benchmarks/riscv_tests/rv32ui-p-sw.hex"; std::cerr << test << std::endl;