diff --git a/hw/modelsim/vortex_tb.v b/hw/modelsim/vortex_tb.v index 4db1dc84..b57f97cf 100644 --- a/hw/modelsim/vortex_tb.v +++ b/hw/modelsim/vortex_tb.v @@ -86,7 +86,7 @@ reg[31:0] io_data; initial begin // $fdumpfile("vortex1.vcd"); - load_file("../../runtime/tests/simple/vx_simple_main.hex"); + load_file("../../runtime/tests/simple/simple_main_if.hex"); $dumpvars(0, vortex_tb); reset = 1; clk = 0; diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index e4c98706..394f4823 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -1,130 +1,127 @@ `include "VX_define.vh" -module VX_back_end - #( - parameter CORE_ID = 0 - ) - ( +module VX_back_end #( + parameter CORE_ID = 0 +) ( input wire clk, input wire reset, input wire schedule_delay, - VX_gpu_dcache_rsp_if vx_dcache_rsp, - VX_gpu_dcache_req_if vx_dcache_req, + VX_gpu_dcache_rsp_if dcache_rsp_if, + VX_gpu_dcache_req_if dcache_req_if, - output wire out_mem_delay, - output wire out_exec_delay, - output wire gpr_stage_delay, - VX_jal_response_if vx_jal_rsp, - VX_branch_response_if vx_branch_rsp, + output wire out_mem_delay, + output wire out_exec_delay, + output wire gpr_stage_delay, + VX_jal_response_if jal_rsp_if, + VX_branch_response_if branch_rsp_if, - VX_frE_to_bckE_req_if vx_bckE_req, - VX_wb_if vx_writeback_if, + VX_frE_to_bckE_req_if bckE_req_if, + VX_wb_if writeback_if, - VX_warp_ctl_if vx_warp_ctl + VX_warp_ctl_if warp_ctl_if ); +VX_wb_if writeback_temp_if(); +assign writeback_if.wb = writeback_temp_if.wb; +assign writeback_if.rd = writeback_temp_if.rd; +assign writeback_if.write_data = writeback_temp_if.write_data; +assign writeback_if.wb_valid = writeback_temp_if.wb_valid; +assign writeback_if.wb_warp_num = writeback_temp_if.wb_warp_num; +assign writeback_if.wb_pc = writeback_temp_if.wb_pc; -VX_wb_if vx_writeback_temp(); -assign vx_writeback_if.wb = vx_writeback_temp.wb; -assign vx_writeback_if.rd = vx_writeback_temp.rd; -assign vx_writeback_if.write_data = vx_writeback_temp.write_data; -assign vx_writeback_if.wb_valid = vx_writeback_temp.wb_valid; -assign vx_writeback_if.wb_warp_num = vx_writeback_temp.wb_warp_num; -assign vx_writeback_if.wb_pc = vx_writeback_temp.wb_pc; - -// assign VX_writeback_if(vx_writeback_temp); +// assign VX_writeback_if(writeback_temp_if); wire no_slot_mem; wire no_slot_exec; // LSU input + output -VX_lsu_req_if vx_lsu_req(); -VX_inst_mem_wb_if vx_mem_wb(); +VX_lsu_req_if lsu_req_if(); +VX_inst_mem_wb_if mem_wb_if(); // Exec unit input + output -VX_exec_unit_req_if vx_exec_unit_req(); -VX_inst_exec_wb_if vx_inst_exec_wb(); +VX_exec_unit_req_if exec_unit_req_if(); +VX_inst_exec_wb_if inst_exec_wb_if(); // GPU unit input -VX_gpu_inst_req_if vx_gpu_inst_req(); +VX_gpu_inst_req_if gpu_inst_req_if(); // CSR unit inputs -VX_csr_req_if vx_csr_req(); -VX_csr_wb_if vx_csr_wb(); +VX_csr_req_if csr_req_if(); +VX_csr_wb_if csr_wb_if(); wire no_slot_csr; wire stall_gpr_csr; -VX_gpr_stage vx_gpr_stage( +VX_gpr_stage gpr_stage ( .clk (clk), .reset (reset), .schedule_delay (schedule_delay), - .vx_writeback_if(vx_writeback_temp), - .vx_bckE_req (vx_bckE_req), + .writeback_if (writeback_temp_if), + .bckE_req_if (bckE_req_if), // New - .vx_exec_unit_req(vx_exec_unit_req), - .vx_lsu_req (vx_lsu_req), - .vx_gpu_inst_req (vx_gpu_inst_req), - .vx_csr_req (vx_csr_req), + .exec_unit_req_if(exec_unit_req_if), + .lsu_req_if (lsu_req_if), + .gpu_inst_req_if (gpu_inst_req_if), + .csr_req_if (csr_req_if), .stall_gpr_csr (stall_gpr_csr), // End new - .memory_delay (out_mem_delay), - .exec_delay (out_exec_delay), - .gpr_stage_delay (gpr_stage_delay) - ); + .memory_delay (out_mem_delay), + .exec_delay (out_exec_delay), + .gpr_stage_delay (gpr_stage_delay) +); VX_lsu load_store_unit ( .clk (clk), .reset (reset), - .vx_lsu_req (vx_lsu_req), - .vx_mem_wb (vx_mem_wb), - .vx_dcache_rsp(vx_dcache_rsp), - .vx_dcache_req(vx_dcache_req), + .lsu_req_if (lsu_req_if), + .mem_wb_if (mem_wb_if), + .dcache_rsp_if(dcache_rsp_if), + .dcache_req_if(dcache_req_if), .out_delay (out_mem_delay), .no_slot_mem (no_slot_mem) ); -VX_execute_unit vx_execUnit ( +VX_execute_unit execUnit ( .clk (clk), .reset (reset), - .vx_exec_unit_req(vx_exec_unit_req), - .vx_inst_exec_wb (vx_inst_exec_wb), - .vx_jal_rsp (vx_jal_rsp), - .vx_branch_rsp (vx_branch_rsp), + .exec_unit_req_if(exec_unit_req_if), + .inst_exec_wb_if (inst_exec_wb_if), + .jal_rsp_if (jal_rsp_if), + .branch_rsp_if (branch_rsp_if), .out_delay (out_exec_delay), .no_slot_exec (no_slot_exec) ); -VX_gpgpu_inst vx_gpgpu_inst ( - .vx_gpu_inst_req(vx_gpu_inst_req), - .vx_warp_ctl (vx_warp_ctl) +VX_gpgpu_inst gpgpu_inst ( + .gpu_inst_req_if(gpu_inst_req_if), + .warp_ctl_if (warp_ctl_if) ); -// VX_csr_wrapper vx_csr_wrapper( -// .vx_csr_req(vx_csr_req), -// .vx_csr_wb (vx_csr_wb) -// ); +// VX_csr_wrapper csr_wrapper( +// .csr_req_if(csr_req_if), +// .csr_wb_if (csr_wb_if) +// ); VX_csr_pipe #( .CORE_ID(CORE_ID) -) vx_csr_pipe ( +) csr_pipe ( .clk (clk), .reset (reset), .no_slot_csr (no_slot_csr), - .vx_csr_req (vx_csr_req), - .vx_writeback(vx_writeback_temp), - .vx_csr_wb (vx_csr_wb), + .csr_req_if (csr_req_if), + .writeback_if(writeback_temp_if), + .csr_wb_if (csr_wb_if), .stall_gpr_csr(stall_gpr_csr) ); -VX_writeback vx_wb ( +VX_writeback wb ( .clk (clk), .reset (reset), - .vx_mem_wb (vx_mem_wb), - .vx_inst_exec_wb (vx_inst_exec_wb), - .vx_csr_wb (vx_csr_wb), + .mem_wb_if (mem_wb_if), + .inst_exec_wb_if (inst_exec_wb_if), + .csr_wb_if (csr_wb_if), - .vx_writeback_if(vx_writeback_temp), + .writeback_if (writeback_temp_if), .no_slot_mem (no_slot_mem), .no_slot_exec (no_slot_exec), .no_slot_csr (no_slot_csr) diff --git a/hw/rtl/VX_csr_handler.v b/hw/rtl/VX_csr_handler.v index f9cbde45..eac1e0b9 100644 --- a/hw/rtl/VX_csr_handler.v +++ b/hw/rtl/VX_csr_handler.v @@ -1,7 +1,7 @@ module VX_csr_handler ( input wire clk, input wire[`CSR_ADDR_SIZE-1:0] in_decode_csr_address, // done - VX_csr_write_request_if vx_csr_w_req, + VX_csr_write_request_if csr_w_req_if, input wire in_wb_valid, output wire[31:0] out_decode_csr_data // done ); @@ -9,9 +9,9 @@ module VX_csr_handler ( wire[`CSR_ADDR_SIZE-1:0] in_mem_csr_address; wire[31:0] in_mem_csr_result; - assign in_mem_is_csr = vx_csr_w_req.is_csr; - assign in_mem_csr_address = vx_csr_w_req.csr_address; - assign in_mem_csr_result = vx_csr_w_req.csr_result; + assign in_mem_is_csr = csr_w_req_if.is_csr; + assign in_mem_csr_address = csr_w_req_if.csr_address; + assign in_mem_csr_result = csr_w_req_if.csr_result; reg [`CSR_WIDTH-1:0] csr [`NUM_CSRS-1:0]; diff --git a/hw/rtl/VX_csr_pipe.v b/hw/rtl/VX_csr_pipe.v index 029e08ce..6da9a23c 100644 --- a/hw/rtl/VX_csr_pipe.v +++ b/hw/rtl/VX_csr_pipe.v @@ -3,13 +3,13 @@ module VX_csr_pipe #( parameter CORE_ID = 0 ) ( - input wire clk, // Clock - input wire reset, - input wire no_slot_csr, - VX_csr_req_if vx_csr_req, - VX_wb_if vx_writeback, - VX_csr_wb_if vx_csr_wb, - output wire stall_gpr_csr + input wire clk, + input wire reset, + input wire no_slot_csr, + VX_csr_req_if csr_req_if, + VX_wb_if writeback_if, + VX_csr_wb_if csr_wb_if, + output wire stall_gpr_csr ); wire[`NUM_THREADS-1:0] valid_s2; @@ -24,16 +24,16 @@ module VX_csr_pipe #( wire[31:0] csr_read_data_unqual; wire[31:0] csr_read_data; - assign stall_gpr_csr = no_slot_csr && vx_csr_req.is_csr && |(vx_csr_req.valid); + assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid); - assign csr_read_data = (csr_address_s2 == vx_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual; + assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual; - wire writeback = |vx_writeback.wb_valid; + wire writeback = |writeback_if.wb_valid; - VX_csr_data vx_csr_data( + VX_csr_data csr_data( .clk (clk), .reset (reset), - .in_read_csr_address (vx_csr_req.csr_address), + .in_read_csr_address (csr_req_if.csr_address), .in_write_valid (is_csr_s2), .in_write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]), .in_write_csr_address(csr_address_s2), @@ -44,10 +44,10 @@ module VX_csr_pipe #( reg [31:0] csr_updated_data; always @(*) begin - case (vx_csr_req.alu_op) - `CSR_ALU_RW: csr_updated_data = vx_csr_req.csr_mask; - `CSR_ALU_RS: csr_updated_data = csr_read_data | vx_csr_req.csr_mask; - `CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - vx_csr_req.csr_mask); + case (csr_req_if.alu_op) + `CSR_ALU_RW: csr_updated_data = csr_req_if.csr_mask; + `CSR_ALU_RS: csr_updated_data = csr_read_data | csr_req_if.csr_mask; + `CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_req_if.csr_mask); default: csr_updated_data = 32'hdeadbeef; endcase end @@ -61,7 +61,7 @@ module VX_csr_pipe #( .reset(reset), .stall(no_slot_csr), .flush(zero), - .in ({vx_csr_req.valid, vx_csr_req.warp_num, vx_csr_req.rd, vx_csr_req.wb, vx_csr_req.is_csr, vx_csr_req.csr_address, csr_read_data , csr_updated_data }), + .in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data , csr_updated_data }), .out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2}) ); @@ -97,10 +97,10 @@ module VX_csr_pipe #( warp_id_select ? warp_idz : csr_vec_read_data_s2; - assign vx_csr_wb.valid = valid_s2; - assign vx_csr_wb.warp_num = warp_num_s2; - assign vx_csr_wb.rd = rd_s2; - assign vx_csr_wb.wb = wb_s2; - assign vx_csr_wb.csr_result = final_csr_data; + assign csr_wb_if.valid = valid_s2; + assign csr_wb_if.warp_num = warp_num_s2; + assign csr_wb_if.rd = rd_s2; + assign csr_wb_if.wb = wb_s2; + assign csr_wb_if.csr_result = final_csr_data; endmodule diff --git a/hw/rtl/VX_csr_wrapper.v b/hw/rtl/VX_csr_wrapper.v index 3fcfb31a..f0d97118 100644 --- a/hw/rtl/VX_csr_wrapper.v +++ b/hw/rtl/VX_csr_wrapper.v @@ -2,8 +2,8 @@ `include "VX_define.vh" module VX_csr_wrapper ( - VX_csr_req_if vx_csr_req, - VX_csr_wb_if vx_csr_wb + VX_csr_req_if csr_req_if, + VX_csr_wb_if csr_wb_if ); @@ -17,21 +17,21 @@ module VX_csr_wrapper ( end for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init - assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, vx_csr_req.warp_num}; + assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num}; end endgenerate - assign vx_csr_wb.valid = vx_csr_req.valid; - assign vx_csr_wb.warp_num = vx_csr_req.warp_num; - assign vx_csr_wb.rd = vx_csr_req.rd; - assign vx_csr_wb.wb = vx_csr_req.wb; + assign csr_wb_if.valid = csr_req_if.valid; + assign csr_wb_if.warp_num = csr_req_if.warp_num; + assign csr_wb_if.rd = csr_req_if.rd; + assign csr_wb_if.wb = csr_req_if.wb; - wire thread_select = vx_csr_req.csr_address == 12'h20; - wire warp_select = vx_csr_req.csr_address == 12'h21; + wire thread_select = csr_req_if.csr_address == 12'h20; + wire warp_select = csr_req_if.csr_address == 12'h21; - assign vx_csr_wb.csr_result = thread_select ? thread_ids : + assign csr_wb_if.csr_result = thread_select ? thread_ids : warp_select ? warp_ids : 0; diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index de00a171..a3a79257 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -3,22 +3,22 @@ module VX_decode( // Fetch Inputs - VX_inst_meta_if fd_inst_meta_de, + VX_inst_meta_if fd_inst_meta_de, // Outputs - VX_frE_to_bckE_req_if vx_frE_to_bckE_req, - VX_wstall_if vx_wstall, - VX_join_if vx_join, + VX_frE_to_bckE_req_if frE_to_bckE_req_if, + VX_wstall_if wstall_if, + VX_join_if join_if, - output wire terminate_sim + output wire terminate_sim ); wire[31:0] in_instruction = fd_inst_meta_de.instruction; wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; - wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num; + wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num; - assign vx_frE_to_bckE_req.curr_PC = in_curr_PC; + assign frE_to_bckE_req_if.curr_PC = in_curr_PC; wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid; @@ -84,20 +84,20 @@ module VX_decode( reg[2:0] temp_branch_type; reg temp_branch_stall; - assign vx_frE_to_bckE_req.valid = fd_inst_meta_de.valid; + assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid; - assign vx_frE_to_bckE_req.warp_num = in_warp_num; + assign frE_to_bckE_req_if.warp_num = in_warp_num; assign curr_opcode = in_instruction[6:0]; - assign vx_frE_to_bckE_req.rd = in_instruction[11:7]; - assign vx_frE_to_bckE_req.rs1 = in_instruction[19:15]; - assign vx_frE_to_bckE_req.rs2 = in_instruction[24:20]; + assign frE_to_bckE_req_if.rd = in_instruction[11:7]; + assign frE_to_bckE_req_if.rs1 = in_instruction[19:15]; + assign frE_to_bckE_req_if.rs2 = in_instruction[24:20]; assign func3 = in_instruction[14:12]; assign func7 = in_instruction[31:25]; assign u_12 = in_instruction[31:20]; - assign vx_frE_to_bckE_req.PC_next = in_curr_PC + 32'h4; + assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4; // Write Back sigal assign is_rtype = (curr_opcode == `R_INST); @@ -123,43 +123,43 @@ module VX_decode( assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE - assign vx_join.is_join = is_join; - assign vx_join.join_warp_num = in_warp_num; + assign join_if.is_join = is_join; + assign join_if.join_warp_num = in_warp_num; - assign vx_frE_to_bckE_req.is_wspawn = is_wspawn; - assign vx_frE_to_bckE_req.is_tmc = is_tmc; - assign vx_frE_to_bckE_req.is_split = is_split; - assign vx_frE_to_bckE_req.is_barrier = is_barrier; + assign frE_to_bckE_req_if.is_wspawn = is_wspawn; + assign frE_to_bckE_req_if.is_tmc = is_tmc; + assign frE_to_bckE_req_if.is_split = is_split; + assign frE_to_bckE_req_if.is_barrier = is_barrier; - assign vx_frE_to_bckE_req.csr_immed = is_csr_immed; - assign vx_frE_to_bckE_req.is_csr = is_csr; + assign frE_to_bckE_req_if.csr_immed = is_csr_immed; + assign frE_to_bckE_req_if.is_csr = is_csr; - assign vx_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL : + assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL : is_linst ? `WB_MEM : (is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU : `NO_WB; - assign vx_frE_to_bckE_req.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG; + assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG; // MEM signals - assign vx_frE_to_bckE_req.mem_read = (is_linst) ? func3 : `NO_MEM_READ; - assign vx_frE_to_bckE_req.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE; + assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `NO_MEM_READ; + assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE; // UPPER IMMEDIATE always @(*) begin case(curr_opcode) - `LUI_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3}; - `AUIPC_INST: temp_upper_immed = {func7, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.rs1, func3}; + `LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3}; + `AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3}; default: temp_upper_immed = 20'h0; endcase // curr_opcode end - assign vx_frE_to_bckE_req.upper_immed = temp_upper_immed; + assign frE_to_bckE_req_if.upper_immed = temp_upper_immed; assign jal_b_19_to_12 = in_instruction[19:12]; @@ -171,7 +171,7 @@ module VX_decode( assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset}; - assign jalr_immed = {func7, vx_frE_to_bckE_req.rs2}; + assign jalr_immed = {func7, frE_to_bckE_req_if.rs2}; assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed}; @@ -208,16 +208,16 @@ module VX_decode( endcase end - assign vx_frE_to_bckE_req.jalQual = is_jal; - assign vx_frE_to_bckE_req.jal = temp_jal; - assign vx_frE_to_bckE_req.jal_offset = temp_jal_offset; + assign frE_to_bckE_req_if.jalQual = is_jal; + assign frE_to_bckE_req_if.jal = temp_jal; + assign frE_to_bckE_req_if.jal_offset = temp_jal_offset; // wire is_ebreak; // assign is_ebreak = is_e_inst; wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid)); - assign vx_frE_to_bckE_req.ebreak = ebreak; + assign frE_to_bckE_req_if.ebreak = ebreak; assign terminate_sim = is_e_inst; @@ -226,26 +226,26 @@ module VX_decode( assign csr_cond1 = func3 != 3'h0; assign csr_cond2 = u_12 >= 12'h2; - assign vx_frE_to_bckE_req.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55; + assign frE_to_bckE_req_if.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55; // ITYPE IMEED assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5); - assign alu_shift_i_immed = {{7{1'b0}}, vx_frE_to_bckE_req.rs2}; + assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2}; assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12; always @(*) begin case(curr_opcode) `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp}; - `S_INST: temp_itype_immed = {{20{func7[6]}}, func7, vx_frE_to_bckE_req.rd}; + `S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd}; `L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12}; `B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]}; default: temp_itype_immed = 32'hdeadbeef; endcase end - assign vx_frE_to_bckE_req.itype_immed = temp_itype_immed; + assign frE_to_bckE_req_if.itype_immed = temp_itype_immed; always @(*) begin case(curr_opcode) @@ -282,10 +282,10 @@ module VX_decode( endcase end - assign vx_frE_to_bckE_req.branch_type = temp_branch_type; + assign frE_to_bckE_req_if.branch_type = temp_branch_type; - assign vx_wstall.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid); - assign vx_wstall.warp_num = in_warp_num; + assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid); + assign wstall_if.warp_num = in_warp_num; always @(*) begin // ALU OP @@ -330,14 +330,14 @@ module VX_decode( wire[4:0] temp_final_alu; - assign temp_final_alu = is_btype ? ((vx_frE_to_bckE_req.branch_type < `BLTU) ? `SUB : `SUBU) : + assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BLTU) ? `SUB : `SUBU) : is_lui ? `LUI_ALU : is_auipc ? `AUIPC_ALU : is_csr ? csr_alu : (is_stype || is_linst) ? `ADD : alu_op; - assign vx_frE_to_bckE_req.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu; + assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu; endmodule diff --git a/hw/rtl/VX_dmem_controller.v b/hw/rtl/VX_dmem_controller.v index bd5b31c7..ea842699 100644 --- a/hw/rtl/VX_dmem_controller.v +++ b/hw/rtl/VX_dmem_controller.v @@ -5,69 +5,69 @@ module VX_dmem_controller ( input wire reset, // Dram <-> Dcache - VX_gpu_dcache_dram_req_if vx_gpu_dcache_dram_req, - VX_gpu_dcache_dram_rsp_if vx_gpu_dcache_dram_res, - VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req, + VX_gpu_dcache_dram_req_if gpu_dcache_dram_req_if, + VX_gpu_dcache_dram_rsp_if gpu_dcache_dram_res_if, + VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if, // Dram <-> Icache - VX_gpu_dcache_dram_req_if vx_gpu_icache_dram_req, - VX_gpu_dcache_dram_rsp_if vx_gpu_icache_dram_res, - VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req, + VX_gpu_dcache_dram_req_if gpu_icache_dram_req_if, + VX_gpu_dcache_dram_rsp_if gpu_icache_dram_res_if, + VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if, // Core <-> Dcache - VX_gpu_dcache_rsp_if vx_dcache_rsp, - VX_gpu_dcache_req_if vx_dcache_req, + VX_gpu_dcache_rsp_if dcache_rsp_if, + VX_gpu_dcache_req_if dcache_req_if, // Core <-> Icache - VX_gpu_dcache_rsp_if vx_icache_rsp, - VX_gpu_dcache_req_if vx_icache_req + VX_gpu_dcache_rsp_if icache_rsp_if, + VX_gpu_dcache_req_if icache_req_if ); - VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_smem(); - VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_smem(); + VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if(); + VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if(); - VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp_dcache(); - VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_dcache(); + VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if(); + VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if(); - wire to_shm = vx_dcache_req.core_req_addr[0][31:24] == 8'hFF; - wire dcache_wants_wb = (|vx_dcache_rsp_dcache.core_wb_valid); + wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF; + wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_wb_valid); // Dcache Request - assign vx_dcache_req_dcache.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}}; - assign vx_dcache_req_dcache.core_req_addr = vx_dcache_req.core_req_addr; - assign vx_dcache_req_dcache.core_req_writedata = vx_dcache_req.core_req_writedata; - assign vx_dcache_req_dcache.core_req_mem_read = vx_dcache_req.core_req_mem_read; - assign vx_dcache_req_dcache.core_req_mem_write = vx_dcache_req.core_req_mem_write; - assign vx_dcache_req_dcache.core_req_rd = vx_dcache_req.core_req_rd; - assign vx_dcache_req_dcache.core_req_wb = vx_dcache_req.core_req_wb; - assign vx_dcache_req_dcache.core_req_warp_num = vx_dcache_req.core_req_warp_num; - assign vx_dcache_req_dcache.core_req_pc = vx_dcache_req.core_req_pc; - assign vx_dcache_req_dcache.core_no_wb_slot = vx_dcache_req.core_no_wb_slot; + assign dcache_req_dcache_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~to_shm}}; + assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr; + assign dcache_req_dcache_if.core_req_writedata = dcache_req_if.core_req_writedata; + assign dcache_req_dcache_if.core_req_mem_read = dcache_req_if.core_req_mem_read; + assign dcache_req_dcache_if.core_req_mem_write = dcache_req_if.core_req_mem_write; + assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd; + assign dcache_req_dcache_if.core_req_wb = dcache_req_if.core_req_wb; + assign dcache_req_dcache_if.core_req_warp_num = dcache_req_if.core_req_warp_num; + assign dcache_req_dcache_if.core_req_pc = dcache_req_if.core_req_pc; + assign dcache_req_dcache_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot; // Shred Memory Request - assign vx_dcache_req_smem.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}}; - assign vx_dcache_req_smem.core_req_addr = vx_dcache_req.core_req_addr; - assign vx_dcache_req_smem.core_req_writedata = vx_dcache_req.core_req_writedata; - assign vx_dcache_req_smem.core_req_mem_read = vx_dcache_req.core_req_mem_read; - assign vx_dcache_req_smem.core_req_mem_write = vx_dcache_req.core_req_mem_write; - assign vx_dcache_req_smem.core_req_rd = vx_dcache_req.core_req_rd; - assign vx_dcache_req_smem.core_req_wb = vx_dcache_req.core_req_wb; - assign vx_dcache_req_smem.core_req_warp_num = vx_dcache_req.core_req_warp_num; - assign vx_dcache_req_smem.core_req_pc = vx_dcache_req.core_req_pc; - assign vx_dcache_req_smem.core_no_wb_slot = vx_dcache_req.core_no_wb_slot || dcache_wants_wb; + assign dcache_req_smem_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{to_shm}}; + assign dcache_req_smem_if.core_req_addr = dcache_req_if.core_req_addr; + assign dcache_req_smem_if.core_req_writedata = dcache_req_if.core_req_writedata; + assign dcache_req_smem_if.core_req_mem_read = dcache_req_if.core_req_mem_read; + assign dcache_req_smem_if.core_req_mem_write = dcache_req_if.core_req_mem_write; + assign dcache_req_smem_if.core_req_rd = dcache_req_if.core_req_rd; + assign dcache_req_smem_if.core_req_wb = dcache_req_if.core_req_wb; + assign dcache_req_smem_if.core_req_warp_num = dcache_req_if.core_req_warp_num; + assign dcache_req_smem_if.core_req_pc = dcache_req_if.core_req_pc; + assign dcache_req_smem_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot || dcache_wants_wb; // Dcache Response - assign vx_dcache_rsp.core_wb_valid = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_valid : vx_dcache_rsp_smem.core_wb_valid; - assign vx_dcache_rsp.core_wb_req_rd = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_rd : vx_dcache_rsp_smem.core_wb_req_rd; - assign vx_dcache_rsp.core_wb_req_wb = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_req_wb : vx_dcache_rsp_smem.core_wb_req_wb; - assign vx_dcache_rsp.core_wb_warp_num = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_warp_num : vx_dcache_rsp_smem.core_wb_warp_num; - assign vx_dcache_rsp.core_wb_readdata = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_readdata : vx_dcache_rsp_smem.core_wb_readdata; - assign vx_dcache_rsp.core_wb_pc = dcache_wants_wb ? vx_dcache_rsp_dcache.core_wb_pc : vx_dcache_rsp_smem.core_wb_pc; + assign dcache_rsp_if.core_wb_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_valid : dcache_rsp_smem_if.core_wb_valid; + assign dcache_rsp_if.core_wb_req_rd = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_rd : dcache_rsp_smem_if.core_wb_req_rd; + assign dcache_rsp_if.core_wb_req_wb = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_wb : dcache_rsp_smem_if.core_wb_req_wb; + assign dcache_rsp_if.core_wb_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_warp_num : dcache_rsp_smem_if.core_wb_warp_num; + assign dcache_rsp_if.core_wb_readdata = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_readdata : dcache_rsp_smem_if.core_wb_readdata; + assign dcache_rsp_if.core_wb_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_pc : dcache_rsp_smem_if.core_wb_pc; - assign vx_dcache_rsp.delay_req = to_shm ? vx_dcache_rsp_smem.delay_req : vx_dcache_rsp_dcache.delay_req; + assign dcache_rsp_if.delay_req = to_shm ? dcache_rsp_smem_if.delay_req : dcache_rsp_dcache_if.delay_req; - VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_req(); - VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_smem_dram_res(); + VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if(); + VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_res_if(); VX_cache #( .CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES), @@ -95,46 +95,46 @@ module VX_dmem_controller ( .reset (reset), // Core req - .core_req_valid (vx_dcache_req_smem.core_req_valid), - .core_req_mem_read (vx_dcache_req_smem.core_req_mem_read), - .core_req_mem_write(vx_dcache_req_smem.core_req_mem_write), - .core_req_addr (vx_dcache_req_smem.core_req_addr), - .core_req_writedata(vx_dcache_req_smem.core_req_writedata), - .core_req_rd (vx_dcache_req_smem.core_req_rd), - .core_req_wb (vx_dcache_req_smem.core_req_wb), - .core_req_warp_num (vx_dcache_req_smem.core_req_warp_num), - .core_req_pc (vx_dcache_req_smem.core_req_pc), + .core_req_valid (dcache_req_smem_if.core_req_valid), + .core_req_mem_read (dcache_req_smem_if.core_req_mem_read), + .core_req_mem_write(dcache_req_smem_if.core_req_mem_write), + .core_req_addr (dcache_req_smem_if.core_req_addr), + .core_req_writedata(dcache_req_smem_if.core_req_writedata), + .core_req_rd (dcache_req_smem_if.core_req_rd), + .core_req_wb (dcache_req_smem_if.core_req_wb), + .core_req_warp_num (dcache_req_smem_if.core_req_warp_num), + .core_req_pc (dcache_req_smem_if.core_req_pc), // Delay Core Req - .delay_req (vx_dcache_rsp_smem.delay_req), + .delay_req (dcache_rsp_smem_if.delay_req), // Core Cache Can't WB - .core_no_wb_slot (vx_dcache_req_smem.core_no_wb_slot), + .core_no_wb_slot (dcache_req_smem_if.core_no_wb_slot), // Cache CWB - .core_wb_valid (vx_dcache_rsp_smem.core_wb_valid), - .core_wb_req_rd (vx_dcache_rsp_smem.core_wb_req_rd), - .core_wb_req_wb (vx_dcache_rsp_smem.core_wb_req_wb), - .core_wb_warp_num (vx_dcache_rsp_smem.core_wb_warp_num), - .core_wb_readdata (vx_dcache_rsp_smem.core_wb_readdata), - .core_wb_pc (vx_dcache_rsp_smem.core_wb_pc), + .core_wb_valid (dcache_rsp_smem_if.core_wb_valid), + .core_wb_req_rd (dcache_rsp_smem_if.core_wb_req_rd), + .core_wb_req_wb (dcache_rsp_smem_if.core_wb_req_wb), + .core_wb_warp_num (dcache_rsp_smem_if.core_wb_warp_num), + .core_wb_readdata (dcache_rsp_smem_if.core_wb_readdata), + .core_wb_pc (dcache_rsp_smem_if.core_wb_pc), `IGNORE_WARNINGS_BEGIN .core_wb_address (), `IGNORE_WARNINGS_END // DRAM response - .dram_rsp_valid (vx_gpu_smem_dram_res.dram_rsp_valid), - .dram_rsp_addr (vx_gpu_smem_dram_res.dram_rsp_addr), - .dram_rsp_data (vx_gpu_smem_dram_res.dram_rsp_data), + .dram_rsp_valid (gpu_smem_dram_res_if.dram_rsp_valid), + .dram_rsp_addr (gpu_smem_dram_res_if.dram_rsp_addr), + .dram_rsp_data (gpu_smem_dram_res_if.dram_rsp_data), // DRAM accept response - .dram_rsp_ready (vx_gpu_smem_dram_req.dram_rsp_ready), + .dram_rsp_ready (gpu_smem_dram_req_if.dram_rsp_ready), // DRAM Req - .dram_req_read (vx_gpu_smem_dram_req.dram_req_read), - .dram_req_write (vx_gpu_smem_dram_req.dram_req_write), - .dram_req_addr (vx_gpu_smem_dram_req.dram_req_addr), - .dram_req_data (vx_gpu_smem_dram_req.dram_req_data), + .dram_req_read (gpu_smem_dram_req_if.dram_req_read), + .dram_req_write (gpu_smem_dram_req_if.dram_req_write), + .dram_req_addr (gpu_smem_dram_req_if.dram_req_addr), + .dram_req_data (gpu_smem_dram_req_if.dram_req_data), .dram_req_full (1), // Snoop Request @@ -178,52 +178,52 @@ module VX_dmem_controller ( .reset (reset), // Core req - .core_req_valid (vx_dcache_req_dcache.core_req_valid), - .core_req_mem_read (vx_dcache_req_dcache.core_req_mem_read), - .core_req_mem_write(vx_dcache_req_dcache.core_req_mem_write), - .core_req_addr (vx_dcache_req_dcache.core_req_addr), - .core_req_writedata(vx_dcache_req_dcache.core_req_writedata), - .core_req_rd (vx_dcache_req_dcache.core_req_rd), - .core_req_wb (vx_dcache_req_dcache.core_req_wb), - .core_req_warp_num (vx_dcache_req_dcache.core_req_warp_num), - .core_req_pc (vx_dcache_req_dcache.core_req_pc), + .core_req_valid (dcache_req_dcache_if.core_req_valid), + .core_req_mem_read (dcache_req_dcache_if.core_req_mem_read), + .core_req_mem_write(dcache_req_dcache_if.core_req_mem_write), + .core_req_addr (dcache_req_dcache_if.core_req_addr), + .core_req_writedata(dcache_req_dcache_if.core_req_writedata), + .core_req_rd (dcache_req_dcache_if.core_req_rd), + .core_req_wb (dcache_req_dcache_if.core_req_wb), + .core_req_warp_num (dcache_req_dcache_if.core_req_warp_num), + .core_req_pc (dcache_req_dcache_if.core_req_pc), // Delay Core Req - .delay_req (vx_dcache_rsp_dcache.delay_req), + .delay_req (dcache_rsp_dcache_if.delay_req), // Core Cache Can't WB - .core_no_wb_slot (vx_dcache_req_dcache.core_no_wb_slot), + .core_no_wb_slot (dcache_req_dcache_if.core_no_wb_slot), // Cache CWB - .core_wb_valid (vx_dcache_rsp_dcache.core_wb_valid), - .core_wb_req_rd (vx_dcache_rsp_dcache.core_wb_req_rd), - .core_wb_req_wb (vx_dcache_rsp_dcache.core_wb_req_wb), - .core_wb_warp_num (vx_dcache_rsp_dcache.core_wb_warp_num), - .core_wb_readdata (vx_dcache_rsp_dcache.core_wb_readdata), - .core_wb_pc (vx_dcache_rsp_dcache.core_wb_pc), + .core_wb_valid (dcache_rsp_dcache_if.core_wb_valid), + .core_wb_req_rd (dcache_rsp_dcache_if.core_wb_req_rd), + .core_wb_req_wb (dcache_rsp_dcache_if.core_wb_req_wb), + .core_wb_warp_num (dcache_rsp_dcache_if.core_wb_warp_num), + .core_wb_readdata (dcache_rsp_dcache_if.core_wb_readdata), + .core_wb_pc (dcache_rsp_dcache_if.core_wb_pc), `IGNORE_WARNINGS_BEGIN .core_wb_address (), `IGNORE_WARNINGS_END // DRAM response - .dram_rsp_valid (vx_gpu_dcache_dram_res.dram_rsp_valid), - .dram_rsp_addr (vx_gpu_dcache_dram_res.dram_rsp_addr), - .dram_rsp_data (vx_gpu_dcache_dram_res.dram_rsp_data), + .dram_rsp_valid (gpu_dcache_dram_res_if.dram_rsp_valid), + .dram_rsp_addr (gpu_dcache_dram_res_if.dram_rsp_addr), + .dram_rsp_data (gpu_dcache_dram_res_if.dram_rsp_data), // DRAM accept response - .dram_rsp_ready (vx_gpu_dcache_dram_req.dram_rsp_ready), + .dram_rsp_ready (gpu_dcache_dram_req_if.dram_rsp_ready), // DRAM Req - .dram_req_read (vx_gpu_dcache_dram_req.dram_req_read), - .dram_req_write (vx_gpu_dcache_dram_req.dram_req_write), - .dram_req_addr (vx_gpu_dcache_dram_req.dram_req_addr), - .dram_req_data (vx_gpu_dcache_dram_req.dram_req_data), - .dram_req_full (vx_gpu_dcache_dram_req.dram_req_full), + .dram_req_read (gpu_dcache_dram_req_if.dram_req_read), + .dram_req_write (gpu_dcache_dram_req_if.dram_req_write), + .dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr), + .dram_req_data (gpu_dcache_dram_req_if.dram_req_data), + .dram_req_full (gpu_dcache_dram_req_if.dram_req_full), // Snoop Request - .snp_req_valid (vx_gpu_dcache_snp_req.snp_req_valid), - .snp_req_addr (vx_gpu_dcache_snp_req.snp_req_addr), - .snp_req_full (vx_gpu_dcache_snp_req.snp_req_full), + .snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid), + .snp_req_addr (gpu_dcache_snp_req_if.snp_req_addr), + .snp_req_full (gpu_dcache_snp_req_if.snp_req_full), // Snoop Forward `IGNORE_WARNINGS_BEGIN @@ -259,52 +259,52 @@ module VX_dmem_controller ( .reset (reset), // Core req - .core_req_valid (vx_icache_req.core_req_valid), - .core_req_mem_read (vx_icache_req.core_req_mem_read), - .core_req_mem_write (vx_icache_req.core_req_mem_write), - .core_req_addr (vx_icache_req.core_req_addr), - .core_req_writedata (vx_icache_req.core_req_writedata), - .core_req_rd (vx_icache_req.core_req_rd), - .core_req_wb (vx_icache_req.core_req_wb), - .core_req_warp_num (vx_icache_req.core_req_warp_num), - .core_req_pc (vx_icache_req.core_req_pc), + .core_req_valid (icache_req_if.core_req_valid), + .core_req_mem_read (icache_req_if.core_req_mem_read), + .core_req_mem_write (icache_req_if.core_req_mem_write), + .core_req_addr (icache_req_if.core_req_addr), + .core_req_writedata (icache_req_if.core_req_writedata), + .core_req_rd (icache_req_if.core_req_rd), + .core_req_wb (icache_req_if.core_req_wb), + .core_req_warp_num (icache_req_if.core_req_warp_num), + .core_req_pc (icache_req_if.core_req_pc), // Delay Core Req - .delay_req (vx_icache_rsp.delay_req), + .delay_req (icache_rsp_if.delay_req), // Core Cache Can't WB - .core_no_wb_slot (vx_icache_req.core_no_wb_slot), + .core_no_wb_slot (icache_req_if.core_no_wb_slot), // Cache CWB - .core_wb_valid (vx_icache_rsp.core_wb_valid), - .core_wb_req_rd (vx_icache_rsp.core_wb_req_rd), - .core_wb_req_wb (vx_icache_rsp.core_wb_req_wb), - .core_wb_warp_num (vx_icache_rsp.core_wb_warp_num), - .core_wb_readdata (vx_icache_rsp.core_wb_readdata), - .core_wb_pc (vx_icache_rsp.core_wb_pc), + .core_wb_valid (icache_rsp_if.core_wb_valid), + .core_wb_req_rd (icache_rsp_if.core_wb_req_rd), + .core_wb_req_wb (icache_rsp_if.core_wb_req_wb), + .core_wb_warp_num (icache_rsp_if.core_wb_warp_num), + .core_wb_readdata (icache_rsp_if.core_wb_readdata), + .core_wb_pc (icache_rsp_if.core_wb_pc), `IGNORE_WARNINGS_BEGIN .core_wb_address (), `IGNORE_WARNINGS_END // DRAM response - .dram_rsp_valid (vx_gpu_icache_dram_res.dram_rsp_valid), - .dram_rsp_addr (vx_gpu_icache_dram_res.dram_rsp_addr), - .dram_rsp_data (vx_gpu_icache_dram_res.dram_rsp_data), + .dram_rsp_valid (gpu_icache_dram_res_if.dram_rsp_valid), + .dram_rsp_addr (gpu_icache_dram_res_if.dram_rsp_addr), + .dram_rsp_data (gpu_icache_dram_res_if.dram_rsp_data), // DRAM accept response - .dram_rsp_ready (vx_gpu_icache_dram_req.dram_rsp_ready), + .dram_rsp_ready (gpu_icache_dram_req_if.dram_rsp_ready), // DRAM Req - .dram_req_read (vx_gpu_icache_dram_req.dram_req_read), - .dram_req_write (vx_gpu_icache_dram_req.dram_req_write), - .dram_req_addr (vx_gpu_icache_dram_req.dram_req_addr), - .dram_req_data (vx_gpu_icache_dram_req.dram_req_data), - .dram_req_full (vx_gpu_icache_dram_req.dram_req_full), + .dram_req_read (gpu_icache_dram_req_if.dram_req_read), + .dram_req_write (gpu_icache_dram_req_if.dram_req_write), + .dram_req_addr (gpu_icache_dram_req_if.dram_req_addr), + .dram_req_data (gpu_icache_dram_req_if.dram_req_data), + .dram_req_full (gpu_icache_dram_req_if.dram_req_full), // Snoop Request - .snp_req_valid (vx_gpu_icache_snp_req.snp_req_valid), - .snp_req_addr (vx_gpu_icache_snp_req.snp_req_addr), - .snp_req_full (vx_gpu_icache_snp_req.snp_req_full), + .snp_req_valid (gpu_icache_snp_req_if.snp_req_valid), + .snp_req_addr (gpu_icache_snp_req_if.snp_req_addr), + .snp_req_full (gpu_icache_snp_req_if.snp_req_full), // Snoop Forward `IGNORE_WARNINGS_BEGIN diff --git a/hw/rtl/VX_execute_unit.v b/hw/rtl/VX_execute_unit.v index f89db41e..6126e58b 100644 --- a/hw/rtl/VX_execute_unit.v +++ b/hw/rtl/VX_execute_unit.v @@ -1,21 +1,21 @@ `include "VX_define.vh" module VX_execute_unit ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Request - VX_exec_unit_req_if vx_exec_unit_req, + VX_exec_unit_req_if exec_unit_req_if, // Output // Writeback - VX_inst_exec_wb_if vx_inst_exec_wb, + VX_inst_exec_wb_if inst_exec_wb_if, // JAL Response - VX_jal_response_if vx_jal_rsp, + VX_jal_response_if jal_rsp_if, // Branch Response - VX_branch_response_if vx_branch_rsp, + VX_branch_response_if branch_rsp_if, - input wire no_slot_exec, - output wire out_delay + input wire no_slot_exec, + output wire out_delay ); wire[`NUM_THREADS-1:0][31:0] in_a_reg_data; @@ -31,23 +31,23 @@ module VX_execute_unit ( wire[31:0] in_jal_offset; wire[31:0] in_curr_PC; - assign in_a_reg_data = vx_exec_unit_req.a_reg_data; - assign in_b_reg_data = vx_exec_unit_req.b_reg_data; - assign in_alu_op = vx_exec_unit_req.alu_op; - assign in_rs2_src = vx_exec_unit_req.rs2_src; - assign in_itype_immed = vx_exec_unit_req.itype_immed; - assign in_branch_type = vx_exec_unit_req.branch_type; - assign in_upper_immed = vx_exec_unit_req.upper_immed; - assign in_jal = vx_exec_unit_req.jal; - assign in_jal_offset = vx_exec_unit_req.jal_offset; - assign in_curr_PC = vx_exec_unit_req.curr_PC; + assign in_a_reg_data = exec_unit_req_if.a_reg_data; + assign in_b_reg_data = exec_unit_req_if.b_reg_data; + assign in_alu_op = exec_unit_req_if.alu_op; + assign in_rs2_src = exec_unit_req_if.rs2_src; + assign in_itype_immed = exec_unit_req_if.itype_immed; + assign in_branch_type = exec_unit_req_if.branch_type; + assign in_upper_immed = exec_unit_req_if.upper_immed; + assign in_jal = exec_unit_req_if.jal; + assign in_jal_offset = exec_unit_req_if.jal_offset; + assign in_curr_PC = exec_unit_req_if.curr_PC; wire[`NUM_THREADS-1:0][31:0] alu_result; wire[`NUM_THREADS-1:0] alu_stall; genvar index_out_reg; generate for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs - VX_alu vx_alu( + VX_alu alu( .clk(clk), .reset(reset), // .in_reg_data (in_reg_data[1:0]), @@ -77,17 +77,17 @@ module VX_execute_unit ( VX_generic_priority_encoder #( .N(`NUM_THREADS) ) choose_alu_result ( - .valids(vx_exec_unit_req.valid), + .valids(exec_unit_req_if.valid), .index (jal_branch_use_index), .found (jal_branch_found_valid) - ); + ); wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index]; reg temp_branch_dir; always @(*) begin - case (vx_exec_unit_req.branch_type) + case (exec_unit_req_if.branch_type) `BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN; `BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN; `BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN; @@ -104,35 +104,35 @@ module VX_execute_unit ( genvar i; generate for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup - assign duplicate_PC_data[i] = vx_exec_unit_req.PC_next; + assign duplicate_PC_data[i] = exec_unit_req_if.PC_next; end endgenerate - // VX_inst_exec_wb_if vx_inst_exec_wb_temp(); + // VX_inst_exec_wb_if inst_exec_wb_temp_if(); // JAL Response - VX_jal_response_if vx_jal_rsp_temp(); + VX_jal_response_if jal_rsp_temp_if(); // Branch Response - VX_branch_response_if vx_branch_rsp_temp(); + VX_branch_response_if branch_rsp_temp_if(); // Actual Writeback - assign vx_inst_exec_wb.rd = vx_exec_unit_req.rd; - assign vx_inst_exec_wb.wb = vx_exec_unit_req.wb; - assign vx_inst_exec_wb.wb_valid = vx_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}}; - assign vx_inst_exec_wb.wb_warp_num = vx_exec_unit_req.warp_num; - assign vx_inst_exec_wb.alu_result = vx_exec_unit_req.jal ? duplicate_PC_data : alu_result; + assign inst_exec_wb_if.rd = exec_unit_req_if.rd; + assign inst_exec_wb_if.wb = exec_unit_req_if.wb; + assign inst_exec_wb_if.wb_valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}}; + assign inst_exec_wb_if.wb_warp_num = exec_unit_req_if.warp_num; + assign inst_exec_wb_if.alu_result = exec_unit_req_if.jal ? duplicate_PC_data : alu_result; - assign vx_inst_exec_wb.exec_wb_pc = in_curr_PC; + assign inst_exec_wb_if.exec_wb_pc = in_curr_PC; // Jal rsp - assign vx_jal_rsp_temp.jal = in_jal; - assign vx_jal_rsp_temp.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset); - assign vx_jal_rsp_temp.jal_warp_num = vx_exec_unit_req.warp_num; + assign jal_rsp_temp_if.jal = in_jal; + assign jal_rsp_temp_if.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset); + assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num; // Branch rsp - assign vx_branch_rsp_temp.valid_branch = (vx_exec_unit_req.branch_type != `NO_BRANCH) && (|vx_exec_unit_req.valid); - assign vx_branch_rsp_temp.branch_dir = temp_branch_dir; - assign vx_branch_rsp_temp.branch_warp_num = vx_exec_unit_req.warp_num; - assign vx_branch_rsp_temp.branch_dest = $signed(vx_exec_unit_req.curr_PC) + ($signed(vx_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset + assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid); + assign branch_rsp_temp_if.branch_dir = temp_branch_dir; + assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num; + assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset wire zero = 0; @@ -142,8 +142,8 @@ module VX_execute_unit ( // .reset(reset), // .stall(zero), // .flush(zero), - // .in ({vx_inst_exec_wb_temp.rd, vx_inst_exec_wb_temp.wb, vx_inst_exec_wb_temp.wb_valid, vx_inst_exec_wb_temp.wb_warp_num, vx_inst_exec_wb_temp.alu_result, vx_inst_exec_wb_temp.exec_wb_pc}), - // .out ({vx_inst_exec_wb.rd , vx_inst_exec_wb.wb , vx_inst_exec_wb.wb_valid , vx_inst_exec_wb.wb_warp_num , vx_inst_exec_wb.alu_result , vx_inst_exec_wb.exec_wb_pc }) + // .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}), + // .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc }) // ); VX_generic_register #( @@ -153,8 +153,8 @@ module VX_execute_unit ( .reset(reset), .stall(zero), .flush(zero), - .in ({vx_jal_rsp_temp.jal, vx_jal_rsp_temp.jal_dest, vx_jal_rsp_temp.jal_warp_num}), - .out ({vx_jal_rsp.jal , vx_jal_rsp.jal_dest , vx_jal_rsp.jal_warp_num}) + .in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}), + .out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num}) ); VX_generic_register #( @@ -164,8 +164,8 @@ module VX_execute_unit ( .reset(reset), .stall(zero), .flush(zero), - .in ({vx_branch_rsp_temp.valid_branch, vx_branch_rsp_temp.branch_dir, vx_branch_rsp_temp.branch_warp_num, vx_branch_rsp_temp.branch_dest}), - .out ({vx_branch_rsp.valid_branch , vx_branch_rsp.branch_dir , vx_branch_rsp.branch_warp_num , vx_branch_rsp.branch_dest }) + .in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}), + .out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest }) ); // always @(*) begin @@ -178,7 +178,7 @@ module VX_execute_unit ( // end - // assign out_is_csr = vx_exec_unit_req.is_csr; - // assign out_csr_address = vx_exec_unit_req.csr_address; + // assign out_is_csr = exec_unit_req_if.is_csr; + // assign out_csr_address = exec_unit_req_if.csr_address; endmodule : VX_execute_unit \ No newline at end of file diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index 7d4dca6f..e7027844 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -1,20 +1,20 @@ `include "VX_define.vh" module VX_fetch ( - input wire clk, - input wire reset, - VX_wstall_if vx_wstall, - VX_join_if vx_join, - input wire schedule_delay, - input wire icache_stage_delay, - input wire[`NW_BITS-1:0] icache_stage_wid, - input wire[`NUM_THREADS-1:0] icache_stage_valids, + input wire clk, + input wire reset, + VX_wstall_if wstall_if, + VX_join_if join_if, + input wire schedule_delay, + input wire icache_stage_delay, + input wire[`NW_BITS-1:0] icache_stage_wid, + input wire[`NUM_THREADS-1:0] icache_stage_valids, - output wire out_ebreak, - VX_jal_response_if vx_jal_rsp, - VX_branch_response_if vx_branch_rsp, + output wire out_ebreak, + VX_jal_response_if jal_rsp_if, + VX_branch_response_if branch_rsp_if, VX_inst_meta_if fe_inst_meta_fi, - VX_warp_ctl_if vx_warp_ctl + VX_warp_ctl_if warp_ctl_if ); wire[`NUM_THREADS-1:0] thread_mask; @@ -22,15 +22,12 @@ module VX_fetch ( wire[31:0] warp_pc; wire scheduled_warp; - wire pipe_stall; - // Only reason this is there is because there is a hidden assumption that decode is exactly after fetch // Locals - assign pipe_stall = schedule_delay || icache_stage_delay; VX_warp_scheduler warp_scheduler( @@ -38,52 +35,52 @@ module VX_fetch ( .reset (reset), .stall (pipe_stall), - .is_barrier (vx_warp_ctl.is_barrier), - .barrier_id (vx_warp_ctl.barrier_id), - .num_warps (vx_warp_ctl.num_warps), - .barrier_warp_num (vx_warp_ctl.warp_num), + .is_barrier (warp_ctl_if.is_barrier), + .barrier_id (warp_ctl_if.barrier_id), + .num_warps (warp_ctl_if.num_warps), + .barrier_warp_num (warp_ctl_if.warp_num), // Wspawn - .wspawn (vx_warp_ctl.wspawn), - .wsapwn_pc (vx_warp_ctl.wspawn_pc), - .wspawn_new_active(vx_warp_ctl.wspawn_new_active), + .wspawn (warp_ctl_if.wspawn), + .wsapwn_pc (warp_ctl_if.wspawn_pc), + .wspawn_new_active(warp_ctl_if.wspawn_new_active), // CTM - .ctm (vx_warp_ctl.change_mask), - .ctm_mask (vx_warp_ctl.thread_mask), - .ctm_warp_num (vx_warp_ctl.warp_num), + .ctm (warp_ctl_if.change_mask), + .ctm_mask (warp_ctl_if.thread_mask), + .ctm_warp_num (warp_ctl_if.warp_num), // WHALT - .whalt (vx_warp_ctl.ebreak), - .whalt_warp_num (vx_warp_ctl.warp_num), + .whalt (warp_ctl_if.ebreak), + .whalt_warp_num (warp_ctl_if.warp_num), // Wstall - .wstall (vx_wstall.wstall), - .wstall_warp_num (vx_wstall.warp_num), + .wstall (wstall_if.wstall), + .wstall_warp_num (wstall_if.warp_num), // Lock/release Stuff .icache_stage_valids(icache_stage_valids), .icache_stage_wid (icache_stage_wid), // Join - .is_join (vx_join.is_join), - .join_warp_num (vx_join.join_warp_num), + .is_join (join_if.is_join), + .join_warp_num (join_if.join_warp_num), // Split - .is_split (vx_warp_ctl.is_split), - .dont_split (vx_warp_ctl.dont_split), - .split_new_mask (vx_warp_ctl.split_new_mask), - .split_later_mask (vx_warp_ctl.split_later_mask), - .split_save_pc (vx_warp_ctl.split_save_pc), - .split_warp_num (vx_warp_ctl.warp_num), + .is_split (warp_ctl_if.is_split), + .dont_split (warp_ctl_if.dont_split), + .split_new_mask (warp_ctl_if.split_new_mask), + .split_later_mask (warp_ctl_if.split_later_mask), + .split_save_pc (warp_ctl_if.split_save_pc), + .split_warp_num (warp_ctl_if.warp_num), // JAL - .jal (vx_jal_rsp.jal), - .jal_dest (vx_jal_rsp.jal_dest), - .jal_warp_num (vx_jal_rsp.jal_warp_num), + .jal (jal_rsp_if.jal), + .jal_dest (jal_rsp_if.jal_dest), + .jal_warp_num (jal_rsp_if.jal_warp_num), // Branch - .branch_valid (vx_branch_rsp.valid_branch), - .branch_dir (vx_branch_rsp.branch_dir), - .branch_dest (vx_branch_rsp.branch_dest), - .branch_warp_num (vx_branch_rsp.branch_warp_num), + .branch_valid (branch_rsp_if.valid_branch), + .branch_dir (branch_rsp_if.branch_dir), + .branch_dest (branch_rsp_if.branch_dest), + .branch_warp_num (branch_rsp_if.branch_warp_num), // Outputs .thread_mask (thread_mask), diff --git a/hw/rtl/VX_front_end.v b/hw/rtl/VX_front_end.v index 3cb20691..06b82e70 100644 --- a/hw/rtl/VX_front_end.v +++ b/hw/rtl/VX_front_end.v @@ -6,73 +6,68 @@ module VX_front_end ( input wire schedule_delay, - VX_warp_ctl_if vx_warp_ctl, + VX_warp_ctl_if warp_ctl_if, - VX_gpu_dcache_rsp_if vx_icache_rsp, - VX_gpu_dcache_req_if vx_icache_req, + VX_gpu_dcache_rsp_if icache_rsp_if, + VX_gpu_dcache_req_if icache_req_if, - VX_jal_response_if vx_jal_rsp, - VX_branch_response_if vx_branch_rsp, + VX_jal_response_if jal_rsp_if, + VX_branch_response_if branch_rsp_if, - VX_frE_to_bckE_req_if vx_bckE_req, + VX_frE_to_bckE_req_if bckE_req_if, output wire fetch_ebreak ); + VX_inst_meta_if fe_inst_meta_fi(); + VX_inst_meta_if fe_inst_meta_fi2(); + VX_inst_meta_if fe_inst_meta_id(); -VX_inst_meta_if fe_inst_meta_fi(); -VX_inst_meta_if fe_inst_meta_fi2(); -VX_inst_meta_if fe_inst_meta_id(); + VX_frE_to_bckE_req_if frE_to_bckE_req_if(); + VX_inst_meta_if fd_inst_meta_de(); -VX_frE_to_bckE_req_if vx_frE_to_bckE_req(); -VX_inst_meta_if fd_inst_meta_de(); + wire total_freeze = schedule_delay; + wire icache_stage_delay; -wire total_freeze = schedule_delay; -wire icache_stage_delay; + wire vortex_ebreak; + wire terminate_sim; -wire vortex_ebreak; -wire terminate_sim; + wire[`NW_BITS-1:0] icache_stage_wid; + wire[`NUM_THREADS-1:0] icache_stage_valids; -wire[`NW_BITS-1:0] icache_stage_wid; -wire[`NUM_THREADS-1:0] icache_stage_valids; - -reg old_ebreak; // This should be eventually removed -always @(posedge clk) begin - if (reset) begin - old_ebreak <= 0; - end else begin - old_ebreak <= old_ebreak || fetch_ebreak; + reg old_ebreak; // This should be eventually removed + always @(posedge clk) begin + if (reset) begin + old_ebreak <= 0; + end else begin + old_ebreak <= old_ebreak || fetch_ebreak; + end end -end -assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak; + assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak; + VX_wstall_if wstall_if(); + VX_join_if join_if(); -VX_wstall_if vx_wstall(); -VX_join_if vx_join(); - -VX_fetch vx_fetch( + VX_fetch fetch( .clk (clk), .reset (reset), .icache_stage_wid (icache_stage_wid), .icache_stage_valids(icache_stage_valids), - .vx_wstall (vx_wstall), - .vx_join (vx_join), + .wstall_if (wstall_if), + .join_if (join_if), .schedule_delay (schedule_delay), - .vx_jal_rsp (vx_jal_rsp), - .vx_warp_ctl (vx_warp_ctl), + .jal_rsp_if (jal_rsp_if), + .warp_ctl_if (warp_ctl_if), .icache_stage_delay (icache_stage_delay), - .vx_branch_rsp (vx_branch_rsp), + .branch_rsp_if (branch_rsp_if), .out_ebreak (vortex_ebreak), // fetch_ebreak .fe_inst_meta_fi (fe_inst_meta_fi) ); -wire freeze_fi_reg = total_freeze || icache_stage_delay; + wire freeze_fi_reg = total_freeze || icache_stage_delay; - - - -VX_f_d_reg vx_f_i_reg( + VX_f_d_reg f_i_reg( .clk (clk), .reset (reset), .in_freeze (freeze_fi_reg), @@ -80,46 +75,46 @@ VX_f_d_reg vx_f_i_reg( .fd_inst_meta_de(fe_inst_meta_fi2) ); -VX_icache_stage vx_icache_stage( - .clk (clk), - .reset (reset), - .total_freeze (total_freeze), - .icache_stage_delay (icache_stage_delay), - .icache_stage_valids(icache_stage_valids), - .icache_stage_wid (icache_stage_wid), - .fe_inst_meta_fi (fe_inst_meta_fi2), - .fe_inst_meta_id (fe_inst_meta_id), - .vx_icache_rsp (vx_icache_rsp), - .vx_icache_req (vx_icache_req) + VX_icache_stage icache_stage( + .clk (clk), + .reset (reset), + .total_freeze (total_freeze), + .icache_stage_delay (icache_stage_delay), + .icache_stage_valids(icache_stage_valids), + .icache_stage_wid (icache_stage_wid), + .fe_inst_meta_fi (fe_inst_meta_fi2), + .fe_inst_meta_id (fe_inst_meta_id), + .icache_rsp_if (icache_rsp_if), + .icache_req_if (icache_req_if) ); -VX_i_d_reg vx_i_d_reg( - .clk (clk), - .reset (reset), - .in_freeze (total_freeze), - .fe_inst_meta_fd(fe_inst_meta_id), - .fd_inst_meta_de(fd_inst_meta_de) + VX_i_d_reg i_d_reg( + .clk (clk), + .reset (reset), + .in_freeze (total_freeze), + .fe_inst_meta_fd (fe_inst_meta_id), + .fd_inst_meta_de (fd_inst_meta_de) ); -VX_decode vx_decode( - .fd_inst_meta_de (fd_inst_meta_de), - .vx_frE_to_bckE_req(vx_frE_to_bckE_req), - .vx_wstall (vx_wstall), - .vx_join (vx_join), - .terminate_sim (terminate_sim) + VX_decode decode( + .fd_inst_meta_de (fd_inst_meta_de), + .frE_to_bckE_req_if (frE_to_bckE_req_if), + .wstall_if (wstall_if), + .join_if (join_if), + .terminate_sim (terminate_sim) ); -wire no_br_stall = 0; + wire no_br_stall = 0; -VX_d_e_reg vx_d_e_reg( - .clk (clk), - .reset (reset), - .in_branch_stall(no_br_stall), - .in_freeze (total_freeze), - .vx_frE_to_bckE_req(vx_frE_to_bckE_req), - .vx_bckE_req (vx_bckE_req) + VX_d_e_reg d_e_reg( + .clk (clk), + .reset (reset), + .in_branch_stall (no_br_stall), + .in_freeze (total_freeze), + .frE_to_bckE_req_if (frE_to_bckE_req_if), + .bckE_req_if (bckE_req_if) ); endmodule diff --git a/hw/rtl/VX_gpgpu_inst.v b/hw/rtl/VX_gpgpu_inst.v index 74ffb7cc..045464d7 100644 --- a/hw/rtl/VX_gpgpu_inst.v +++ b/hw/rtl/VX_gpgpu_inst.v @@ -2,57 +2,57 @@ module VX_gpgpu_inst ( // Input - VX_gpu_inst_req_if vx_gpu_inst_req, + VX_gpu_inst_req_if gpu_inst_req_if, // Output - VX_warp_ctl_if vx_warp_ctl + VX_warp_ctl_if warp_ctl_if ); - wire[`NUM_THREADS-1:0] curr_valids = vx_gpu_inst_req.valid; - wire is_split = (vx_gpu_inst_req.is_split); + wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid; + wire is_split = (gpu_inst_req_if.is_split); wire[`NUM_THREADS-1:0] tmc_new_mask; - wire all_threads = `NUM_THREADS < vx_gpu_inst_req.a_reg_data[0]; + wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0]; genvar curr_t; generate for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init - assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < vx_gpu_inst_req.a_reg_data[0]; + assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0]; end endgenerate wire valid_inst = (|curr_valids); - assign vx_warp_ctl.warp_num = vx_gpu_inst_req.warp_num; - assign vx_warp_ctl.change_mask = (vx_gpu_inst_req.is_tmc) && valid_inst; - assign vx_warp_ctl.thread_mask = vx_gpu_inst_req.is_tmc ? tmc_new_mask : 0; + assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num; + assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst; + assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0; - // assign vx_warp_ctl.ebreak = (vx_gpu_inst_req.a_reg_data[0] == 0) && valid_inst; - assign vx_warp_ctl.ebreak = vx_warp_ctl.change_mask && (vx_warp_ctl.thread_mask == 0); + // assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst; + assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0); - wire wspawn = vx_gpu_inst_req.is_wspawn; - wire[31:0] wspawn_pc = vx_gpu_inst_req.rd2; - wire all_active = `NUM_WARPS < vx_gpu_inst_req.a_reg_data[0]; + wire wspawn = gpu_inst_req_if.is_wspawn; + wire[31:0] wspawn_pc = gpu_inst_req_if.rd2; + wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0]; wire[`NUM_WARPS-1:0] wspawn_new_active; genvar curr_w; generate for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init - assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < vx_gpu_inst_req.a_reg_data[0]; + assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0]; end endgenerate - assign vx_warp_ctl.is_barrier = vx_gpu_inst_req.is_barrier && valid_inst; - assign vx_warp_ctl.barrier_id = vx_gpu_inst_req.a_reg_data[0]; + assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst; + assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0]; `DEBUG_BEGIN - wire[31:0] num_warps_m1 = vx_gpu_inst_req.rd2 - 1; + wire[31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1; `DEBUG_END - assign vx_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0]; + assign warp_ctl_if.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0]; - assign vx_warp_ctl.wspawn = wspawn; - assign vx_warp_ctl.wspawn_pc = wspawn_pc; - assign vx_warp_ctl.wspawn_new_active = wspawn_new_active; + assign warp_ctl_if.wspawn = wspawn; + assign warp_ctl_if.wspawn_pc = wspawn_pc; + assign warp_ctl_if.wspawn_new_active = wspawn_new_active; wire[`NUM_THREADS-1:0] split_new_use_mask; wire[`NUM_THREADS-1:0] split_new_later_mask; @@ -61,7 +61,7 @@ module VX_gpgpu_inst ( genvar curr_s_t; generate for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init - wire curr_bool = (vx_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1); + wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1); assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool); assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool); @@ -79,15 +79,15 @@ module VX_gpgpu_inst ( // wire[`NW_BITS-1:0] num_valids = $countones(curr_valids); - assign vx_warp_ctl.is_split = is_split && (num_valids > 1); - assign vx_warp_ctl.dont_split = vx_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}})); - assign vx_warp_ctl.split_new_mask = split_new_use_mask; - assign vx_warp_ctl.split_later_mask = split_new_later_mask; - assign vx_warp_ctl.split_save_pc = vx_gpu_inst_req.pc_next; - assign vx_warp_ctl.split_warp_num = vx_gpu_inst_req.warp_num; + assign warp_ctl_if.is_split = is_split && (num_valids > 1); + assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}})); + assign warp_ctl_if.split_new_mask = split_new_use_mask; + assign warp_ctl_if.split_later_mask = split_new_later_mask; + assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next; + assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num; - // vx_gpu_inst_req.is_wspawn - // vx_gpu_inst_req.is_split - // vx_gpu_inst_req.is_barrier + // gpu_inst_req_if.is_wspawn + // gpu_inst_req_if.is_split + // gpu_inst_req_if.is_barrier endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index e338a7a7..523f1dda 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -4,8 +4,8 @@ module VX_gpr ( input wire clk, input wire reset, input wire valid_write_request, - VX_gpr_read_if vx_gpr_read, - VX_wb_if vx_writeback_if, + VX_gpr_read_if gpr_read_if, + VX_wb_if writeback_if, output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_a_reg_data, output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] out_b_reg_data @@ -13,36 +13,36 @@ module VX_gpr ( wire write_enable; `ifndef ASIC - assign write_enable = valid_write_request && ((vx_writeback_if.wb != 0)) && (vx_writeback_if.rd != 0); + assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0); byte_enabled_simple_dual_port_ram first_ram( .we (write_enable), .clk (clk), .reset (reset), - .waddr (vx_writeback_if.rd), - .raddr1(vx_gpr_read.rs1), - .raddr2(vx_gpr_read.rs2), - .be (vx_writeback_if.wb_valid), - .wdata (vx_writeback_if.write_data), + .waddr (writeback_if.rd), + .raddr1(gpr_read_if.rs1), + .raddr2(gpr_read_if.rs2), + .be (writeback_if.wb_valid), + .wdata (writeback_if.write_data), .q1 (out_a_reg_data), .q2 (out_b_reg_data) ); `else - assign write_enable = valid_write_request && ((vx_writeback_if.wb != 0)); - wire going_to_write = write_enable & (|vx_writeback_if.wb_valid); + assign write_enable = valid_write_request && ((writeback_if.wb != 0)); + wire going_to_write = write_enable & (|writeback_if.wb_valid); wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; genvar curr_t; for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin - wire local_write = write_enable & vx_writeback_if.wb_valid[curr_t]; + wire local_write = write_enable & writeback_if.wb_valid[curr_t]; assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}}; end // wire cenb = !going_to_write; wire cenb = 0; - // wire cena_1 = (vx_gpr_read.rs1 == 0); - // wire cena_2 = (vx_gpr_read.rs2 == 0); + // wire cena_1 = (gpr_read_if.rs1 == 0); + // wire cena_2 = (gpr_read_if.rs2 == 0); wire cena_1 = 0; wire cena_2 = 0; @@ -65,7 +65,7 @@ module VX_gpr ( assign out_b_reg_data = temp_b; `endif - wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (vx_writeback_if.rd != 0) ? vx_writeback_if.write_data : 0; + wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0; genvar curr_base_thread; for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4) @@ -82,11 +82,11 @@ module VX_gpr ( .SOB(), .CLKA(clk), .CENA(cena_1), - .AA(vx_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]), + .AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]), .CLKB(clk), .CENB(cenb), .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), - .AB(vx_writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), + .AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), .DB(to_write[(curr_base_thread+3):(curr_base_thread)]), .EMAA(3'b011), .EMASA(1'b0), @@ -121,11 +121,11 @@ module VX_gpr ( .SOB(), .CLKA(clk), .CENA(cena_2), - .AA(vx_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]), + .AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]), .CLKB(clk), .CENB(cenb), .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), - .AB(vx_writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), + .AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), .DB(to_write[(curr_base_thread+3):(curr_base_thread)]), .EMAA(3'b011), .EMASA(1'b0), diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 16a8884c..8eda34d9 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -12,72 +12,72 @@ module VX_gpr_stage ( // inputs // Instruction Information - VX_frE_to_bckE_req_if vx_bckE_req, + VX_frE_to_bckE_req_if bckE_req_if, // WriteBack inputs - VX_wb_if vx_writeback_if, + VX_wb_if writeback_if, // Outputs - VX_exec_unit_req_if vx_exec_unit_req, - VX_lsu_req_if vx_lsu_req, - VX_gpu_inst_req_if vx_gpu_inst_req, - VX_csr_req_if vx_csr_req + VX_exec_unit_req_if exec_unit_req_if, + VX_lsu_req_if lsu_req_if, + VX_gpu_inst_req_if gpu_inst_req_if, + VX_csr_req_if csr_req_if ); `DEBUG_BEGIN - wire[31:0] curr_PC = vx_bckE_req.curr_PC; - wire[2:0] branchType = vx_bckE_req.branch_type; - wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE); - wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ); - wire jalQual = vx_bckE_req.jalQual; + wire[31:0] curr_PC = bckE_req_if.curr_PC; + wire[2:0] branchType = bckE_req_if.branch_type; + wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE); + wire is_load = (bckE_req_if.mem_read != `NO_MEM_READ); + wire jalQual = bckE_req_if.jalQual; `DEBUG_END - VX_gpr_read_if vx_gpr_read(); - assign vx_gpr_read.rs1 = vx_bckE_req.rs1; - assign vx_gpr_read.rs2 = vx_bckE_req.rs2; - assign vx_gpr_read.warp_num = vx_bckE_req.warp_num; + VX_gpr_read_if gpr_read_if(); + assign gpr_read_if.rs1 = bckE_req_if.rs1; + assign gpr_read_if.rs2 = bckE_req_if.rs2; + assign gpr_read_if.warp_num = bckE_req_if.warp_num; `ifndef ASIC - VX_gpr_jal_if vx_gpr_jal(); - assign vx_gpr_jal.is_jal = vx_bckE_req.jalQual; - assign vx_gpr_jal.curr_PC = vx_bckE_req.curr_PC; + VX_gpr_jal_if gpr_jal_if(); + assign gpr_jal_if.is_jal = bckE_req_if.jalQual; + assign gpr_jal_if.curr_PC = bckE_req_if.curr_PC; `else - VX_gpr_jal_if vx_gpr_jal(); - assign vx_gpr_jal.is_jal = vx_exec_unit_req.jalQual; - assign vx_gpr_jal.curr_PC = vx_exec_unit_req.curr_PC; + VX_gpr_jal_if gpr_jal_if(); + assign gpr_jal_if.is_jal = exec_unit_req_if.jalQual; + assign gpr_jal_if.curr_PC = exec_unit_req_if.curr_PC; `endif - VX_gpr_data_if vx_gpr_datf(); + VX_gpr_data_if gpr_datf_if(); - VX_gpr_wrapper vx_grp_wrapper ( + VX_gpr_wrapper grp_wrapper ( .clk (clk), .reset (reset), - .vx_writeback_if(vx_writeback_if), - .vx_gpr_read (vx_gpr_read), - .vx_gpr_jal (vx_gpr_jal), + .writeback_if(writeback_if), + .gpr_read_if (gpr_read_if), + .gpr_jal_if (gpr_jal_if), - .out_a_reg_data (vx_gpr_datf.a_reg_data), - .out_b_reg_data (vx_gpr_datf.b_reg_data) + .out_a_reg_data (gpr_datf_if.a_reg_data), + .out_b_reg_data (gpr_datf_if.b_reg_data) ); - // assign vx_bckE_req.is_csr = is_csr; - // assign vx_bckE_req_out.csr_mask = (vx_bckE_req.sr_immed == 1'b1) ? {27'h0, vx_bckE_req.rs1} : vx_gpr_data.a_reg_data[0]; + // assign bckE_req_if.is_csr = is_csr; + // assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0]; // Outputs - VX_exec_unit_req_if vx_exec_unit_req_temp(); - VX_lsu_req_if vx_lsu_req_temp(); - VX_gpu_inst_req_if vx_gpu_inst_req_temp(); - VX_csr_req_if vx_csr_req_temp(); + VX_exec_unit_req_if exec_unit_req_temp_if(); + VX_lsu_req_if lsu_req_temp_if(); + VX_gpu_inst_req_if gpu_inst_req_temp_if(); + VX_csr_req_if csr_req_temp_if(); - VX_inst_multiplex vx_inst_mult( - .vx_bckE_req (vx_bckE_req), - .vx_gpr_data (vx_gpr_datf), - .vx_exec_unit_req(vx_exec_unit_req_temp), - .vx_lsu_req (vx_lsu_req_temp), - .vx_gpu_inst_req (vx_gpu_inst_req_temp), - .vx_csr_req (vx_csr_req_temp) + VX_inst_multiplex inst_mult( + .bckE_req_if (bckE_req_if), + .gpr_data_if (gpr_datf_if), + .exec_unit_req_if(exec_unit_req_temp_if), + .lsu_req_if (lsu_req_temp_if), + .gpu_inst_req_if (gpu_inst_req_temp_if), + .csr_req_if (csr_req_temp_if) ); `DEBUG_BEGIN - wire is_lsu = (|vx_lsu_req_temp.valid); + wire is_lsu = (|lsu_req_temp_if.valid); `DEBUG_END wire stall_rest = 0; wire flush_rest = schedule_delay; @@ -88,7 +88,7 @@ module VX_gpr_stage ( wire stall_exec = exec_delay; wire flush_exec = schedule_delay && !stall_exec; - wire stall_csr = stall_gpr_csr && vx_bckE_req.is_csr && (|vx_bckE_req.valid); + wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid); assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr; @@ -125,11 +125,11 @@ module VX_gpr_stage ( .out ({temp_store_data, temp_base_address}) ); - assign real_store_data = vx_lsu_req_temp.store_data; - assign real_base_address = vx_lsu_req_temp.base_address; + assign real_store_data = lsu_req_temp_if.store_data; + assign real_base_address = lsu_req_temp_if.base_address; - assign vx_lsu_req.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data; - assign vx_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address; + assign lsu_req_if.store_data = (delayed_lsu_last_cycle) ? temp_store_data : real_store_data; + assign lsu_req_if.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address; VX_generic_register #( .N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS)) @@ -138,8 +138,8 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_lsu), .flush(flush_lsu), - .in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}), - .out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc ,vx_lsu_req.warp_num , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb }) + .in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}), + .out ({lsu_req_if.valid , lsu_req_if.lsu_pc ,lsu_req_if.warp_num , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb }) ); VX_generic_register #( @@ -149,12 +149,12 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_exec), .flush(flush_exec), - .in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}), - .out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask }) + .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), + .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) ); - assign vx_exec_unit_req.a_reg_data = real_base_address; - assign vx_exec_unit_req.b_reg_data = real_store_data; + assign exec_unit_req_if.a_reg_data = real_base_address; + assign exec_unit_req_if.b_reg_data = real_store_data; VX_generic_register #( .N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS)) @@ -163,12 +163,12 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_rest), .flush(flush_rest), - .in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next}), - .out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next }) + .in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next}), + .out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next }) ); - assign vx_gpu_inst_req.a_reg_data = real_base_address; - assign vx_gpu_inst_req.rd2 = real_store_data; + assign gpu_inst_req_if.a_reg_data = real_base_address; + assign gpu_inst_req_if.rd2 = real_store_data; VX_generic_register #( .N(`NW_BITS-1 + 1 + `NUM_THREADS + 58) @@ -177,8 +177,8 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_gpr_csr), .flush(flush_rest), - .in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}), - .out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask }) + .in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_address, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}), + .out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_address , csr_req_if.csr_immed , csr_req_if.csr_mask }) ); `else @@ -191,8 +191,8 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_lsu), .flush(flush_lsu), - .in ({vx_lsu_req_temp.valid, vx_lsu_req_temp.lsu_pc, vx_lsu_req_temp.warp_num, vx_lsu_req_temp.store_data, vx_lsu_req_temp.base_address, vx_lsu_req_temp.offset, vx_lsu_req_temp.mem_read, vx_lsu_req_temp.mem_write, vx_lsu_req_temp.rd, vx_lsu_req_temp.wb}), - .out ({vx_lsu_req.valid , vx_lsu_req.lsu_pc , vx_lsu_req.warp_num , vx_lsu_req.store_data , vx_lsu_req.base_address , vx_lsu_req.offset , vx_lsu_req.mem_read , vx_lsu_req.mem_write , vx_lsu_req.rd , vx_lsu_req.wb }) + .in ({lsu_req_temp_if.valid, lsu_req_temp_if.lsu_pc, lsu_req_temp_if.warp_num, lsu_req_temp_if.store_data, lsu_req_temp_if.base_address, lsu_req_temp_if.offset, lsu_req_temp_if.mem_read, lsu_req_temp_if.mem_write, lsu_req_temp_if.rd, lsu_req_temp_if.wb}), + .out ({lsu_req_if.valid , lsu_req_if.lsu_pc , lsu_req_if.warp_num , lsu_req_if.store_data , lsu_req_if.base_address , lsu_req_if.offset , lsu_req_if.mem_read , lsu_req_if.mem_write , lsu_req_if.rd , lsu_req_if.wb }) ); VX_generic_register #( @@ -202,8 +202,8 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_exec), .flush(flush_exec), - .in ({vx_exec_unit_req_temp.valid, vx_exec_unit_req_temp.warp_num, vx_exec_unit_req_temp.curr_PC, vx_exec_unit_req_temp.PC_next, vx_exec_unit_req_temp.rd, vx_exec_unit_req_temp.wb, vx_exec_unit_req_temp.a_reg_data, vx_exec_unit_req_temp.b_reg_data, vx_exec_unit_req_temp.alu_op, vx_exec_unit_req_temp.rs1, vx_exec_unit_req_temp.rs2, vx_exec_unit_req_temp.rs2_src, vx_exec_unit_req_temp.itype_immed, vx_exec_unit_req_temp.upper_immed, vx_exec_unit_req_temp.branch_type, vx_exec_unit_req_temp.jalQual, vx_exec_unit_req_temp.jal, vx_exec_unit_req_temp.jal_offset, vx_exec_unit_req_temp.ebreak, vx_exec_unit_req_temp.wspawn, vx_exec_unit_req_temp.is_csr, vx_exec_unit_req_temp.csr_address, vx_exec_unit_req_temp.csr_immed, vx_exec_unit_req_temp.csr_mask}), - .out ({vx_exec_unit_req.valid , vx_exec_unit_req.warp_num , vx_exec_unit_req.curr_PC , vx_exec_unit_req.PC_next , vx_exec_unit_req.rd , vx_exec_unit_req.wb , vx_exec_unit_req.a_reg_data , vx_exec_unit_req.b_reg_data , vx_exec_unit_req.alu_op , vx_exec_unit_req.rs1 , vx_exec_unit_req.rs2 , vx_exec_unit_req.rs2_src , vx_exec_unit_req.itype_immed , vx_exec_unit_req.upper_immed , vx_exec_unit_req.branch_type , vx_exec_unit_req.jalQual , vx_exec_unit_req.jal , vx_exec_unit_req.jal_offset , vx_exec_unit_req.ebreak , vx_exec_unit_req.wspawn , vx_exec_unit_req.is_csr , vx_exec_unit_req.csr_address , vx_exec_unit_req.csr_immed , vx_exec_unit_req.csr_mask }) + .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), + .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) ); VX_generic_register #( @@ -213,8 +213,8 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_rest), .flush(flush_rest), - .in ({vx_gpu_inst_req_temp.valid, vx_gpu_inst_req_temp.warp_num, vx_gpu_inst_req_temp.is_wspawn, vx_gpu_inst_req_temp.is_tmc, vx_gpu_inst_req_temp.is_split, vx_gpu_inst_req_temp.is_barrier, vx_gpu_inst_req_temp.pc_next, vx_gpu_inst_req_temp.a_reg_data, vx_gpu_inst_req_temp.rd2}), - .out ({vx_gpu_inst_req.valid , vx_gpu_inst_req.warp_num , vx_gpu_inst_req.is_wspawn , vx_gpu_inst_req.is_tmc , vx_gpu_inst_req.is_split , vx_gpu_inst_req.is_barrier , vx_gpu_inst_req.pc_next , vx_gpu_inst_req.a_reg_data , vx_gpu_inst_req.rd2 }) + .in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}), + .out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 }) ); VX_generic_register #( @@ -224,8 +224,8 @@ module VX_gpr_stage ( .reset(reset), .stall(stall_gpr_csr), .flush(flush_rest), - .in ({vx_csr_req_temp.valid, vx_csr_req_temp.warp_num, vx_csr_req_temp.rd, vx_csr_req_temp.wb, vx_csr_req_temp.alu_op, vx_csr_req_temp.is_csr, vx_csr_req_temp.csr_address, vx_csr_req_temp.csr_immed, vx_csr_req_temp.csr_mask}), - .out ({vx_csr_req.valid , vx_csr_req.warp_num , vx_csr_req.rd , vx_csr_req.wb , vx_csr_req.alu_op , vx_csr_req.is_csr , vx_csr_req.csr_address , vx_csr_req.csr_immed , vx_csr_req.csr_mask }) + .in ({csr_req_temp_if.valid, csr_req_temp_if.warp_num, csr_req_temp_if.rd, csr_req_temp_if.wb, csr_req_temp_if.alu_op, csr_req_temp_if.is_csr, csr_req_temp_if.csr_address, csr_req_temp_if.csr_immed, csr_req_temp_if.csr_mask}), + .out ({csr_req_if.valid , csr_req_if.warp_num , csr_req_if.rd , csr_req_if.wb , csr_req_if.alu_op , csr_req_if.is_csr , csr_req_if.csr_address , csr_req_if.csr_immed , csr_req_if.csr_mask }) ); `endif diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index b4d1f3bd..f728bf3f 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -3,9 +3,9 @@ module VX_gpr_wrapper ( input wire clk, input wire reset, - VX_gpr_read_if vx_gpr_read, - VX_wb_if vx_writeback_if, - VX_gpr_jal_if vx_gpr_jal, + VX_gpr_read_if gpr_read_if, + VX_wb_if writeback_if, + VX_gpr_jal_if gpr_jal_if, output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data, output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data @@ -19,13 +19,13 @@ module VX_gpr_wrapper ( genvar index; generate for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign - assign jal_data[index] = vx_gpr_jal.curr_PC; + assign jal_data[index] = gpr_jal_if.curr_PC; end endgenerate `ifndef ASIC - assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[vx_gpr_read.warp_num])); - assign out_b_reg_data = (temp_b_reg_data[vx_gpr_read.warp_num]); + assign out_a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num])); + assign out_b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]); `else wire zer = 0; @@ -38,31 +38,29 @@ module VX_gpr_wrapper ( .reset(reset), .stall(zer), .flush(zer), - .in (vx_gpr_read.warp_num), + .in (gpr_read_if.warp_num), .out (old_warp_num) ); - assign out_a_reg_data = (vx_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[old_warp_num])); + assign out_a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num])); assign out_b_reg_data = (temp_b_reg_data[old_warp_num]); - + `endif genvar warp_index; generate for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs - - wire valid_write_request = warp_index == vx_writeback_if.wb_warp_num; - VX_gpr vx_gpr( + wire valid_write_request = warp_index == writeback_if.wb_warp_num; + VX_gpr gpr( .clk (clk), .reset (reset), .valid_write_request(valid_write_request), - .vx_gpr_read (vx_gpr_read), - .vx_writeback_if (vx_writeback_if), + .gpr_read_if (gpr_read_if), + .writeback_if (writeback_if), .out_a_reg_data (temp_a_reg_data[warp_index]), .out_b_reg_data (temp_b_reg_data[warp_index]) - ); - + ); end endgenerate diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index d07736bd..ef40a490 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -1,17 +1,17 @@ `include "VX_define.vh" module VX_icache_stage ( - input wire clk, - input wire reset, - input wire total_freeze, - output wire icache_stage_delay, + input wire clk, + input wire reset, + input wire total_freeze, + output wire icache_stage_delay, output wire[`NW_BITS-1:0] icache_stage_wid, output wire[`NUM_THREADS-1:0] icache_stage_valids, - VX_inst_meta_if fe_inst_meta_fi, - VX_inst_meta_if fe_inst_meta_id, + VX_inst_meta_if fe_inst_meta_fi, + VX_inst_meta_if fe_inst_meta_id, - VX_gpu_dcache_rsp_if vx_icache_rsp, - VX_gpu_dcache_req_if vx_icache_req + VX_gpu_dcache_rsp_if icache_rsp_if, + VX_gpu_dcache_req_if icache_req_if ); reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0]; @@ -19,30 +19,30 @@ module VX_icache_stage ( wire valid_inst = (|fe_inst_meta_fi.valid); // Icache Request - assign vx_icache_req.core_req_valid = valid_inst && !total_freeze; - assign vx_icache_req.core_req_addr = fe_inst_meta_fi.inst_pc; - assign vx_icache_req.core_req_writedata = 32'b0; - assign vx_icache_req.core_req_mem_read = `LW_MEM_READ; - assign vx_icache_req.core_req_mem_write = `NO_MEM_WRITE; - assign vx_icache_req.core_req_rd = 5'b0; - assign vx_icache_req.core_req_wb = {1{2'b1}}; - assign vx_icache_req.core_req_warp_num = fe_inst_meta_fi.warp_num; - assign vx_icache_req.core_req_pc = fe_inst_meta_fi.inst_pc; + assign icache_req_if.core_req_valid = valid_inst && !total_freeze; + assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc; + assign icache_req_if.core_req_writedata = 32'b0; + assign icache_req_if.core_req_mem_read = `LW_MEM_READ; + assign icache_req_if.core_req_mem_write = `NO_MEM_WRITE; + assign icache_req_if.core_req_rd = 5'b0; + assign icache_req_if.core_req_wb = {1{2'b1}}; + assign icache_req_if.core_req_warp_num = fe_inst_meta_fi.warp_num; + assign icache_req_if.core_req_pc = fe_inst_meta_fi.inst_pc; - assign fe_inst_meta_id.instruction = vx_icache_rsp.core_wb_readdata[0][31:0]; - assign fe_inst_meta_id.inst_pc = vx_icache_rsp.core_wb_pc[0]; - assign fe_inst_meta_id.warp_num = vx_icache_rsp.core_wb_warp_num; + assign fe_inst_meta_id.instruction = icache_rsp_if.core_wb_readdata[0][31:0]; + assign fe_inst_meta_id.inst_pc = icache_rsp_if.core_wb_pc[0]; + assign fe_inst_meta_id.warp_num = icache_rsp_if.core_wb_warp_num; - assign fe_inst_meta_id.valid = vx_icache_rsp.core_wb_valid ? threads_active[vx_icache_rsp.core_wb_warp_num] : 0; + assign fe_inst_meta_id.valid = icache_rsp_if.core_wb_valid ? threads_active[icache_rsp_if.core_wb_warp_num] : 0; assign icache_stage_wid = fe_inst_meta_id.warp_num; assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}}; // Cache can't accept request - assign icache_stage_delay = vx_icache_rsp.delay_req; + assign icache_stage_delay = icache_rsp_if.delay_req; // Core can't accept response - assign vx_icache_req.core_no_wb_slot = total_freeze; + assign icache_req_if.core_no_wb_slot = total_freeze; integer curr_w; always @(posedge clk) begin diff --git a/hw/rtl/VX_inst_multiplex.v b/hw/rtl/VX_inst_multiplex.v index a789b8b0..19cf7a07 100644 --- a/hw/rtl/VX_inst_multiplex.v +++ b/hw/rtl/VX_inst_multiplex.v @@ -2,23 +2,23 @@ module VX_inst_multiplex ( // Inputs - VX_frE_to_bckE_req_if vx_bckE_req, - VX_gpr_data_if vx_gpr_data, + VX_frE_to_bckE_req_if bckE_req_if, + VX_gpr_data_if gpr_data_if, // Outputs - VX_exec_unit_req_if vx_exec_unit_req, - VX_lsu_req_if vx_lsu_req, - VX_gpu_inst_req_if vx_gpu_inst_req, - VX_csr_req_if vx_csr_req + VX_exec_unit_req_if exec_unit_req_if, + VX_lsu_req_if lsu_req_if, + VX_gpu_inst_req_if gpu_inst_req_if, + VX_csr_req_if csr_req_if ); wire[`NUM_THREADS-1:0] is_mem_mask; wire[`NUM_THREADS-1:0] is_gpu_mask; wire[`NUM_THREADS-1:0] is_csr_mask; - wire is_mem = (vx_bckE_req.mem_write != `NO_MEM_WRITE) || (vx_bckE_req.mem_read != `NO_MEM_READ); - wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split); - wire is_csr = vx_bckE_req.is_csr; + wire is_mem = (bckE_req_if.mem_write != `NO_MEM_WRITE) || (bckE_req_if.mem_read != `NO_MEM_READ); + wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split); + wire is_csr = bckE_req_if.is_csr; // wire is_gpu = 0; genvar currT; @@ -31,64 +31,64 @@ module VX_inst_multiplex ( endgenerate // LSU Unit - assign vx_lsu_req.valid = vx_bckE_req.valid & is_mem_mask; - assign vx_lsu_req.warp_num = vx_bckE_req.warp_num; - assign vx_lsu_req.base_address = vx_gpr_data.a_reg_data; - assign vx_lsu_req.store_data = vx_gpr_data.b_reg_data; + assign lsu_req_if.valid = bckE_req_if.valid & is_mem_mask; + assign lsu_req_if.warp_num = bckE_req_if.warp_num; + assign lsu_req_if.base_address = gpr_data_if.a_reg_data; + assign lsu_req_if.store_data = gpr_data_if.b_reg_data; - assign vx_lsu_req.offset = vx_bckE_req.itype_immed; + assign lsu_req_if.offset = bckE_req_if.itype_immed; - assign vx_lsu_req.mem_read = vx_bckE_req.mem_read; - assign vx_lsu_req.mem_write = vx_bckE_req.mem_write; - assign vx_lsu_req.rd = vx_bckE_req.rd; - assign vx_lsu_req.wb = vx_bckE_req.wb; - assign vx_lsu_req.lsu_pc = vx_bckE_req.curr_PC; + assign lsu_req_if.mem_read = bckE_req_if.mem_read; + assign lsu_req_if.mem_write = bckE_req_if.mem_write; + assign lsu_req_if.rd = bckE_req_if.rd; + assign lsu_req_if.wb = bckE_req_if.wb; + assign lsu_req_if.lsu_pc = bckE_req_if.curr_PC; // Execute Unit - assign vx_exec_unit_req.valid = vx_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask); - assign vx_exec_unit_req.warp_num = vx_bckE_req.warp_num; - assign vx_exec_unit_req.curr_PC = vx_bckE_req.curr_PC; - assign vx_exec_unit_req.PC_next = vx_bckE_req.PC_next; - assign vx_exec_unit_req.rd = vx_bckE_req.rd; - assign vx_exec_unit_req.wb = vx_bckE_req.wb; - assign vx_exec_unit_req.a_reg_data = vx_gpr_data.a_reg_data; - assign vx_exec_unit_req.b_reg_data = vx_gpr_data.b_reg_data; - assign vx_exec_unit_req.alu_op = vx_bckE_req.alu_op; - assign vx_exec_unit_req.rs1 = vx_bckE_req.rs1; - assign vx_exec_unit_req.rs2 = vx_bckE_req.rs2; - assign vx_exec_unit_req.rs2_src = vx_bckE_req.rs2_src; - assign vx_exec_unit_req.itype_immed = vx_bckE_req.itype_immed; - assign vx_exec_unit_req.upper_immed = vx_bckE_req.upper_immed; - assign vx_exec_unit_req.branch_type = vx_bckE_req.branch_type; - assign vx_exec_unit_req.jalQual = vx_bckE_req.jalQual; - assign vx_exec_unit_req.jal = vx_bckE_req.jal; - assign vx_exec_unit_req.jal_offset = vx_bckE_req.jal_offset; - assign vx_exec_unit_req.ebreak = vx_bckE_req.ebreak; + assign exec_unit_req_if.valid = bckE_req_if.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask); + assign exec_unit_req_if.warp_num = bckE_req_if.warp_num; + assign exec_unit_req_if.curr_PC = bckE_req_if.curr_PC; + assign exec_unit_req_if.PC_next = bckE_req_if.PC_next; + assign exec_unit_req_if.rd = bckE_req_if.rd; + assign exec_unit_req_if.wb = bckE_req_if.wb; + assign exec_unit_req_if.a_reg_data = gpr_data_if.a_reg_data; + assign exec_unit_req_if.b_reg_data = gpr_data_if.b_reg_data; + assign exec_unit_req_if.alu_op = bckE_req_if.alu_op; + assign exec_unit_req_if.rs1 = bckE_req_if.rs1; + assign exec_unit_req_if.rs2 = bckE_req_if.rs2; + assign exec_unit_req_if.rs2_src = bckE_req_if.rs2_src; + assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed; + assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed; + assign exec_unit_req_if.branch_type = bckE_req_if.branch_type; + assign exec_unit_req_if.jalQual = bckE_req_if.jalQual; + assign exec_unit_req_if.jal = bckE_req_if.jal; + assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset; + assign exec_unit_req_if.ebreak = bckE_req_if.ebreak; // GPR Req - assign vx_gpu_inst_req.valid = vx_bckE_req.valid & is_gpu_mask; - assign vx_gpu_inst_req.warp_num = vx_bckE_req.warp_num; - assign vx_gpu_inst_req.is_wspawn = vx_bckE_req.is_wspawn; - assign vx_gpu_inst_req.is_tmc = vx_bckE_req.is_tmc; - assign vx_gpu_inst_req.is_split = vx_bckE_req.is_split; - assign vx_gpu_inst_req.is_barrier = vx_bckE_req.is_barrier; - assign vx_gpu_inst_req.a_reg_data = vx_gpr_data.a_reg_data; - assign vx_gpu_inst_req.rd2 = vx_gpr_data.b_reg_data[0]; - assign vx_gpu_inst_req.pc_next = vx_bckE_req.PC_next; + assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask; + assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num; + assign gpu_inst_req_if.is_wspawn = bckE_req_if.is_wspawn; + assign gpu_inst_req_if.is_tmc = bckE_req_if.is_tmc; + assign gpu_inst_req_if.is_split = bckE_req_if.is_split; + assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier; + assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data; + assign gpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0]; + assign gpu_inst_req_if.pc_next = bckE_req_if.PC_next; // CSR Req - assign vx_csr_req.valid = vx_bckE_req.valid & is_csr_mask; - assign vx_csr_req.warp_num = vx_bckE_req.warp_num; - assign vx_csr_req.rd = vx_bckE_req.rd; - assign vx_csr_req.wb = vx_bckE_req.wb; - assign vx_csr_req.alu_op = vx_bckE_req.alu_op; - assign vx_csr_req.is_csr = vx_bckE_req.is_csr; - assign vx_csr_req.csr_address = vx_bckE_req.csr_address; - assign vx_csr_req.csr_immed = vx_bckE_req.csr_immed; - assign vx_csr_req.csr_mask = vx_bckE_req.csr_mask; + assign csr_req_if.valid = bckE_req_if.valid & is_csr_mask; + assign csr_req_if.warp_num = bckE_req_if.warp_num; + assign csr_req_if.rd = bckE_req_if.rd; + assign csr_req_if.wb = bckE_req_if.wb; + assign csr_req_if.alu_op = bckE_req_if.alu_op; + assign csr_req_if.is_csr = bckE_req_if.is_csr; + assign csr_req_if.csr_address = bckE_req_if.csr_address; + assign csr_req_if.csr_immed = bckE_req_if.csr_immed; + assign csr_req_if.csr_mask = bckE_req_if.csr_mask; endmodule diff --git a/hw/rtl/VX_lsu.v b/hw/rtl/VX_lsu.v index cd36eaf8..51249e26 100644 --- a/hw/rtl/VX_lsu.v +++ b/hw/rtl/VX_lsu.v @@ -1,23 +1,23 @@ `include "VX_define.vh" module VX_lsu ( - input wire clk, - input wire reset, - input wire no_slot_mem, - VX_lsu_req_if vx_lsu_req, + input wire clk, + input wire reset, + input wire no_slot_mem, + VX_lsu_req_if lsu_req_if, // Write back to GPR - VX_inst_mem_wb_if vx_mem_wb, + VX_inst_mem_wb_if mem_wb_if, - VX_gpu_dcache_rsp_if vx_dcache_rsp, - VX_gpu_dcache_req_if vx_dcache_req, - output wire out_delay + VX_gpu_dcache_rsp_if dcache_rsp_if, + VX_gpu_dcache_req_if dcache_req_if, + output wire out_delay ); // Generate Addresses wire[`NUM_THREADS-1:0][31:0] address; VX_lsu_addr_gen VX_lsu_addr_gen ( - .base_address (vx_lsu_req.base_address), - .offset (vx_lsu_req.offset), + .base_address (lsu_req_if.base_address), + .offset (lsu_req_if.offset), .address (address) ); @@ -40,33 +40,33 @@ module VX_lsu ( .reset(reset), .stall(out_delay), .flush(zero), - .in ({address , vx_lsu_req.store_data, vx_lsu_req.valid, vx_lsu_req.mem_read, vx_lsu_req.mem_write, vx_lsu_req.rd, vx_lsu_req.warp_num, vx_lsu_req.wb, vx_lsu_req.lsu_pc}), + .in ({address , lsu_req_if.store_data, lsu_req_if.valid, lsu_req_if.mem_read, lsu_req_if.mem_write, lsu_req_if.rd, lsu_req_if.warp_num, lsu_req_if.wb, lsu_req_if.lsu_pc}), .out ({use_address, use_store_data , use_valid , use_mem_read , use_mem_write , use_rd , use_warp_num , use_wb , use_pc }) ); // Core Request - assign vx_dcache_req.core_req_valid = use_valid; - assign vx_dcache_req.core_req_addr = use_address; - assign vx_dcache_req.core_req_writedata = use_store_data; - assign vx_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}}; - assign vx_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}}; - assign vx_dcache_req.core_req_rd = use_rd; - assign vx_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}}; - assign vx_dcache_req.core_req_warp_num = use_warp_num; - assign vx_dcache_req.core_req_pc = use_pc; + assign dcache_req_if.core_req_valid = use_valid; + assign dcache_req_if.core_req_addr = use_address; + assign dcache_req_if.core_req_writedata = use_store_data; + assign dcache_req_if.core_req_mem_read = {`NUM_THREADS{use_mem_read}}; + assign dcache_req_if.core_req_mem_write = {`NUM_THREADS{use_mem_write}}; + assign dcache_req_if.core_req_rd = use_rd; + assign dcache_req_if.core_req_wb = {`NUM_THREADS{use_wb}}; + assign dcache_req_if.core_req_warp_num = use_warp_num; + assign dcache_req_if.core_req_pc = use_pc; // Core can't accept response - assign vx_dcache_req.core_no_wb_slot = no_slot_mem; + assign dcache_req_if.core_no_wb_slot = no_slot_mem; // Cache can't accept request - assign out_delay = vx_dcache_rsp.delay_req; + assign out_delay = dcache_rsp_if.delay_req; // Core Response - assign vx_mem_wb.rd = vx_dcache_rsp.core_wb_req_rd; - assign vx_mem_wb.wb = vx_dcache_rsp.core_wb_req_wb; - assign vx_mem_wb.wb_valid = vx_dcache_rsp.core_wb_valid; - assign vx_mem_wb.wb_warp_num = vx_dcache_rsp.core_wb_warp_num; - assign vx_mem_wb.loaded_data = vx_dcache_rsp.core_wb_readdata; + assign mem_wb_if.rd = dcache_rsp_if.core_wb_req_rd; + assign mem_wb_if.wb = dcache_rsp_if.core_wb_req_wb; + assign mem_wb_if.wb_valid = dcache_rsp_if.core_wb_valid; + assign mem_wb_if.wb_warp_num = dcache_rsp_if.core_wb_warp_num; + assign mem_wb_if.loaded_data = dcache_rsp_if.core_wb_readdata; wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index; @@ -75,12 +75,12 @@ module VX_lsu ( `DEBUG_END VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc( - .valids(vx_dcache_rsp.core_wb_valid), + .valids(dcache_rsp_if.core_wb_valid), .index (use_pc_index), .found (found) ); - assign vx_mem_wb.mem_wb_pc = vx_dcache_rsp.core_wb_pc[use_pc_index]; + assign mem_wb_if.mem_wb_pc = dcache_rsp_if.core_wb_pc[use_pc_index]; endmodule // Memory diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index 9bf17ea8..6e89838f 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -1,13 +1,13 @@ `include "VX_define.vh" module VX_scheduler ( - input wire clk, - input wire reset, - input wire memory_delay, - input wire exec_delay, - input wire gpr_stage_delay, - VX_frE_to_bckE_req_if vx_bckE_req, - VX_wb_if vx_writeback_if, + input wire clk, + input wire reset, + input wire memory_delay, + input wire exec_delay, + input wire gpr_stage_delay, + VX_frE_to_bckE_req_if bckE_req_if, + VX_wb_if writeback_if, output wire schedule_delay, output wire is_empty @@ -18,31 +18,31 @@ module VX_scheduler ( reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; - wire valid_wb = (vx_writeback_if.wb != 0) && (|vx_writeback_if.wb_valid) && (vx_writeback_if.rd != 0); - wire wb_inc = (vx_bckE_req.wb != 0) && (vx_bckE_req.rd != 0); + wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.wb_valid) && (writeback_if.rd != 0); + wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); - wire rs1_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs1] != 0; - wire rs2_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rs2] != 0; - wire rd_rename = rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd ] != 0; + wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0; + wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0; + wire rd_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0; - wire is_store = (vx_bckE_req.mem_write != `NO_MEM_WRITE); - wire is_load = (vx_bckE_req.mem_read != `NO_MEM_READ); + wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE); + wire is_load = (bckE_req_if.mem_read != `NO_MEM_READ); // classify our next instruction. wire is_mem = is_store || is_load; - wire is_gpu = (vx_bckE_req.is_wspawn || vx_bckE_req.is_tmc || vx_bckE_req.is_barrier || vx_bckE_req.is_split); - wire is_csr = vx_bckE_req.is_csr; + wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split); + wire is_csr = bckE_req_if.is_csr; wire is_exec = !is_mem && !is_gpu && !is_csr; - wire using_rs2 = (vx_bckE_req.rs2_src == `RS2_REG) || is_store || vx_bckE_req.is_barrier || vx_bckE_req.is_wspawn; + wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn; - wire rs1_rename_qual = ((rs1_rename) && (vx_bckE_req.rs1 != 0)); - wire rs2_rename_qual = ((rs2_rename) && (vx_bckE_req.rs2 != 0 && using_rs2)); - wire rd_rename_qual = ((rd_rename ) && (vx_bckE_req.rd != 0)); + wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0)); + wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2)); + wire rd_rename_qual = ((rd_rename ) && (bckE_req_if.rd != 0)); wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual; - assign schedule_delay = ((rename_valid) && (|vx_bckE_req.valid)) + assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid)) || (memory_delay && is_mem) || (gpr_stage_delay && (is_mem || is_exec)) || (exec_delay && is_exec); @@ -59,15 +59,15 @@ module VX_scheduler ( end end else begin if (valid_wb) begin - rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] <= rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] & (~vx_writeback_if.wb_valid); + rename_table[writeback_if.wb_warp_num][writeback_if.rd] <= rename_table[writeback_if.wb_warp_num][writeback_if.rd] & (~writeback_if.wb_valid); end if (!schedule_delay && wb_inc) begin - rename_table[vx_bckE_req.warp_num][vx_bckE_req.rd] <= vx_bckE_req.valid; + rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid; end if (valid_wb - && (0 == (rename_table[vx_writeback_if.wb_warp_num][vx_writeback_if.rd] & ~vx_writeback_if.wb_valid))) begin + && (0 == (rename_table[writeback_if.wb_warp_num][writeback_if.rd] & ~writeback_if.wb_valid))) begin count_valid <= count_valid - 1; end diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 6f5ed918..dc9971cc 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -1,67 +1,66 @@ `include "VX_define.vh" module VX_writeback ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // Mem WB info - VX_inst_mem_wb_if vx_mem_wb, + VX_inst_mem_wb_if mem_wb_if, // EXEC Unit WB info - VX_inst_exec_wb_if vx_inst_exec_wb, + VX_inst_exec_wb_if inst_exec_wb_if, // CSR Unit WB info - VX_csr_wb_if vx_csr_wb, + VX_csr_wb_if csr_wb_if, // Actual WB to GPR - VX_wb_if vx_writeback_if, - output wire no_slot_mem, - output wire no_slot_exec, - output wire no_slot_csr + VX_wb_if writeback_if, + output wire no_slot_mem, + output wire no_slot_exec, + output wire no_slot_csr ); - VX_wb_if vx_writeback_tempp(); + VX_wb_if writeback_tempp_if(); - wire exec_wb = (vx_inst_exec_wb.wb != 0) && (|vx_inst_exec_wb.wb_valid); - wire mem_wb = (vx_mem_wb.wb != 0) && (|vx_mem_wb.wb_valid); - wire csr_wb = (vx_csr_wb.wb != 0) && (|vx_csr_wb.valid); + wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.wb_valid); + wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.wb_valid); + wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid); assign no_slot_mem = mem_wb && (exec_wb || csr_wb); assign no_slot_csr = csr_wb && (exec_wb); assign no_slot_exec = 0; - assign vx_writeback_tempp.write_data = exec_wb ? vx_inst_exec_wb.alu_result : - csr_wb ? vx_csr_wb.csr_result : - mem_wb ? vx_mem_wb.loaded_data : + assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result : + csr_wb ? csr_wb_if.csr_result : + mem_wb ? mem_wb_if.loaded_data : 0; - assign vx_writeback_tempp.wb_valid = exec_wb ? vx_inst_exec_wb.wb_valid : - csr_wb ? vx_csr_wb.valid : - mem_wb ? vx_mem_wb.wb_valid : + assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid : + csr_wb ? csr_wb_if.valid : + mem_wb ? mem_wb_if.wb_valid : 0; - assign vx_writeback_tempp.rd = exec_wb ? vx_inst_exec_wb.rd : - csr_wb ? vx_csr_wb.rd : - mem_wb ? vx_mem_wb.rd : + assign writeback_tempp_if.rd = exec_wb ? inst_exec_wb_if.rd : + csr_wb ? csr_wb_if.rd : + mem_wb ? mem_wb_if.rd : 0; - assign vx_writeback_tempp.wb = exec_wb ? vx_inst_exec_wb.wb : - csr_wb ? vx_csr_wb.wb : - mem_wb ? vx_mem_wb.wb : + assign writeback_tempp_if.wb = exec_wb ? inst_exec_wb_if.wb : + csr_wb ? csr_wb_if.wb : + mem_wb ? mem_wb_if.wb : 0; - assign vx_writeback_tempp.wb_warp_num = exec_wb ? vx_inst_exec_wb.wb_warp_num : - csr_wb ? vx_csr_wb.warp_num : - mem_wb ? vx_mem_wb.wb_warp_num : + assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num : + csr_wb ? csr_wb_if.warp_num : + mem_wb ? mem_wb_if.wb_warp_num : 0; - assign vx_writeback_tempp.wb_pc = exec_wb ? vx_inst_exec_wb.exec_wb_pc : + assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc : csr_wb ? 32'hdeadbeef : - mem_wb ? vx_mem_wb.mem_wb_pc : + mem_wb ? mem_wb_if.mem_wb_pc : 32'hdeadbeef; - wire zero = 0; wire[`NUM_THREADS-1:0][31:0] use_wb_data; @@ -71,19 +70,19 @@ module VX_writeback ( .reset(reset), .stall(zero), .flush(zero), - .in ({vx_writeback_tempp.write_data, vx_writeback_tempp.wb_valid, vx_writeback_tempp.rd, vx_writeback_tempp.wb, vx_writeback_tempp.wb_warp_num, vx_writeback_tempp.wb_pc}), - .out ({use_wb_data , vx_writeback_if.wb_valid, vx_writeback_if.rd, vx_writeback_if.wb, vx_writeback_if.wb_warp_num, vx_writeback_if.wb_pc}) + .in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}), + .out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc}) ); reg[31:0] last_data_wb /* verilator public */ ; always @(posedge clk) begin - if ((|vx_writeback_if.wb_valid) && (vx_writeback_if.wb != 0) && (vx_writeback_if.rd == 28)) begin + if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin last_data_wb <= use_wb_data[0]; end end - assign vx_writeback_if.write_data = use_wb_data; + assign writeback_if.write_data = use_wb_data; endmodule : VX_writeback diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index f4ffbc61..413c7634 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -99,106 +99,106 @@ module Vortex wire schedule_delay; // Dcache Interface - VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_rsp(); - VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req(); - VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) vx_dcache_req_qual(); + VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if(); + VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if(); + VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if(); - VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_req(); - VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) vx_gpu_dcache_dram_res(); + VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_req_if(); + VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_res_if(); - assign vx_gpu_dcache_dram_res.dram_rsp_valid = dram_rsp_valid; - assign vx_gpu_dcache_dram_res.dram_rsp_addr = dram_rsp_addr; + assign gpu_dcache_dram_res_if.dram_rsp_valid = dram_rsp_valid; + assign gpu_dcache_dram_res_if.dram_rsp_addr = dram_rsp_addr; - assign dram_req_write = vx_gpu_dcache_dram_req.dram_req_write; - assign dram_req_read = vx_gpu_dcache_dram_req.dram_req_read; - assign dram_req_addr = vx_gpu_dcache_dram_req.dram_req_addr; - assign dram_rsp_ready = vx_gpu_dcache_dram_req.dram_rsp_ready; + assign dram_req_write = gpu_dcache_dram_req_if.dram_req_write; + assign dram_req_read = gpu_dcache_dram_req_if.dram_req_read; + assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr; + assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready; - assign vx_gpu_dcache_dram_req.dram_req_full = dram_req_full; + assign gpu_dcache_dram_req_if.dram_req_full = dram_req_full; genvar i; generate for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin - assign vx_gpu_dcache_dram_res.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32]; - assign dram_req_data[i * 32 +: 32] = vx_gpu_dcache_dram_req.dram_req_data[i]; + assign gpu_dcache_dram_res_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32]; + assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i]; end endgenerate wire temp_io_valid = (!memory_delay) - && (|vx_dcache_req.core_req_valid) - && (vx_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE) - && (vx_dcache_req.core_req_addr[0] == 32'h00010000); + && (|dcache_req_if.core_req_valid) + && (dcache_req_if.core_req_mem_write[0] != `NO_MEM_WRITE) + && (dcache_req_if.core_req_addr[0] == 32'h00010000); - wire[31:0] temp_io_data = vx_dcache_req.core_req_writedata[0]; + wire[31:0] temp_io_data = dcache_req_if.core_req_writedata[0]; assign io_valid = temp_io_valid; assign io_data = temp_io_data; - assign vx_dcache_req_qual.core_req_valid = vx_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}}; - assign vx_dcache_req_qual.core_req_addr = vx_dcache_req.core_req_addr; - assign vx_dcache_req_qual.core_req_writedata = vx_dcache_req.core_req_writedata; - assign vx_dcache_req_qual.core_req_mem_read = vx_dcache_req.core_req_mem_read; - assign vx_dcache_req_qual.core_req_mem_write = vx_dcache_req.core_req_mem_write; - assign vx_dcache_req_qual.core_req_rd = vx_dcache_req.core_req_rd; - assign vx_dcache_req_qual.core_req_wb = vx_dcache_req.core_req_wb; - assign vx_dcache_req_qual.core_req_warp_num = vx_dcache_req.core_req_warp_num; - assign vx_dcache_req_qual.core_req_pc = vx_dcache_req.core_req_pc; - assign vx_dcache_req_qual.core_no_wb_slot = vx_dcache_req.core_no_wb_slot; + assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}}; + assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr; + assign dcache_req_qual_if.core_req_writedata = dcache_req_if.core_req_writedata; + assign dcache_req_qual_if.core_req_mem_read = dcache_req_if.core_req_mem_read; + assign dcache_req_qual_if.core_req_mem_write = dcache_req_if.core_req_mem_write; + assign dcache_req_qual_if.core_req_rd = dcache_req_if.core_req_rd; + assign dcache_req_qual_if.core_req_wb = dcache_req_if.core_req_wb; + assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num; + assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc; + assign dcache_req_qual_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot; - VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_rsp(); - VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) vx_icache_req(); + VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if(); + VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if(); - VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_req(); - VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) vx_gpu_icache_dram_res(); + VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if(); + VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_res_if(); - assign vx_gpu_icache_dram_res.dram_rsp_valid = I_dram_rsp_valid; - assign vx_gpu_icache_dram_res.dram_rsp_addr = I_dram_rsp_addr; + assign gpu_icache_dram_res_if.dram_rsp_valid = I_dram_rsp_valid; + assign gpu_icache_dram_res_if.dram_rsp_addr = I_dram_rsp_addr; - assign I_dram_req_write = vx_gpu_icache_dram_req.dram_req_write; - assign I_dram_req_read = vx_gpu_icache_dram_req.dram_req_read; - assign I_dram_req_addr = vx_gpu_icache_dram_req.dram_req_addr; - assign I_dram_rsp_ready = vx_gpu_icache_dram_req.dram_rsp_ready; + assign I_dram_req_write = gpu_icache_dram_req_if.dram_req_write; + assign I_dram_req_read = gpu_icache_dram_req_if.dram_req_read; + assign I_dram_req_addr = gpu_icache_dram_req_if.dram_req_addr; + assign I_dram_rsp_ready = gpu_icache_dram_req_if.dram_rsp_ready; - assign vx_gpu_icache_dram_req.dram_req_full = I_dram_req_full; + assign gpu_icache_dram_req_if.dram_req_full = I_dram_req_full; genvar j; generate for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin - assign vx_gpu_icache_dram_res.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32]; - assign I_dram_req_data[j * 32 +: 32] = vx_gpu_icache_dram_req.dram_req_data[j]; + assign gpu_icache_dram_res_if.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32]; + assign I_dram_req_data[j * 32 +: 32] = gpu_icache_dram_req_if.dram_req_data[j]; end endgenerate ///////////////////////////////////////////////////////////////////////// // Front-end to Back-end -VX_frE_to_bckE_req_if vx_bckE_req(); // New instruction request to EXE/MEM +VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM // Back-end to Front-end -VX_wb_if vx_writeback_if(); // Writeback to GPRs -VX_branch_response_if vx_branch_rsp(); // Branch Resolution to Fetch -VX_jal_response_if vx_jal_rsp(); // Jump resolution to Fetch +VX_wb_if writeback_if(); // Writeback to GPRs +VX_branch_response_if branch_rsp_if(); // Branch Resolution to Fetch +VX_jal_response_if jal_rsp_if(); // Jump resolution to Fetch // CSR Buses -// VX_csr_write_request_if vx_csr_w_req(); +// VX_csr_write_request_if csr_w_req_if(); -VX_warp_ctl_if vx_warp_ctl(); -VX_gpu_snp_req_rsp_if vx_gpu_icache_snp_req(); -VX_gpu_snp_req_rsp_if vx_gpu_dcache_snp_req(); +VX_warp_ctl_if warp_ctl_if(); +VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if(); +VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if(); -assign vx_gpu_dcache_snp_req.snp_req_valid = snp_req_valid; -assign vx_gpu_dcache_snp_req.snp_req_addr = snp_req_addr; -assign snp_req_full = vx_gpu_dcache_snp_req.snp_req_full; +assign gpu_dcache_snp_req_if.snp_req_valid = snp_req_valid; +assign gpu_dcache_snp_req_if.snp_req_addr = snp_req_addr; +assign snp_req_full = gpu_dcache_snp_req_if.snp_req_full; -VX_front_end vx_front_end( +VX_front_end front_end( .clk (clk), .reset (reset), - .vx_warp_ctl (vx_warp_ctl), - .vx_bckE_req (vx_bckE_req), + .warp_ctl_if (warp_ctl_if), + .bckE_req_if (bckE_req_if), .schedule_delay (schedule_delay), - .vx_icache_rsp (vx_icache_rsp), - .vx_icache_req (vx_icache_req), - .vx_jal_rsp (vx_jal_rsp), - .vx_branch_rsp (vx_branch_rsp), + .icache_rsp_if (icache_rsp_if), + .icache_req_if (icache_req_if), + .jal_rsp_if (jal_rsp_if), + .branch_rsp_if (branch_rsp_if), .fetch_ebreak (out_ebreak) ); @@ -208,56 +208,56 @@ VX_scheduler schedule( .memory_delay (memory_delay), .exec_delay (exec_delay), .gpr_stage_delay (gpr_stage_delay), - .vx_bckE_req (vx_bckE_req), - .vx_writeback_if (vx_writeback_if), + .bckE_req_if (bckE_req_if), + .writeback_if (writeback_if), .schedule_delay (schedule_delay), .is_empty (scheduler_empty) ); -VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end( +VX_back_end #(.CORE_ID(CORE_ID)) back_end( .clk (clk), .reset (reset), .schedule_delay (schedule_delay), - .vx_warp_ctl (vx_warp_ctl), - .vx_bckE_req (vx_bckE_req), - .vx_jal_rsp (vx_jal_rsp), - .vx_branch_rsp (vx_branch_rsp), - .vx_dcache_rsp (vx_dcache_rsp), - .vx_dcache_req (vx_dcache_req), - .vx_writeback_if (vx_writeback_if), + .warp_ctl_if (warp_ctl_if), + .bckE_req_if (bckE_req_if), + .jal_rsp_if (jal_rsp_if), + .branch_rsp_if (branch_rsp_if), + .dcache_rsp_if (dcache_rsp_if), + .dcache_req_if (dcache_req_if), + .writeback_if (writeback_if), .out_mem_delay (memory_delay), .out_exec_delay (exec_delay), .gpr_stage_delay (gpr_stage_delay) ); -VX_dmem_controller vx_dmem_controller( +VX_dmem_controller dmem_controller( .clk (clk), .reset (reset), // Dram <-> Dcache - .vx_gpu_dcache_dram_req (vx_gpu_dcache_dram_req), - .vx_gpu_dcache_dram_res (vx_gpu_dcache_dram_res), - .vx_gpu_dcache_snp_req (vx_gpu_dcache_snp_req), + .gpu_dcache_dram_req_if (gpu_dcache_dram_req_if), + .gpu_dcache_dram_res_if (gpu_dcache_dram_res_if), + .gpu_dcache_snp_req_if (gpu_dcache_snp_req_if), // Dram <-> Icache - .vx_gpu_icache_dram_req (vx_gpu_icache_dram_req), - .vx_gpu_icache_dram_res (vx_gpu_icache_dram_res), - .vx_gpu_icache_snp_req (vx_gpu_icache_snp_req), + .gpu_icache_dram_req_if (gpu_icache_dram_req_if), + .gpu_icache_dram_res_if (gpu_icache_dram_res_if), + .gpu_icache_snp_req_if (gpu_icache_snp_req_if), // Core <-> Icache - .vx_icache_req (vx_icache_req), - .vx_icache_rsp (vx_icache_rsp), + .icache_req_if (icache_req_if), + .icache_rsp_if (icache_rsp_if), // Core <-> Dcache - .vx_dcache_req (vx_dcache_req_qual), - .vx_dcache_rsp (vx_dcache_rsp) + .dcache_req_if (dcache_req_qual_if), + .dcache_rsp_if (dcache_rsp_if) ); -// VX_csr_handler vx_csr_handler( +// VX_csr_handler csr_handler( // .clk (clk), // .in_decode_csr_address(decode_csr_address), -// .vx_csr_w_req (vx_csr_w_req), -// .in_wb_valid (vx_writeback_if.wb_valid[0]), +// .csr_w_req_if (csr_w_req_if), +// .in_wb_valid (writeback_if.wb_valid[0]), // .out_decode_csr_data (csr_decode_csr_data) // ); diff --git a/hw/rtl/generic_cache/VX_bank.v b/hw/rtl/generic_cache/VX_bank.v index 0212ded7..621f947d 100644 --- a/hw/rtl/generic_cache/VX_bank.v +++ b/hw/rtl/generic_cache/VX_bank.v @@ -449,7 +449,7 @@ module VX_bank #( .LLVQ_SIZE (LLVQ_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES) - ) vx_tag_data_access ( + ) tag_data_access ( .clk (clk), .reset (reset), .stall (stall_bank_pipe), @@ -477,7 +477,7 @@ module VX_bank #( .miss_st1e (miss_st1e), .dirty_st1e (dirty_st1e), .fill_saw_dirty_st1e(fill_saw_dirty_st1e) - ); + ); wire qual_valid_st1e_2 = valid_st1[STAGE_1_CYCLES-1] && !is_fill_st1[STAGE_1_CYCLES-1]; @@ -581,7 +581,7 @@ module VX_bank #( .LLVQ_SIZE (LLVQ_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES) - ) vx_fill_invalidator ( + ) fill_invalidator ( .clk (clk), .reset (reset), .possible_fill (possible_fill), diff --git a/hw/rtl/generic_cache/VX_cache.v b/hw/rtl/generic_cache/VX_cache.v index 8e26fcc8..a818c5bc 100644 --- a/hw/rtl/generic_cache/VX_cache.v +++ b/hw/rtl/generic_cache/VX_cache.v @@ -157,7 +157,7 @@ module VX_cache #( .PRFQ_SIZE (PRFQ_SIZE), .PRFQ_STRIDE (PRFQ_STRIDE), .SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES) - ) vx_cache_dram_req_arb ( + ) cache_dram_req_arb ( .clk (clk), .reset (reset), .dfqq_full (dfqq_full), @@ -191,7 +191,7 @@ module VX_cache #( .LLVQ_SIZE (LLVQ_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES (SIMULATED_DRAM_LATENCY_CYCLES) - ) vx_cache_core_req_bank_sell ( + ) cache_core_req_bank_sell ( .core_req_valid (core_req_valid), .core_req_addr (core_req_addr), .per_bank_valids (per_bank_valids) @@ -215,7 +215,7 @@ module VX_cache #( .LLVQ_SIZE (LLVQ_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES) - ) vx_cache_core_wb_sel_merge ( + ) cache_core_wb_sel_merge ( .per_bank_wb_valid (per_bank_wb_valid), .per_bank_wb_tid (per_bank_wb_tid), .per_bank_wb_rd (per_bank_wb_rd), @@ -239,7 +239,7 @@ module VX_cache #( // Snoop Forward Logic VX_snp_fwd_arb #( .NUM_BANKS(NUM_BANKS) - ) vx_snp_fwd_arb( + ) snp_fwd_arb( .per_bank_snp_fwd (per_bank_snp_fwd), .per_bank_snp_fwd_addr(per_bank_snp_fwd_addr), .per_bank_snp_fwd_pop (per_bank_snp_fwd_pop), diff --git a/hw/rtl/generic_cache/VX_cache_dfq_queue.v b/hw/rtl/generic_cache/VX_cache_dfq_queue.v index a8e9e181..592d2ea8 100644 --- a/hw/rtl/generic_cache/VX_cache_dfq_queue.v +++ b/hw/rtl/generic_cache/VX_cache_dfq_queue.v @@ -94,7 +94,7 @@ module VX_cache_dfq_queue #( VX_generic_priority_encoder #( .N(NUM_BANKS) - ) vx_sel_bank ( + ) sel_bank ( .valids(qual_bank_dram_fill_req), .index (qual_request_index), .found (qual_has_request) diff --git a/hw/rtl/generic_cache/VX_cache_dram_req_arb.v b/hw/rtl/generic_cache/VX_cache_dram_req_arb.v index 8cfcfa0a..176b4dda 100644 --- a/hw/rtl/generic_cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/generic_cache/VX_cache_dram_req_arb.v @@ -102,7 +102,7 @@ module VX_cache_dram_req_arb #( wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_full; // If no dwb, and dfqq has valids, then pop wire dfqq_push = (|per_bank_dram_fill_req_valid); - VX_cache_dfq_queue vx_cache_dfq_queue( + VX_cache_dfq_queue cache_dfq_queue( .clk (clk), .reset (reset), .dfqq_push (dfqq_push), @@ -121,7 +121,7 @@ module VX_cache_dram_req_arb #( VX_generic_priority_encoder #( .N(NUM_BANKS) - ) vx_sel_dwb ( + ) sel_dwb ( .valids(use_wb_valid), .index (dwb_bank), .found (dwb_valid) diff --git a/hw/rtl/generic_cache/VX_cache_req_queue.v b/hw/rtl/generic_cache/VX_cache_req_queue.v index 19866979..205e9c8c 100644 --- a/hw/rtl/generic_cache/VX_cache_req_queue.v +++ b/hw/rtl/generic_cache/VX_cache_req_queue.v @@ -142,10 +142,11 @@ module VX_cache_req_queue #( assign qual_pc = use_per_pc; wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index; - wire qual_has_request; + wire qual_has_request; + VX_generic_priority_encoder #( .N(NUM_REQUESTS) - ) vx_sel_bank ( + ) sel_bank ( .valids(qual_valids), .index (qual_request_index), .found (qual_has_request) diff --git a/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v b/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v index cb5676d1..8022057b 100644 --- a/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v +++ b/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v @@ -80,7 +80,7 @@ module VX_cache_wb_sel_merge #( VX_generic_priority_encoder #( .N(NUM_BANKS) - ) vx_sel_bank ( + ) sel_bank ( .valids(per_bank_wb_valid), .index (main_bank_index), .found (found_bank) diff --git a/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v b/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v index fc80d1ee..66977e4b 100644 --- a/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v +++ b/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v @@ -1,7 +1,6 @@ `include "VX_cache_config.vh" -module VX_dcache_llv_resp_bank_sel - #( +module VX_dcache_llv_resp_bank_sel #( // Size of cache in bytes parameter CACHE_SIZE_BYTES = 1024, // Size of line inside a bank in bytes @@ -15,8 +14,7 @@ module VX_dcache_llv_resp_bank_sel // Number of cycles to complete stage 1 (read from memory) parameter STAGE_1_CYCLES = 2, -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - + // Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Core Request Queue Size parameter REQQ_SIZE = 8, // Miss Reserv Queue Knob @@ -26,7 +24,7 @@ module VX_dcache_llv_resp_bank_sel // Snoop Req Queue parameter SNRQ_SIZE = 8, -// Queues for writebacks Knobs {1, 2, 4, 8, ...} + // Queues for writebacks Knobs {1, 2, 4, 8, ...} // Core Writeback Queue Size parameter CWBQ_SIZE = 8, // Dram Writeback Queue Size @@ -39,12 +37,9 @@ module VX_dcache_llv_resp_bank_sel // Fill Invalidator Size {Fill invalidator must be active} parameter FILL_INVALIDAOR_SIZE = 16, -// Dram knobs + // Dram knobs parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 - - - ) - ( +) ( output reg [NUM_BANKS-1:0] per_bank_llvq_pop, input wire[NUM_BANKS-1:0] per_bank_llvq_valid, input wire[NUM_BANKS-1:0][31:0] per_bank_llvq_rsp_addr, @@ -55,20 +50,19 @@ module VX_dcache_llv_resp_bank_sel output reg[NUM_REQUESTS-1:0] llvq_valid, output reg[NUM_REQUESTS-1:0][31:0] llvq_rsp_addr, output reg[NUM_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_rsp_data - - ); wire [(`LOG2UP(NUM_BANKS))-1:0] main_bank_index; - wire found_bank; + wire found_bank; - VX_generic_priority_encoder #(.N(NUM_BANKS)) vx_sel_bank( - .valids(per_bank_llvq_valid), - .index (main_bank_index), - .found (found_bank) + VX_generic_priority_encoder #( + .N(NUM_BANKS) + ) sel_bank( + .valids(per_bank_llvq_valid), + .index (main_bank_index), + .found (found_bank) ); - always @(*) begin llvq_valid = 0; llvq_rsp_addr = 0; diff --git a/hw/rtl/generic_cache/VX_fill_invalidator.v b/hw/rtl/generic_cache/VX_fill_invalidator.v index 678067dd..9b4b42d0 100644 --- a/hw/rtl/generic_cache/VX_fill_invalidator.v +++ b/hw/rtl/generic_cache/VX_fill_invalidator.v @@ -86,7 +86,7 @@ module VX_fill_invalidator VX_generic_priority_encoder #( .N(FILL_INVALIDAOR_SIZE) - ) vx_sel_bank ( + ) sel_bank ( .valids(~fills_active), .index (enqueue_index), .found (enqueue_found) @@ -136,7 +136,7 @@ module VX_fill_invalidator // wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index; // wire enqueue_found; - // VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) vx_sel_bank( + // VX_generic_priority_encoder #(.N(FILL_INVALIDAOR_SIZE)) sel_bank( // .valids(~fills_active), // .index (enqueue_index), // .found (enqueue_found) diff --git a/hw/rtl/generic_cache/VX_snp_fwd_arb.v b/hw/rtl/generic_cache/VX_snp_fwd_arb.v index e57efc6f..d22b6cd5 100644 --- a/hw/rtl/generic_cache/VX_snp_fwd_arb.v +++ b/hw/rtl/generic_cache/VX_snp_fwd_arb.v @@ -22,7 +22,7 @@ module VX_snp_fwd_arb VX_generic_priority_encoder #( .N(NUM_BANKS) - ) vx_sel_ffsq( + ) sel_ffsq ( .valids(qual_per_bank_snp_fwd), .index (fsq_bank), .found (fsq_valid) diff --git a/hw/rtl/generic_cache/VX_tag_data_access.v b/hw/rtl/generic_cache/VX_tag_data_access.v index 50a745fd..1a8af967 100644 --- a/hw/rtl/generic_cache/VX_tag_data_access.v +++ b/hw/rtl/generic_cache/VX_tag_data_access.v @@ -110,7 +110,7 @@ module VX_tag_data_access #( .LLVQ_SIZE (LLVQ_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES) - ) vx_tag_data_structure ( + ) tag_data_structure ( .clk (clk), .reset (reset), .stall_bank_pipe(stall_bank_pipe), diff --git a/hw/rtl/pipe_regs/VX_d_e_reg.v b/hw/rtl/pipe_regs/VX_d_e_reg.v index ea6440fc..9bdf1fb8 100644 --- a/hw/rtl/pipe_regs/VX_d_e_reg.v +++ b/hw/rtl/pipe_regs/VX_d_e_reg.v @@ -1,12 +1,12 @@ `include "../VX_define.vh" module VX_d_e_reg ( - input wire clk, - input wire reset, - input wire in_branch_stall, - input wire in_freeze, - VX_frE_to_bckE_req_if vx_frE_to_bckE_req, - VX_frE_to_bckE_req_if vx_bckE_req + input wire clk, + input wire reset, + input wire in_branch_stall, + input wire in_freeze, + VX_frE_to_bckE_req_if frE_to_bckE_req_if, + VX_frE_to_bckE_req_if bckE_req_if ); wire stall = in_freeze; @@ -19,8 +19,8 @@ module VX_d_e_reg ( .reset (reset), .stall (stall), .flush (flush), - .in ({vx_frE_to_bckE_req.csr_address, vx_frE_to_bckE_req.jalQual, vx_frE_to_bckE_req.ebreak, vx_frE_to_bckE_req.is_csr, vx_frE_to_bckE_req.csr_immed, vx_frE_to_bckE_req.csr_mask, vx_frE_to_bckE_req.rd, vx_frE_to_bckE_req.rs1, vx_frE_to_bckE_req.rs2, vx_frE_to_bckE_req.alu_op, vx_frE_to_bckE_req.wb, vx_frE_to_bckE_req.rs2_src, vx_frE_to_bckE_req.itype_immed, vx_frE_to_bckE_req.mem_read, vx_frE_to_bckE_req.mem_write, vx_frE_to_bckE_req.branch_type, vx_frE_to_bckE_req.upper_immed, vx_frE_to_bckE_req.curr_PC, vx_frE_to_bckE_req.jal, vx_frE_to_bckE_req.jal_offset, vx_frE_to_bckE_req.PC_next, vx_frE_to_bckE_req.valid, vx_frE_to_bckE_req.warp_num, vx_frE_to_bckE_req.is_wspawn, vx_frE_to_bckE_req.is_tmc, vx_frE_to_bckE_req.is_split, vx_frE_to_bckE_req.is_barrier}), - .out ({vx_bckE_req.csr_address , vx_bckE_req.jalQual , vx_bckE_req.ebreak ,vx_bckE_req.is_csr , vx_bckE_req.csr_immed , vx_bckE_req.csr_mask , vx_bckE_req.rd , vx_bckE_req.rs1 , vx_bckE_req.rs2 , vx_bckE_req.alu_op , vx_bckE_req.wb , vx_bckE_req.rs2_src , vx_bckE_req.itype_immed , vx_bckE_req.mem_read , vx_bckE_req.mem_write , vx_bckE_req.branch_type , vx_bckE_req.upper_immed , vx_bckE_req.curr_PC , vx_bckE_req.jal , vx_bckE_req.jal_offset , vx_bckE_req.PC_next , vx_bckE_req.valid , vx_bckE_req.warp_num , vx_bckE_req.is_wspawn , vx_bckE_req.is_tmc , vx_bckE_req.is_split , vx_bckE_req.is_barrier }) + .in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}), + .out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier }) ); endmodule diff --git a/hw/rtl/shared_memory/VX_priority_encoder_sm.v b/hw/rtl/shared_memory/VX_priority_encoder_sm.v index e55ea1cb..aa1a2187 100644 --- a/hw/rtl/shared_memory/VX_priority_encoder_sm.v +++ b/hw/rtl/shared_memory/VX_priority_encoder_sm.v @@ -28,21 +28,22 @@ module VX_priority_encoder_sm reg[`NUM_THREADS-1:0] left_requests; reg[`NUM_THREADS-1:0] serviced; - wire[`NUM_THREADS-1:0] use_valid; - wire requests_left = (|left_requests); assign use_valid = (requests_left) ? left_requests : in_valid; - wire[NB:0][`NUM_THREADS-1:0] bank_valids; - VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid( + + VX_bank_valids #( + .NB(NB), + .BITS_PER_BANK(BITS_PER_BANK) + ) bank_valid ( .in_valids(use_valid), .in_addr(in_address), .bank_valids(bank_valids) - ); + ); wire[NB:0] more_than_one_valid; @@ -73,11 +74,13 @@ module VX_priority_encoder_sm generate for (curr_bank_o = 0; curr_bank_o <= NB; curr_bank_o = curr_bank_o + 1) begin : encoders - VX_generic_priority_encoder #(.N(NUM_REQ)) vx_priority_encoder( + VX_generic_priority_encoder #( + .N(NUM_REQ) + ) priority_encoder ( .valids(bank_valids[curr_bank_o]), .index(internal_req_num[curr_bank_o]), .found(internal_out_valid[curr_bank_o]) - ); + ); assign out_address[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_address[internal_req_num[curr_bank_o]] : 0; assign out_data[curr_bank_o] = internal_out_valid[curr_bank_o] ? in_data[internal_req_num[curr_bank_o]] : 0; end @@ -91,11 +94,9 @@ module VX_priority_encoder_sm end end - assign req_num = internal_req_num; assign out_valid = internal_out_valid; - wire[`NUM_THREADS-1:0] serviced_qual = in_valid & (serviced); wire[`NUM_THREADS-1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual); diff --git a/hw/rtl/shared_memory/VX_shared_memory.v b/hw/rtl/shared_memory/VX_shared_memory.v index 6a5e07f9..2ee60eeb 100644 --- a/hw/rtl/shared_memory/VX_shared_memory.v +++ b/hw/rtl/shared_memory/VX_shared_memory.v @@ -1,23 +1,21 @@ `include "../VX_define.vh" -module VX_shared_memory - #( - parameter SM_SIZE = 4096, // Bytes - parameter SM_BANKS = 4, - parameter SM_BYTES_PER_READ = 16, - parameter SM_WORDS_PER_READ = 4, - parameter SM_LOG_WORDS_PER_READ = 2, - parameter SM_HEIGHT = 128, // Bytes - parameter SM_BANK_OFFSET_START = 2, - parameter SM_BANK_OFFSET_END = 4, - parameter SM_BLOCK_OFFSET_START = 5, - parameter SM_BLOCK_OFFSET_END = 6, - parameter SM_INDEX_START = 7, - parameter SM_INDEX_END = 13, - parameter NUM_REQ = 4, - parameter BITS_PER_BANK = 3 - ) - ( +module VX_shared_memory #( + parameter SM_SIZE = 4096, // Bytes + parameter SM_BANKS = 4, + parameter SM_BYTES_PER_READ = 16, + parameter SM_WORDS_PER_READ = 4, + parameter SM_LOG_WORDS_PER_READ = 2, + parameter SM_HEIGHT = 128, // Bytes + parameter SM_BANK_OFFSET_START = 2, + parameter SM_BANK_OFFSET_END = 4, + parameter SM_BLOCK_OFFSET_START = 5, + parameter SM_BLOCK_OFFSET_END = 6, + parameter SM_INDEX_START = 7, + parameter SM_INDEX_END = 13, + parameter NUM_REQ = 4, + parameter BITS_PER_BANK = 3 +) ( //INPUTS input wire clk, input wire reset, @@ -30,148 +28,144 @@ module VX_shared_memory output wire[`NUM_THREADS-1:0] out_valid, output wire[`NUM_THREADS-1:0][31:0] out_data, output wire stall +); + + //reg [NB:0][31:0] temp_address; + //reg [NB:0][31:0] temp_in_data; + //reg [NB:0] temp_in_valid; + reg [SM_BANKS - 1:0][31:0] temp_address; + reg [SM_BANKS - 1:0][31:0] temp_in_data; + reg [SM_BANKS - 1:0] temp_in_valid; + + reg [`NUM_THREADS-1:0] temp_out_valid; + reg [`NUM_THREADS-1:0][31:0] temp_out_data; + + //reg [NB:0][6:0] block_addr; + //reg [NB:0][3:0][31:0] block_wdata; + //reg [NB:0][3:0][31:0] block_rdata; + //reg [NB:0][1:0] block_we; + reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr; + reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata; + reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata; + reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we; + + wire send_data; + + //reg [NB:0][1:0] req_num; + reg [SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this + + wire [`NUM_THREADS-1:0] orig_in_valid; + + genvar f; + generate + for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup + assign orig_in_valid[f] = in_valid[f]; + end + + assign out_valid = send_data ? temp_out_valid : 0; + assign out_data = send_data ? temp_out_data : 0; + endgenerate + + VX_priority_encoder_sm #( + .NB(SM_BANKS - 1), + .BITS_PER_BANK(BITS_PER_BANK), + .NUM_REQ(NUM_REQ) + ) priority_encoder_sm ( + .clk(clk), + .reset(reset), + .in_valid(orig_in_valid), + .in_address(in_address), + .in_data(in_data), + + .out_valid(temp_in_valid), + .out_address(temp_address), + .out_data(temp_in_data), + + .req_num(req_num), + .stall(stall), + .send_data(send_data) ); -//reg[NB:0][31:0] temp_address; -//reg[NB:0][31:0] temp_in_data; -//reg[NB:0] temp_in_valid; -reg[SM_BANKS - 1:0][31:0] temp_address; -reg[SM_BANKS - 1:0][31:0] temp_in_data; -reg[SM_BANKS - 1:0] temp_in_valid; + genvar j; + integer i; + generate + for (j=0; j<= SM_BANKS - 1; j=j+1) begin : shared_mem_blocks -reg[`NUM_THREADS-1:0] temp_out_valid; -reg[`NUM_THREADS-1:0][31:0] temp_out_data; + wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j]; -//reg [NB:0][6:0] block_addr; -//reg [NB:0][3:0][31:0] block_wdata; -//reg [NB:0][3:0][31:0] block_rdata; -//reg [NB:0][1:0] block_we; -reg [SM_BANKS - 1:0][$clog2(SM_HEIGHT) - 1:0] block_addr; -reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_wdata; -reg [SM_BANKS - 1:0][SM_WORDS_PER_READ-1:0][31:0] block_rdata; -reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we; + VX_shared_memory_block #( + .SMB_HEIGHT(SM_HEIGHT), + .SMB_WORDS_PER_READ(SM_WORDS_PER_READ), + .SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ) + ) shared_memory_block ( + .clk (clk), + .reset (reset), + .addr (block_addr[j]), + .wdata (block_wdata[j]), + .we (block_we[j]), + .shm_write(shm_write), + .data_out (block_rdata[j]) + ); + end -wire send_data; - -//reg[NB:0][1:0] req_num; -reg[SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this - -wire [`NUM_THREADS-1:0] orig_in_valid; - -genvar f; -generate - for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup - assign orig_in_valid[f] = in_valid[f]; - end - - assign out_valid = send_data ? temp_out_valid : 0; - assign out_data = send_data ? temp_out_data : 0; -endgenerate - - -//VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm( -VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK), .NUM_REQ(NUM_REQ)) vx_priority_encoder_sm( - .clk(clk), - .reset(reset), - .in_valid(orig_in_valid), - .in_address(in_address), - .in_data(in_data), - - .out_valid(temp_in_valid), - .out_address(temp_address), - .out_data(temp_in_data), - - .req_num(req_num), - .stall(stall), - .send_data(send_data) - ); - - -genvar j; -integer i; -generate -//for(j=0; j<= NB; j=j+1) begin : sm_mem_block -for(j=0; j<= SM_BANKS - 1; j=j+1) begin : shared_mem_blocks - - wire shm_write = (mem_write != `NO_MEM_WRITE) && temp_in_valid[j]; - - VX_shared_memory_block# - ( - .SMB_HEIGHT(SM_HEIGHT), - .SMB_WORDS_PER_READ(SM_WORDS_PER_READ), - .SMB_LOG_WORDS_PER_READ(SM_LOG_WORDS_PER_READ) - ) vx_shared_memory_block - ( - .clk (clk), - .reset (reset), - .addr (block_addr[j]), - .wdata (block_wdata[j]), - .we (block_we[j]), - .shm_write(shm_write), - .data_out (block_rdata[j]) - ); -end - - -always @(*) begin - block_addr = 0; - block_we = 0; - block_wdata = 0; - //for(i = 0; i <= NB; i = i+1) begin - for(i = 0; i <= SM_BANKS - 1; i = i+1) begin - if(temp_in_valid[i] == 1'b1) begin - //1. Check if the request is actually to the shared memory - if((temp_address[i][31:24]) == 8'hFF) begin - // STORES - if(mem_write != `NO_MEM_WRITE) begin - if(mem_write == `SB_MEM_WRITE) begin - //TODO - end - else if(mem_write == `SH_MEM_WRITE) begin - //TODO - end - else if(mem_write == `SW_MEM_WRITE) begin - //block_addr[i] = temp_address[i][13:7]; - //block_we[i] = temp_address[i][6:5]; - //block_wdata[i][temp_address[i][6:5]] = temp_in_data[i]; - block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START]; - block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]; - block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i]; - end - end - //LOADS - else if(mem_read != `NO_MEM_READ) begin - if(mem_read == `LB_MEM_READ) begin - //TODO - end - else if (mem_read == `LH_MEM_READ) - begin - //TODO - end - else if (mem_read == `LW_MEM_READ) - begin - //block_addr[i] = temp_address[i][13:7]; - //temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]]; - //temp_out_valid[req_num[i]] = 1'b1; - block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START]; - temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]]; - temp_out_valid[req_num[i]] = 1'b1; - end - else if (mem_read == `LBU_MEM_READ) - begin - //TODO - end - else if (mem_read == `LHU_MEM_READ) - begin - //TODO + always @(*) begin + block_addr = 0; + block_we = 0; + block_wdata = 0; + //for(i = 0; i <= NB; i = i+1) begin + for (i = 0; i <= SM_BANKS - 1; i = i+1) begin + if (temp_in_valid[i] == 1'b1) begin + //1. Check if the request is actually to the shared memory + if ((temp_address[i][31:24]) == 8'hFF) begin + // STORES + if (mem_write != `NO_MEM_WRITE) begin + if (mem_write == `SB_MEM_WRITE) begin + //TODO + end + else if (mem_write == `SH_MEM_WRITE) begin + //TODO + end + else if (mem_write == `SW_MEM_WRITE) begin + //block_addr[i] = temp_address[i][13:7]; + //block_we[i] = temp_address[i][6:5]; + //block_wdata[i][temp_address[i][6:5]] = temp_in_data[i]; + block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START]; + block_we[i] = temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]; + block_wdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]] = temp_in_data[i]; + end + end + //LOADS + else if(mem_read != `NO_MEM_READ) begin + if(mem_read == `LB_MEM_READ) begin + //TODO + end + else if (mem_read == `LH_MEM_READ) + begin + //TODO + end + else if (mem_read == `LW_MEM_READ) + begin + //block_addr[i] = temp_address[i][13:7]; + //temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][6:5]]; + //temp_out_valid[req_num[i]] = 1'b1; + block_addr[i] = temp_address[i][SM_INDEX_END:SM_INDEX_START]; + temp_out_data[req_num[i]] = block_rdata[i][temp_address[i][SM_BLOCK_OFFSET_END:SM_BLOCK_OFFSET_START]]; + temp_out_valid[req_num[i]] = 1'b1; + end + else if (mem_read == `LBU_MEM_READ) + begin + //TODO + end + else if (mem_read == `LHU_MEM_READ) + begin + //TODO + end + end end end end end - end -end - -endgenerate + endgenerate endmodule