diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index b345ddb1..3522c5ba 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -90,7 +90,7 @@ module VX_alu_unit ( assign alu_stall = inst_delay_stall; always @(*) begin - case(alu_op) + case (alu_op) `DIV, `DIVU, `REM, @@ -136,7 +136,7 @@ module VX_alu_unit ( assign upper_immed = {upper_immed, {12{1'b0}}}; always @(*) begin - case(alu_op) + case (alu_op) `ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2); `SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2); `SLLA: alu_result = ALU_in1 << ALU_in2[4:0]; @@ -177,7 +177,7 @@ module VX_alu_unit ( assign upper_immed_s = {upper_immed, {12{1'b0}}}; always @(*) begin - case(alu_op) + case (alu_op) `ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2); `SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2); `SLLA: alu_result = ALU_in1 << ALU_in2[4:0]; diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index 10ba7ded..8727db3c 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -20,7 +20,9 @@ module VX_back_end #( VX_frE_to_bckE_req_if bckE_req_if, VX_wb_if writeback_if, - VX_warp_ctl_if warp_ctl_if + VX_warp_ctl_if warp_ctl_if, + + output wire ebreak ); VX_wb_if wb_temp_if(); @@ -69,6 +71,8 @@ module VX_back_end #( .gpr_stage_delay (gpr_stage_delay) ); + assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid); + VX_lsu_unit lsu_unit ( .clk (clk), .reset (reset), @@ -81,14 +85,14 @@ module VX_back_end #( ); VX_exec_unit exec_unit ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), .exec_unit_req_if(exec_unit_req_if), - .inst_exec_wb_if (inst_exec_wb_if), - .jal_rsp_if (jal_rsp_if), - .branch_rsp_if (branch_rsp_if), - .delay (exec_delay), - .no_slot_exec (no_slot_exec) + .inst_exec_wb_if(inst_exec_wb_if), + .jal_rsp_if (jal_rsp_if), + .branch_rsp_if (branch_rsp_if), + .delay (exec_delay), + .no_slot_exec (no_slot_exec) ); VX_gpu_inst gpu_inst ( @@ -119,6 +123,6 @@ module VX_back_end #( .no_slot_mem 
(no_slot_mem), .no_slot_exec (no_slot_exec), .no_slot_csr (no_slot_csr) - ); + ); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_csr_wrapper.v b/hw/rtl/VX_csr_wrapper.v index 443f8b2d..5c34b8d4 100644 --- a/hw/rtl/VX_csr_wrapper.v +++ b/hw/rtl/VX_csr_wrapper.v @@ -9,14 +9,14 @@ module VX_csr_wrapper ( wire[`NUM_THREADS-1:0][31:0] thread_ids; wire[`NUM_THREADS-1:0][31:0] warp_ids; - genvar cur_t, cur_tw; + genvar i; generate - for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init - assign thread_ids[cur_t] = cur_t; + for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init + assign thread_ids[i] = i; end - for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init - assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num}; + for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init + assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num}; end endgenerate diff --git a/hw/rtl/VX_dcache_io_arb.v b/hw/rtl/VX_dcache_io_arb.v index 3b89e476..0a392628 100644 --- a/hw/rtl/VX_dcache_io_arb.v +++ b/hw/rtl/VX_dcache_io_arb.v @@ -37,7 +37,7 @@ module VX_dcache_io_arb ( assign core_req_if.core_req_ready = io_select ? io_core_req_if.core_req_ready : dcache_core_req_if.core_req_ready; - wire dcache_rsp_valid = (|dcache_core_rsp_if.core_rsp_valid); + wire dcache_rsp_valid = (| dcache_core_rsp_if.core_rsp_valid); assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? dcache_core_rsp_if.core_rsp_valid : io_core_rsp_if.core_rsp_valid; assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? 
dcache_core_rsp_if.core_rsp_data : io_core_rsp_if.core_rsp_data; diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index d176a924..0edd07b1 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -8,18 +8,15 @@ module VX_decode( // Outputs VX_frE_to_bckE_req_if frE_to_bckE_req_if, VX_wstall_if wstall_if, - VX_join_if join_if, - - output wire terminate_sim + VX_join_if join_if ); + wire[31:0] in_instruction = fd_inst_meta_de.instruction; + wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; + wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num; - wire[31:0] in_instruction = fd_inst_meta_de.instruction; - wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; - wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num; + assign frE_to_bckE_req_if.curr_PC = in_curr_PC; - assign frE_to_bckE_req_if.curr_PC = in_curr_PC; - - wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid; + wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid; wire[6:0] curr_opcode; @@ -34,7 +31,7 @@ module VX_decode( wire is_auipc; wire is_csr; wire is_csr_immed; - wire is_e_inst; + wire is_etype; wire is_gpgpu; wire is_wspawn; @@ -47,7 +44,6 @@ module VX_decode( wire[6:0] func7; wire[11:0] u_12; - wire[7:0] jal_b_19_to_12; wire jal_b_11; wire[9:0] jal_b_10_to_1; @@ -77,11 +73,11 @@ module VX_decode( reg[4:0] alu_op; reg[4:0] mul_alu; reg[19:0] temp_upper_immed; - reg temp_jal; - reg[31:0] temp_jal_offset; - reg[31:0] temp_itype_immed; - reg[2:0] temp_branch_type; - reg temp_branch_stall; + reg temp_jal; + reg[31:0] temp_jal_offset; + reg[31:0] temp_itype_immed; + reg[2:0] temp_branch_type; + reg temp_branch_stall; assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid; @@ -89,12 +85,12 @@ module VX_decode( assign curr_opcode = in_instruction[6:0]; - assign frE_to_bckE_req_if.rd = in_instruction[11:7]; - assign frE_to_bckE_req_if.rs1 = in_instruction[19:15]; - assign frE_to_bckE_req_if.rs2 = in_instruction[24:20]; - assign func3 = in_instruction[14:12]; - assign func7 = 
in_instruction[31:25]; - assign u_12 = in_instruction[31:20]; + assign frE_to_bckE_req_if.rd = in_instruction[11:7]; + assign frE_to_bckE_req_if.rs1 = in_instruction[19:15]; + assign frE_to_bckE_req_if.rs2 = in_instruction[24:20]; + assign func3 = in_instruction[14:12]; + assign func7 = in_instruction[31:25]; + assign u_12 = in_instruction[31:20]; assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4; @@ -110,8 +106,6 @@ module VX_decode( assign is_auipc = (curr_opcode == `AUIPC_INST); assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0); assign is_csr_immed = (is_csr) && (func3[2] == 1); - // assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0); - assign is_e_inst = in_instruction == 32'h00000073; assign is_gpgpu = (curr_opcode == `GPGPU_INST); @@ -132,10 +126,10 @@ module VX_decode( assign frE_to_bckE_req_if.csr_immed = is_csr_immed; assign frE_to_bckE_req_if.is_csr = is_csr; - assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL : - is_linst ? `WB_MEM : - (is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU : - `NO_WB; + assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_etype) ? `WB_JAL : + is_linst ? `WB_MEM : + (is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU : + `NO_WB; assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? 
`RS2_IMMED : `RS2_REG; @@ -145,7 +139,7 @@ module VX_decode( // UPPER IMMEDIATE always @(*) begin - case(curr_opcode) + case (curr_opcode) `LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3}; `AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3}; default: temp_upper_immed = 20'h0; @@ -165,34 +159,34 @@ module VX_decode( assign jalr_immed = {func7, frE_to_bckE_req_if.rs2}; assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed}; - assign jal_sys_cond1 = func3 == 3'h0; - assign jal_sys_cond2 = u_12 < 12'h2; + assign jal_sys_cond1 = (func3 == 3'h0); + assign jal_sys_cond2 = (u_12 < 12'h2); assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0; assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef; // JAL always @(*) begin - case(curr_opcode) + case (curr_opcode) `JAL_INST: begin - temp_jal = 1'b1 && (|in_valid); + temp_jal = 1'b1 && (| in_valid); temp_jal_offset = jal_1_offset; end `JALR_INST: begin - temp_jal = 1'b1 && (|in_valid); + temp_jal = 1'b1 && (| in_valid); temp_jal_offset = jal_2_offset; end `SYS_INST: begin - // $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) ); - temp_jal = jal_sys_jal && (|in_valid); + // $display("SYS EBREAK %h", (jal_sys_jal && (| in_valid))); + temp_jal = jal_sys_jal && (| in_valid); temp_jal_offset = jal_sys_off; end default: begin - temp_jal = 1'b0 && (|in_valid); + temp_jal = 1'b0 && (| in_valid); temp_jal_offset = 32'hdeadbeef; end endcase @@ -202,12 +196,9 @@ module VX_decode( assign frE_to_bckE_req_if.jal = temp_jal; assign frE_to_bckE_req_if.jal_offset = temp_jal_offset; - // wire is_ebreak; - - // assign is_ebreak = is_e_inst; - wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid)); - assign frE_to_bckE_req_if.ebreak = ebreak; - assign terminate_sim = is_e_inst; + // ecall/ebreak + assign is_etype = (curr_opcode == `SYS_INST) && jal_sys_jal; + assign 
frE_to_bckE_req_if.is_etype = is_etype; // CSR @@ -222,60 +213,55 @@ module VX_decode( assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12; always @(*) begin - case(curr_opcode) - `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp}; - `S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd}; - `L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12}; - `B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]}; - default: temp_itype_immed = 32'hdeadbeef; - endcase + case (curr_opcode) + `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp}; + `S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd}; + `L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12}; + `B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]}; + default: temp_itype_immed = 32'hdeadbeef; + endcase end assign frE_to_bckE_req_if.itype_immed = temp_itype_immed; always @(*) begin - case(curr_opcode) - `B_INST: - begin - // $display("BRANCH IN DECODE"); - temp_branch_stall = 1'b1 && (|in_valid); - case(func3) - 3'h0: temp_branch_type = `BEQ; - 3'h1: temp_branch_type = `BNE; - 3'h4: temp_branch_type = `BLT; - 3'h5: temp_branch_type = `BGT; - 3'h6: temp_branch_type = `BLTU; - 3'h7: temp_branch_type = `BGTU; - default: temp_branch_type = `NO_BRANCH; - endcase - end - - `JAL_INST: - begin - temp_branch_type = `NO_BRANCH; - temp_branch_stall = 1'b1 && (|in_valid); - end - `JALR_INST: - begin - temp_branch_type = `NO_BRANCH; - temp_branch_stall = 1'b1 && (|in_valid); - end - default: - begin - temp_branch_type = `NO_BRANCH; - temp_branch_stall = 1'b0 && (|in_valid); - end + case (curr_opcode) + `B_INST: begin + // $display("BRANCH IN DECODE"); + temp_branch_stall = 1'b1 && (| in_valid); + case (func3) + 3'h0: temp_branch_type = `BEQ; + 3'h1: temp_branch_type = `BNE; + 3'h4: temp_branch_type = 
`BLT; + 3'h5: temp_branch_type = `BGT; + 3'h6: temp_branch_type = `BLTU; + 3'h7: temp_branch_type = `BGTU; + default: temp_branch_type = `NO_BRANCH; + endcase + end + `JAL_INST: begin + temp_branch_type = `NO_BRANCH; + temp_branch_stall = 1'b1 && (| in_valid); + end + `JALR_INST: begin + temp_branch_type = `NO_BRANCH; + temp_branch_stall = 1'b1 && (| in_valid); + end + default: begin + temp_branch_type = `NO_BRANCH; + temp_branch_stall = 1'b0 && (| in_valid); + end endcase end assign frE_to_bckE_req_if.branch_type = temp_branch_type; - assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid); - assign wstall_if.warp_num = in_warp_num; + assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (| in_valid); + assign wstall_if.warp_num = in_warp_num; always @(*) begin // ALU OP - case(func3) + case (func3) 3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB); 3'h1: alu_op = `SLLA; 3'h2: alu_op = `SLT; @@ -290,7 +276,7 @@ module VX_decode( always @(*) begin // ALU OP - case(func3) + case (func3) 3'h0: mul_alu = `MUL; 3'h1: mul_alu = `MULH; 3'h2: mul_alu = `MULHSU; @@ -306,7 +292,7 @@ module VX_decode( assign csr_type = func3[1:0]; always @(*) begin - case(csr_type) + case (csr_type) 2'h1: csr_alu = `CSR_ALU_RW; 2'h2: csr_alu = `CSR_ALU_RS; 2'h3: csr_alu = `CSR_ALU_RC; @@ -326,7 +312,7 @@ module VX_decode( assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? 
mul_alu : temp_final_alu; /*always_comb begin - if (1'($time & 1) && |fd_inst_meta_de.valid) begin + if (1'($time & 1) && (| fd_inst_meta_de.valid)) begin $display("*** %t: decode: opcode=%h", $time, curr_opcode); end end*/ diff --git a/hw/rtl/VX_dmem_ctrl.v b/hw/rtl/VX_dmem_ctrl.v index 9857b102..5c78aa47 100644 --- a/hw/rtl/VX_dmem_ctrl.v +++ b/hw/rtl/VX_dmem_ctrl.v @@ -64,7 +64,7 @@ module VX_dmem_ctrl ( .PRFQ_SIZE (`SPRFQ_SIZE), .PRFQ_STRIDE (`SPRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE), - .SNOOP_FORWARDING_ENABLE(0), + .SNOOP_FORWARDING (0), .DRAM_ENABLE (0), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), @@ -135,7 +135,7 @@ module VX_dmem_ctrl ( .PRFQ_SIZE (`DPRFQ_SIZE), .PRFQ_STRIDE (`DPRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE), - .SNOOP_FORWARDING_ENABLE(0), + .SNOOP_FORWARDING (0), .DRAM_ENABLE (1), .WRITE_ENABLE (1), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), @@ -206,7 +206,7 @@ module VX_dmem_ctrl ( .PRFQ_SIZE (`IPRFQ_SIZE), .PRFQ_STRIDE (`IPRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), - .SNOOP_FORWARDING_ENABLE(0), + .SNOOP_FORWARDING (0), .DRAM_ENABLE (1), .WRITE_ENABLE (0), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), diff --git a/hw/rtl/VX_exec_unit.v b/hw/rtl/VX_exec_unit.v index 5b63ecdd..13f2c303 100644 --- a/hw/rtl/VX_exec_unit.v +++ b/hw/rtl/VX_exec_unit.v @@ -44,27 +44,28 @@ module VX_exec_unit ( wire[`NUM_THREADS-1:0][31:0] alu_result; wire[`NUM_THREADS-1:0] alu_stall; - genvar index_out_reg; + + genvar i; generate - for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs + for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs VX_alu_unit alu_unit ( .clk (clk), .reset (reset), - .src_a (in_a_reg_data[index_out_reg]), - .src_b (in_b_reg_data[index_out_reg]), + .src_a (in_a_reg_data[i]), + .src_b (in_b_reg_data[i]), .src_rs2 (in_rs2_src), .itype_immed (in_itype_immed), .upper_immed (in_upper_immed), .alu_op (in_alu_op), .curr_PC 
(in_curr_PC), - .alu_result (alu_result[index_out_reg]), - .alu_stall (alu_stall[index_out_reg]) + .alu_result (alu_result[i]), + .alu_stall (alu_stall[i]) ); end endgenerate wire internal_stall; - assign internal_stall = |alu_stall; + assign internal_stall = (| alu_stall); assign delay = no_slot_exec || internal_stall; @@ -98,11 +99,10 @@ module VX_exec_unit ( endcase // in_branch_type end - wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data; - genvar i; + generate - for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup + for (i = 0; i < `NUM_THREADS; i=i+1) begin assign duplicate_PC_data[i] = exec_unit_req_if.PC_next; end endgenerate @@ -128,7 +128,7 @@ module VX_exec_unit ( assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num; // Branch rsp - assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid); + assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (| exec_unit_req_if.valid); assign branch_rsp_temp_if.branch_dir = temp_branch_dir; assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num; assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset @@ -167,7 +167,7 @@ module VX_exec_unit ( ); // always @(*) begin - // case(in_alu_op) + // case (in_alu_op) // `CSR_ALU_RW: out_csr_result = in_csr_mask; // `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask; // `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask); diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index ab0df6f5..64735889 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -9,8 +9,7 @@ module VX_fetch ( input wire icache_stage_delay, input wire[`NW_BITS-1:0] icache_stage_wid, input wire[`NUM_THREADS-1:0] icache_stage_valids, - - output wire ebreak, + output wire busy, VX_jal_rsp_if jal_rsp_if, VX_branch_rsp_if branch_rsp_if, VX_inst_meta_if fe_inst_meta_fi, 
@@ -45,7 +44,7 @@ module VX_fetch ( .ctm_warp_num (warp_ctl_if.warp_num), // WHALT - .whalt (warp_ctl_if.ebreak), + .whalt (warp_ctl_if.whalt), .whalt_warp_num (warp_ctl_if.warp_num), // Wstall @@ -83,7 +82,7 @@ module VX_fetch ( .thread_mask (thread_mask), .warp_num (warp_num), .warp_pc (warp_pc), - .ebreak (ebreak), + .busy (busy), .scheduled_warp (scheduled_warp) ); diff --git a/hw/rtl/VX_front_end.v b/hw/rtl/VX_front_end.v index 0cb3bb90..7e31e2c0 100644 --- a/hw/rtl/VX_front_end.v +++ b/hw/rtl/VX_front_end.v @@ -15,8 +15,7 @@ module VX_front_end ( VX_branch_rsp_if branch_rsp_if, VX_frE_to_bckE_req_if bckE_req_if, - - output wire fetch_ebreak + output wire busy ); VX_inst_meta_if fe_inst_meta_fi(); @@ -29,18 +28,13 @@ module VX_front_end ( wire total_freeze = schedule_delay; wire icache_stage_delay; - wire vortex_ebreak; - wire terminate_sim; - wire[`NW_BITS-1:0] icache_stage_wid; - wire[`NUM_THREADS-1:0] icache_stage_valids; + wire[`NUM_THREADS-1:0] icache_stage_valids; - assign fetch_ebreak = vortex_ebreak || terminate_sim; + VX_wstall_if wstall_if(); + VX_join_if join_if(); - VX_wstall_if wstall_if(); - VX_join_if join_if(); - - VX_fetch fetch( + VX_fetch fetch ( .clk (clk), .reset (reset), .icache_stage_wid (icache_stage_wid), @@ -52,7 +46,7 @@ module VX_front_end ( .warp_ctl_if (warp_ctl_if), .icache_stage_delay (icache_stage_delay), .branch_rsp_if (branch_rsp_if), - .ebreak (vortex_ebreak), // fetch_ebreak + .busy (busy), .fe_inst_meta_fi (fe_inst_meta_fi) ); @@ -91,9 +85,8 @@ module VX_front_end ( .fd_inst_meta_de (fd_inst_meta_de), .frE_to_bckE_req_if (frE_to_bckE_req_if), .wstall_if (wstall_if), - .join_if (join_if), - .terminate_sim (terminate_sim) - ); + .join_if (join_if) + ); wire no_br_stall = 0; diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index 54d39931..6fce0877 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -29,13 +29,13 @@ module VX_gpr ( ); `else assign write_enable = valid_write_request && ((writeback_if.wb != 0)); - wire 
going_to_write = write_enable & (|writeback_if.wb_valid); + wire going_to_write = write_enable & (| writeback_if.wb_valid); wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; - genvar curr_t; - for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin - wire local_write = write_enable & writeback_if.wb_valid[curr_t]; - assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}}; + genvar i; + for (i = 0; i < `NUM_THREADS; i=i+1) begin + wire local_write = write_enable & writeback_if.wb_valid[i]; + assign write_bit_mask[i] = {`NUM_GPRS{~local_write}}; end // wire cenb = !going_to_write; @@ -50,14 +50,11 @@ module VX_gpr ( wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b; `ifndef SYN - genvar thread; - genvar curr_bit; - for (thread = 0; thread < `NUM_THREADS; thread = thread + 1) - begin - for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1) - begin - assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit]; - assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit]; + genvar j; + for (i = 0; i < `NUM_THREADS; i = i + 1) begin + for (j = 0; j < `NUM_GPRS; j=j+1) begin + assign a_reg_data[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j]; + assign b_reg_data[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j]; end end `else @@ -67,8 +64,7 @@ module VX_gpr ( wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? 
writeback_if.write_data : 0; - genvar curr_base_thread; - for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4) + for (i = 0; i < 'NT; i=i+4) begin `IGNORE_WARNINGS_BEGIN rf2_32x128_wm1 first_ram ( @@ -77,17 +73,17 @@ module VX_gpr ( .CENYB(), .WENYB(), .AYB(), - .QA(temp_a[(curr_base_thread+3):(curr_base_thread)]), + .QA(temp_a[(i+3):(i)]), .SOA(), .SOB(), .CLKA(clk), .CENA(cena_1), - .AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]), + .AA(gpr_read_if.rs1[(i+3):(i)]), .CLKB(clk), .CENB(cenb), - .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), - .AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), - .DB(to_write[(curr_base_thread+3):(curr_base_thread)]), + .WENB(write_bit_mask[(i+3):(i)]), + .AB(writeback_if.rd[(i+3):(i)]), + .DB(to_write[(i+3):(i)]), .EMAA(3'b011), .EMASA(1'b0), .EMAB(3'b011), @@ -116,17 +112,17 @@ module VX_gpr ( .CENYB(), .WENYB(), .AYB(), - .QA(temp_b[(curr_base_thread+3):(curr_base_thread)]), + .QA(temp_b[(i+3):(i)]), .SOA(), .SOB(), .CLKA(clk), .CENA(cena_2), - .AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]), + .AA(gpr_read_if.rs2[(i+3):(i)]), .CLKB(clk), .CENB(cenb), - .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), - .AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), - .DB(to_write[(curr_base_thread+3):(curr_base_thread)]), + .WENB(write_bit_mask[(i+3):(i)]), + .AB(writeback_if.rd[(i+3):(i)]), + .DB(to_write[(i+3):(i)]), .EMAA(3'b011), .EMASA(1'b0), .EMAB(3'b011), diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index 65b8f81b..b34bd8f0 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -20,13 +20,13 @@ module VX_gpr_ram ( //-- end else begin if (we) begin - integer t; - for (t = 0; t < `NUM_THREADS; t = t + 1) begin - if (be[t]) begin - ram[waddr][t][0] <= wdata[t][7:0]; - ram[waddr][t][1] <= wdata[t][15:8]; - ram[waddr][t][2] <= wdata[t][23:16]; - ram[waddr][t][3] <= wdata[t][31:24]; + integer i; + for (i = 0; 
i < `NUM_THREADS; i = i + 1) begin + if (be[i]) begin + ram[waddr][i][0] <= wdata[i][7:0]; + ram[waddr][i][1] <= wdata[i][15:8]; + ram[waddr][i][2] <= wdata[i][23:16]; + ram[waddr][i][3] <= wdata[i][31:24]; end end end diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index f47b58cd..3bdaaaf4 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -59,9 +59,6 @@ module VX_gpr_stage ( .b_reg_data (gpr_datf_if.b_reg_data) ); - // assign bckE_req_if.is_csr = is_csr; - // assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0]; - // Outputs VX_exec_unit_req_if exec_unit_req_temp_if(); VX_lsu_req_if lsu_req_temp_if(); @@ -77,7 +74,7 @@ module VX_gpr_stage ( .csr_req_if (csr_req_temp_if) ); `DEBUG_BEGIN - wire is_lsu = (|lsu_req_temp_if.valid); + wire is_lsu = (| lsu_req_temp_if.valid); `DEBUG_END wire stall_rest = 0; wire flush_rest = schedule_delay; @@ -88,7 +85,7 @@ module VX_gpr_stage ( wire stall_exec = exec_delay; wire flush_exec = schedule_delay && !stall_exec; - wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid); + wire stall_csr = stall_gpr_csr && bckE_req_if.is_csr && (| bckE_req_if.valid); assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr; @@ -149,8 +146,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_exec), .flush (flush_exec), - .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, 
exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), - .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) + .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), + .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) ); 
assign exec_unit_req_if.a_reg_data = real_base_address; @@ -202,8 +199,8 @@ module VX_gpr_stage ( .reset (reset), .stall (stall_exec), .flush (flush_exec), - .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), - .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) + .in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, 
exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}), + .out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask }) ); VX_generic_register #( diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index 02f06cc4..2f22deb7 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -15,10 +15,10 @@ module VX_gpr_wrapper ( wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data; wire[`NUM_THREADS-1:0][31:0] jal_data; - genvar index; + genvar i; generate - for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign - assign jal_data[index] = gpr_jal_if.curr_PC; + for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign + assign jal_data[i] = gpr_jal_if.curr_PC; end endgenerate @@ -46,22 +46,19 @@ module VX_gpr_wrapper ( `endif - genvar warp_index; - generate - - for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs - wire valid_write_request = warp_index == writeback_if.warp_num; + generate + for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs + wire valid_write_request = i == 
writeback_if.warp_num; VX_gpr gpr( .clk (clk), .reset (reset), .valid_write_request (valid_write_request), .gpr_read_if (gpr_read_if), .writeback_if (writeback_if), - .a_reg_data (temp_a_reg_data[warp_index]), - .b_reg_data (temp_b_reg_data[warp_index]) + .a_reg_data (temp_a_reg_data[i]), + .b_reg_data (temp_b_reg_data[i]) ); end - endgenerate endmodule diff --git a/hw/rtl/VX_gpu_inst.v b/hw/rtl/VX_gpu_inst.v index 78497c45..4029c17d 100644 --- a/hw/rtl/VX_gpu_inst.v +++ b/hw/rtl/VX_gpu_inst.v @@ -13,31 +13,29 @@ module VX_gpu_inst ( wire[`NUM_THREADS-1:0] tmc_new_mask; wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0]; - genvar curr_t; + genvar i; generate - for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init - assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0]; + for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init + assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0]; end endgenerate - wire valid_inst = (|curr_valids); + wire valid_inst = (| curr_valids); assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num; assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst; assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0; - // assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst; - assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0); + assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0); wire wspawn = gpu_inst_req_if.is_wspawn; wire[31:0] wspawn_pc = gpu_inst_req_if.rd2; wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0]; wire[`NUM_WARPS-1:0] wspawn_new_active; - genvar curr_w; generate - for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init - assign wspawn_new_active[curr_w] = all_active ? 
1 : curr_w < gpu_inst_req_if.a_reg_data[0]; + for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init + assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0]; end endgenerate @@ -57,14 +55,11 @@ module VX_gpu_inst ( wire[`NUM_THREADS-1:0] split_new_use_mask; wire[`NUM_THREADS-1:0] split_new_later_mask; - // VX_gpu_inst_req.pc - genvar curr_s_t; generate - for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init - wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1); - - assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool); - assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool); + for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init + wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1); + assign split_new_use_mask[i] = curr_valids[i] & (curr_bool); + assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool); end endgenerate diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index d507483f..f2329ef1 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -16,7 +16,7 @@ module VX_icache_stage ( reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0]; - wire valid_inst = (|fe_inst_meta_fi.valid); + wire valid_inst = (| fe_inst_meta_fi.valid); // Icache Request assign icache_req_if.core_req_valid = valid_inst && !total_freeze; @@ -45,11 +45,12 @@ module VX_icache_stage ( // Core can't accept response assign icache_rsp_if.core_rsp_ready = ~total_freeze; - integer w; + integer i; + always @(posedge clk) begin if (reset) begin - for (w = 0; w < `NUM_WARPS; w = w + 1) begin - threads_active[w] <= 0; + for (i = 0; i < `NUM_WARPS; i = i + 1) begin + threads_active[i] <= 0; end end else begin if (valid_inst && !icache_stage_delay) begin diff --git a/hw/rtl/VX_inst_multiplex.v b/hw/rtl/VX_inst_multiplex.v index 90c5449a..199fd83c 100644 --- a/hw/rtl/VX_inst_multiplex.v +++ b/hw/rtl/VX_inst_multiplex.v @@ -21,12 +21,12 
@@ module VX_inst_multiplex ( wire is_csr = bckE_req_if.is_csr; // wire is_gpu = 0; - genvar currT; + genvar i; generate - for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init - assign is_mem_mask[currT] = is_mem; - assign is_gpu_mask[currT] = is_gpu; - assign is_csr_mask[currT] = is_csr; + for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init + assign is_mem_mask[i] = is_mem; + assign is_gpu_mask[i] = is_gpu; + assign is_csr_mask[i] = is_csr; end endgenerate @@ -64,7 +64,7 @@ module VX_inst_multiplex ( assign exec_unit_req_if.jalQual = bckE_req_if.jalQual; assign exec_unit_req_if.jal = bckE_req_if.jal; assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset; - assign exec_unit_req_if.ebreak = bckE_req_if.ebreak; + assign exec_unit_req_if.is_etype = bckE_req_if.is_etype; // GPR Req diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 638b06d2..f7e1dfd6 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -61,10 +61,10 @@ module VX_lsu_unit ( assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag; /*always_comb begin - if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin + if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin $display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data); end - if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin + if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin $display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data); end end*/ diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index bd1af774..d58fdce8 100644 --- a/hw/rtl/VX_scheduler.v +++ 
b/hw/rtl/VX_scheduler.v @@ -18,7 +18,7 @@ module VX_scheduler ( reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; - wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.valid) && (writeback_if.rd != 0); + wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0); wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0); wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0; @@ -42,7 +42,7 @@ module VX_scheduler ( wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual; - assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid)) + assign schedule_delay = ((rename_valid) && (| bckE_req_if.valid)) || (memory_delay && is_mem) || (gpr_stage_delay && (is_mem || is_exec)) || (exec_delay && is_exec); diff --git a/hw/rtl/VX_warp.v b/hw/rtl/VX_warp.v index c6be7442..2433d950 100644 --- a/hw/rtl/VX_warp.v +++ b/hw/rtl/VX_warp.v @@ -25,12 +25,12 @@ module VX_warp ( reg [`NUM_THREADS-1:0] valid_t; reg [`NUM_THREADS-1:0] valid_zero; - integer ti; + integer i; initial begin real_PC = 0; - for (ti = 1; ti < `NUM_THREADS; ti=ti+1) begin - valid_t[ti] = 0; // Thread 1 active - valid_zero[ti] = 0; + for (i = 1; i < `NUM_THREADS; i=i+1) begin + valid_t[i] = 0; // Thread 1 active + valid_zero[i] = 0; end valid_t = 1; valid_zero[0] = 0; @@ -44,10 +44,10 @@ module VX_warp ( end end - genvar tv; + genvar i; generate - for (tv = 0; tv < `NUM_THREADS; tv = tv+1) begin : valid_assign - assign valid[tv] = change_mask ? thread_mask[tv] : stall ? 1'b0 : valid_t[tv]; + for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign + assign valid[i] = change_mask ? thread_mask[i] : stall ? 
1'b0 : valid_t[i]; end endgenerate diff --git a/hw/rtl/VX_warp_sched.v b/hw/rtl/VX_warp_sched.v index 246b3290..40f96071 100644 --- a/hw/rtl/VX_warp_sched.v +++ b/hw/rtl/VX_warp_sched.v @@ -56,7 +56,7 @@ module VX_warp_sched ( output wire[`NUM_THREADS-1:0] thread_mask, output wire[`NW_BITS-1:0] warp_num, output wire[31:0] warp_pc, - output wire ebreak, + output wire busy, output wire scheduled_warp, input wire[`NW_BITS-1:0] icache_stage_wid, @@ -162,14 +162,14 @@ module VX_warp_sched ( warp_pcs[join_warp_num] <= join_pc; end thread_masks[join_warp_num] <= join_tm; - didnt_split <= 0; + didnt_split <= 0; end else if (is_split) begin - warp_stalled[split_warp_num] <= 0; + warp_stalled[split_warp_num] <= 0; if (!dont_split) begin thread_masks[split_warp_num] <= split_new_mask; - didnt_split <= 0; + didnt_split <= 0; end else begin - didnt_split <= 1; + didnt_split <= 1; end end @@ -218,7 +218,7 @@ module VX_warp_sched ( warp_lock[warp_num] <= 1'b1; // warp_lock <= {`NUM_WARPS{1'b1}}; end - if (|icache_stage_valids && !stall) begin + if ((| icache_stage_valids) && !stall) begin warp_lock[icache_stage_wid] <= 1'b0; // warp_lock <= {`NUM_WARPS{1'b0}}; end @@ -251,15 +251,6 @@ module VX_warp_sched ( assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3]; - // integer curr_b; - // always @(*) begin - // total_barrier_stall = 0; - // for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1) - // begin - // total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b]; - // end - // end - assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join); wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]}; @@ -267,11 +258,11 @@ module VX_warp_sched ( assign {join_fall, join_pc, join_tm} = d[join_warp_num]; - genvar curr_warp; + genvar i; generate - for (curr_warp = 0; curr_warp < `NUM_WARPS; 
curr_warp = curr_warp + 1) begin : stacks - wire correct_warp_s = (curr_warp == split_warp_num); - wire correct_warp_j = (curr_warp == join_warp_num); + for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks + wire correct_warp_s = (i == split_warp_num); + wire correct_warp_j = (i == join_warp_num); wire push = (is_split && !dont_split) && correct_warp_s; wire pop = is_join && correct_warp_j; @@ -284,7 +275,7 @@ module VX_warp_sched ( .reset(reset), .push (push), .pop (pop), - .d (d[curr_warp]), + .d (d[i]), .q1 (q1), .q2 (q2) ); @@ -330,6 +321,6 @@ module VX_warp_sched ( // $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]); // end - assign ebreak = (warp_active == 0); + assign busy = (warp_active != 0); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 7c55e5a2..ed64cde2 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -22,9 +22,9 @@ module VX_writeback ( VX_wb_if writeback_tmp_if(); - wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.valid); - wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.valid); - wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid); + wire exec_wb = (inst_exec_wb_if.wb != 0) && (| inst_exec_wb_if.valid); + wire mem_wb = (mem_wb_if.wb != 0) && (| mem_wb_if.valid); + wire csr_wb = (csr_wb_if.wb != 0) && (| csr_wb_if.valid); assign no_slot_mem = mem_wb && (exec_wb || csr_wb); assign no_slot_csr = csr_wb && (exec_wb); @@ -78,7 +78,7 @@ module VX_writeback ( reg [31:0] last_data_wb /* verilator public */; always @(posedge clk) begin - if ((|writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin + if ( (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin last_data_wb <= use_wb_data[0]; end end diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v 
index 35e0939e..3a975bd0 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -56,7 +56,8 @@ module Vortex #( input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, - // Debug + // Status + output wire busy, output wire ebreak ); `DEBUG_BEGIN @@ -157,100 +158,101 @@ module Vortex #( assign icache_dram_rsp_if.dram_rsp_tag = I_dram_rsp_tag; assign I_dram_rsp_ready = icache_dram_rsp_if.dram_rsp_ready; -/////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// -// Front-end to Back-end -VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM + // Front-end to Back-end + VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM -// Back-end to Front-end -VX_wb_if writeback_if(); // Writeback to GPRs -VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch -VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch + // Back-end to Front-end + VX_wb_if writeback_if(); // Writeback to GPRs + VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch + VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch -// Warp controls -VX_warp_ctl_if warp_ctl_if(); + // Warp controls + VX_warp_ctl_if warp_ctl_if(); -// Cache snooping -VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if(); + // Cache snooping + VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if(); -assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid; -assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr; -assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready; + assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid; + assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr; + assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready; -VX_front_end front_end ( - .clk (clk), - .reset (reset), - .warp_ctl_if (warp_ctl_if), - .bckE_req_if (bckE_req_if), - .schedule_delay 
(schedule_delay), - .icache_rsp_if (icache_core_rsp_if), - .icache_req_if (icache_core_req_if), - .jal_rsp_if (jal_rsp_if), - .branch_rsp_if (branch_rsp_if), - .fetch_ebreak (ebreak) -); + VX_front_end front_end ( + .clk (clk), + .reset (reset), + .warp_ctl_if (warp_ctl_if), + .bckE_req_if (bckE_req_if), + .schedule_delay (schedule_delay), + .icache_rsp_if (icache_core_rsp_if), + .icache_req_if (icache_core_req_if), + .jal_rsp_if (jal_rsp_if), + .branch_rsp_if (branch_rsp_if), + .busy (busy) + ); -VX_scheduler scheduler ( - .clk (clk), - .reset (reset), - .memory_delay (memory_delay), - .exec_delay (exec_delay), - .gpr_stage_delay(gpr_stage_delay), - .bckE_req_if (bckE_req_if), - .writeback_if (writeback_if), - .schedule_delay (schedule_delay), - .is_empty (scheduler_empty) -); + VX_scheduler scheduler ( + .clk (clk), + .reset (reset), + .memory_delay (memory_delay), + .exec_delay (exec_delay), + .gpr_stage_delay(gpr_stage_delay), + .bckE_req_if (bckE_req_if), + .writeback_if (writeback_if), + .schedule_delay (schedule_delay), + .is_empty (scheduler_empty) + ); -VX_back_end #( - .CORE_ID(CORE_ID) -) back_end ( - .clk (clk), - .reset (reset), - .schedule_delay (schedule_delay), - .warp_ctl_if (warp_ctl_if), - .bckE_req_if (bckE_req_if), - .jal_rsp_if (jal_rsp_if), - .branch_rsp_if (branch_rsp_if), - .dcache_req_if (dcache_io_core_req_if), - .dcache_rsp_if (dcache_io_core_rsp_if), - .writeback_if (writeback_if), - .mem_delay (memory_delay), - .exec_delay (exec_delay), - .gpr_stage_delay (gpr_stage_delay) -); + VX_back_end #( + .CORE_ID(CORE_ID) + ) back_end ( + .clk (clk), + .reset (reset), + .schedule_delay (schedule_delay), + .warp_ctl_if (warp_ctl_if), + .bckE_req_if (bckE_req_if), + .jal_rsp_if (jal_rsp_if), + .branch_rsp_if (branch_rsp_if), + .dcache_req_if (dcache_io_core_req_if), + .dcache_rsp_if (dcache_io_core_rsp_if), + .writeback_if (writeback_if), + .mem_delay (memory_delay), + .exec_delay (exec_delay), + .gpr_stage_delay (gpr_stage_delay), + .ebreak 
(ebreak) + ); -VX_dmem_ctrl dmem_ctrl ( - .clk (clk), - .reset (reset), + VX_dmem_ctrl dmem_ctrl ( + .clk (clk), + .reset (reset), - // Core <-> Dcache - .dcache_core_req_if (dcache_core_req_if), - .dcache_core_rsp_if (dcache_core_rsp_if), + // Core <-> Dcache + .dcache_core_req_if (dcache_core_req_if), + .dcache_core_rsp_if (dcache_core_rsp_if), - // Dram <-> Dcache - .dcache_dram_req_if (dcache_dram_req_if), - .dcache_dram_rsp_if (dcache_dram_rsp_if), - .dcache_snp_req_if (dcache_snp_req_if), + // Dram <-> Dcache + .dcache_dram_req_if (dcache_dram_req_if), + .dcache_dram_rsp_if (dcache_dram_rsp_if), + .dcache_snp_req_if (dcache_snp_req_if), - // Core <-> Icache - .icache_core_req_if (icache_core_req_if), - .icache_core_rsp_if (icache_core_rsp_if), + // Core <-> Icache + .icache_core_req_if (icache_core_req_if), + .icache_core_rsp_if (icache_core_rsp_if), - // Dram <-> Icache - .icache_dram_req_if (icache_dram_req_if), - .icache_dram_rsp_if (icache_dram_rsp_if) -); + // Dram <-> Icache + .icache_dram_req_if (icache_dram_req_if), + .icache_dram_rsp_if (icache_dram_rsp_if) + ); -VX_dcache_io_arb dcache_io_arb ( - .io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR), - .core_req_if (dcache_io_core_req_if), - .dcache_core_req_if (dcache_core_req_if), - .io_core_req_if (io_core_req_if), - .dcache_core_rsp_if (dcache_core_rsp_if), - .io_core_rsp_if (io_core_rsp_if), - .core_rsp_if (dcache_io_core_rsp_if) -); + VX_dcache_io_arb dcache_io_arb ( + .io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR), + .core_req_if (dcache_io_core_req_if), + .dcache_core_req_if (dcache_core_req_if), + .io_core_req_if (io_core_req_if), + .dcache_core_rsp_if (dcache_core_rsp_if), + .io_core_rsp_if (io_core_rsp_if), + .core_rsp_if (dcache_io_core_rsp_if) + ); endmodule // Vortex diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index c7e4664b..506578b4 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -42,7 +42,8 @@ 
module Vortex_Cluster #( input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, - // Debug + // Status + output wire busy, output wire ebreak ); wire[`NUM_CORES-1:0] per_core_D_dram_req_read; @@ -83,6 +84,7 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0] per_core_io_rsp_ready; `IGNORE_WARNINGS_END + wire[`NUM_CORES-1:0] per_core_busy; wire[`NUM_CORES-1:0] per_core_ebreak; genvar i; @@ -92,48 +94,49 @@ module Vortex_Cluster #( ) vortex_core ( .clk (clk), .reset (reset), - .D_dram_req_read (per_core_D_dram_req_read [i]), - .D_dram_req_write (per_core_D_dram_req_write [i]), - .D_dram_req_addr (per_core_D_dram_req_addr [i]), - .D_dram_req_data (per_core_D_dram_req_data [i]), - .D_dram_req_tag (per_core_D_dram_req_tag [i]), - .D_dram_req_ready (per_core_D_dram_req_ready [i]), - .D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]), - .D_dram_rsp_data (per_core_D_dram_rsp_data [i]), - .D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]), - .D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]), - .I_dram_req_read (per_core_I_dram_req_read [i]), + .D_dram_req_read (per_core_D_dram_req_read [i]), + .D_dram_req_write (per_core_D_dram_req_write [i]), + .D_dram_req_addr (per_core_D_dram_req_addr [i]), + .D_dram_req_data (per_core_D_dram_req_data [i]), + .D_dram_req_tag (per_core_D_dram_req_tag [i]), + .D_dram_req_ready (per_core_D_dram_req_ready [i]), + .D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]), + .D_dram_rsp_data (per_core_D_dram_rsp_data [i]), + .D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]), + .D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]), + .I_dram_req_read (per_core_I_dram_req_read [i]), `IGNORE_WARNINGS_BEGIN .I_dram_req_write (), `IGNORE_WARNINGS_END - .I_dram_req_addr (per_core_I_dram_req_addr [i]), - .I_dram_req_data (per_core_I_dram_req_data [i]), - .I_dram_req_tag (per_core_I_dram_req_tag [i]), - .I_dram_req_ready (per_core_I_dram_req_ready [i]), - .I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]), - .I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]), - 
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]), - .I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]), + .I_dram_req_addr (per_core_I_dram_req_addr [i]), + .I_dram_req_data (per_core_I_dram_req_data [i]), + .I_dram_req_tag (per_core_I_dram_req_tag [i]), + .I_dram_req_ready (per_core_I_dram_req_ready [i]), + .I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]), + .I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]), + .I_dram_rsp_data (per_core_I_dram_rsp_data [i]), + .I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]), .llc_snp_req_valid (snp_fwd_valid), .llc_snp_req_addr (snp_fwd_addr), - .llc_snp_req_ready (per_core_snp_fwd_ready [i]), + .llc_snp_req_ready (per_core_snp_fwd_ready [i]), - .io_req_read (per_core_io_req_read [i]), - .io_req_write (per_core_io_req_write [i]), - .io_req_addr (per_core_io_req_addr [i]), - .io_req_data (per_core_io_req_data [i]), - .io_req_byteen (per_core_io_req_byteen [i]), - .io_req_tag (per_core_io_req_tag [i]), + .io_req_read (per_core_io_req_read [i]), + .io_req_write (per_core_io_req_write [i]), + .io_req_addr (per_core_io_req_addr [i]), + .io_req_data (per_core_io_req_data [i]), + .io_req_byteen (per_core_io_req_byteen [i]), + .io_req_tag (per_core_io_req_tag [i]), .io_req_ready (io_req_ready), .io_rsp_valid (io_rsp_valid), .io_rsp_data (io_rsp_data), .io_rsp_tag (io_rsp_tag), - .io_rsp_ready (per_core_io_rsp_ready [i]), + .io_rsp_ready (per_core_io_rsp_ready [i]), - .ebreak (per_core_ebreak [i]) + .busy (per_core_busy [i]), + .ebreak (per_core_ebreak [i]) ); end @@ -145,7 +148,8 @@ module Vortex_Cluster #( assign io_req_tag = per_core_io_req_tag[0]; assign io_rsp_ready = per_core_io_rsp_ready[0]; - + + assign busy = (| per_core_busy); assign ebreak = (& per_core_ebreak); if (`L2_ENABLE) begin @@ -184,8 +188,8 @@ module Vortex_Cluster #( assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; - assign per_core_D_dram_req_ready[(i/2)] = l2_core_req_ready; - assign 
per_core_I_dram_req_ready[(i/2)] = l2_core_req_ready; + assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready; + assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready; assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i]; assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1]; @@ -221,7 +225,7 @@ module Vortex_Cluster #( .FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE), .DRAM_ENABLE (1), .WRITE_ENABLE (1), - .SNOOP_FORWARDING_ENABLE(1), + .SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH) diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index 01f6ecbb..7a2d379f 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -40,7 +40,8 @@ module Vortex_Socket ( input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, - // Debug + // Status + output wire busy, output wire ebreak ); if (`NUM_CLUSTERS == 1) begin @@ -80,6 +81,7 @@ module Vortex_Socket ( .io_rsp_tag (io_rsp_tag), .io_rsp_ready (io_rsp_ready), + .busy (busy), .ebreak (ebreak) ); @@ -112,6 +114,7 @@ module Vortex_Socket ( wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; `IGNORE_WARNINGS_END + wire[`NUM_CLUSTERS-1:0] per_cluster_busy; wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak; genvar i; @@ -151,6 +154,7 @@ module Vortex_Socket ( .io_rsp_tag (io_rsp_tag), .io_rsp_ready (per_cluster_io_rsp_ready [i]), + .busy (per_cluster_busy [i]), .ebreak (per_cluster_ebreak [i]) ); end @@ -164,6 +168,7 @@ module Vortex_Socket ( assign io_rsp_ready = per_cluster_io_rsp_ready[0]; + assign busy = (| per_cluster_busy); assign ebreak = (& per_cluster_ebreak); // L3 Cache /////////////////////////////////////////////////////////// @@ -219,7 +224,7 @@ module Vortex_Socket ( .FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE), .DRAM_ENABLE (1), .WRITE_ENABLE (1), - .SNOOP_FORWARDING_ENABLE(1), + .SNOOP_FORWARDING (1), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_ID_BITS (0), .DRAM_TAG_WIDTH 
(`L3DRAM_TAG_WIDTH) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 2a6585b5..93dd3bcf 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -11,7 +11,7 @@ module VX_bank #( parameter WORD_SIZE = 4, // Number of Word requests per cycle {1, 2, 4, 8, ...} parameter NUM_REQUESTS = 2, - // Number of cycles to complete stage 1 (read from memory) + // Number of cycles to complete stage 1 (read from memory) parameter STAGE_1_CYCLES = 2, // Queues feeding into banks Knobs {1, 2, 4, 8, ...} @@ -46,7 +46,7 @@ module VX_bank #( parameter DRAM_ENABLE = 1, // Enable snoop forwarding - parameter SNOOP_FORWARDING_ENABLE = 0, + parameter SNOOP_FORWARDING = 0, // core request tag size parameter CORE_TAG_WIDTH = 1, @@ -108,7 +108,7 @@ module VX_bank #( if (reset) begin snoop_state <= 0; end else begin - snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING_ENABLE; + snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING; end end @@ -169,7 +169,7 @@ module VX_bank #( wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0; wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0; - assign reqq_push = core_req_ready && (|core_req_valids); + assign reqq_push = core_req_ready && (| core_req_valids); VX_cache_req_queue #( .CACHE_SIZE (CACHE_SIZE), @@ -241,16 +241,16 @@ module VX_bank #( wire stall_bank_pipe; reg is_fill_in_pipe; - wire is_fill_st1 [STAGE_1_CYCLES-1:0]; + wire is_fill_st1 [STAGE_1_CYCLES-1:0]; `DEBUG_BEGIN - wire going_to_write_st1[STAGE_1_CYCLES-1:0]; + wire going_to_write_st1 [STAGE_1_CYCLES-1:0]; `DEBUG_END - integer i; + integer j; always @(*) begin is_fill_in_pipe = 0; - for (i = 0; i < STAGE_1_CYCLES; i=i+1) begin - if (is_fill_st1[i]) begin + for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin + if (is_fill_st1[j]) begin is_fill_in_pipe = 1; end end @@ -327,8 +327,8 @@ module VX_bank #( .out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
); - genvar stage; - for (stage = 1; stage < STAGE_1_CYCLES; stage = stage + 1) begin + genvar i; + for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin VX_generic_register #( .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + (`BANK_LINE_WORDS*`WORD_WIDTH)) ) s0_1_cc ( @@ -336,8 +336,8 @@ module VX_bank #( .reset(reset), .stall(stall_bank_pipe), .flush(0), - .in ({is_snp_st1[stage-1], going_to_write_st1[stage-1], valid_st1[stage-1], addr_st1[stage-1], wsel_st1[stage-1], writeword_st1[stage-1], inst_meta_st1[stage-1], is_fill_st1[stage-1], writedata_st1[stage-1]}), - .out ({is_snp_st1[stage], going_to_write_st1[stage], valid_st1[stage], addr_st1[stage], wsel_st1[stage], writeword_st1[stage], inst_meta_st1[stage], is_fill_st1[stage], writedata_st1[stage]}) + .in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), + .out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) ); end @@ -506,9 +506,10 @@ module VX_bank #( ); // Enqueue to CWB Queue + // TODO: should investigate the need for "SNOOP_FORWARDING" here wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full - && !(SNOOP_FORWARDING_ENABLE && (miss_add_mem_write == `BYTE_EN_NO)) + && !(SNOOP_FORWARDING && (miss_add_mem_write == `BYTE_EN_NO)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) @@ -554,7 +555,7 @@ module VX_bank #( wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data; - if (SNOOP_FORWARDING_ENABLE) begin + if (SNOOP_FORWARDING) begin assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2; assign dwbq_req_addr = (should_flush && dwbq_push) ? 
addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; end else begin diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 9e4ef24a..759201b2 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -47,7 +47,7 @@ module VX_cache #( parameter DRAM_ENABLE = 1, // Enable snoop forwarding - parameter SNOOP_FORWARDING_ENABLE = 0, + parameter SNOOP_FORWARDING = 0, // Prefetcher parameter PRFQ_SIZE = 64, @@ -135,9 +135,9 @@ module VX_cache #( `DEBUG_END assign dram_req_tag = dram_req_addr; - assign core_req_ready = ~(|per_bank_reqq_full); - assign snp_req_ready = ~(|per_bank_snp_req_full); - assign dram_rsp_ready = (|per_bank_dram_fill_rsp_ready); + assign core_req_ready = ~(| per_bank_reqq_full); + assign snp_req_ready = ~(| per_bank_snp_req_full); + assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); VX_cache_core_req_bank_sel #( .CACHE_SIZE (CACHE_SIZE), @@ -265,7 +265,7 @@ module VX_cache #( .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .DRAM_ENABLE (DRAM_ENABLE), .WRITE_ENABLE (WRITE_ENABLE), - .SNOOP_FORWARDING_ENABLE(SNOOP_FORWARDING_ENABLE), + .SNOOP_FORWARDING (SNOOP_FORWARDING), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) ) bank ( diff --git a/hw/rtl/cache/VX_cache_dfq_queue.v b/hw/rtl/cache/VX_cache_dfq_queue.v index 080b49d1..d1fad1ed 100644 --- a/hw/rtl/cache/VX_cache_dfq_queue.v +++ b/hw/rtl/cache/VX_cache_dfq_queue.v @@ -63,8 +63,8 @@ module VX_cache_dfq_queue #( wire o_empty; - wire use_empty = !(|use_per_bank_dram_fill_req_valid); - wire out_empty = !(|out_per_bank_dram_fill_req_valid) || o_empty; + wire use_empty = !(| use_per_bank_dram_fill_req_valid); + wire out_empty = !(| out_per_bank_dram_fill_req_valid) || o_empty; wire push_qual = dfqq_push && !dfqq_full; wire pop_qual = dfqq_pop && use_empty && !out_empty; diff --git a/hw/rtl/cache/VX_cache_dram_req_arb.v b/hw/rtl/cache/VX_cache_dram_req_arb.v index 8b060622..dbe16f49 100644 --- a/hw/rtl/cache/VX_cache_dram_req_arb.v +++ 
b/hw/rtl/cache/VX_cache_dram_req_arb.v @@ -98,7 +98,7 @@ module VX_cache_dram_req_arb #( `DEBUG_END wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop - wire dfqq_push = (|per_bank_dram_fill_req_valid); + wire dfqq_push = (| per_bank_dram_fill_req_valid); VX_cache_dfq_queue cache_dfq_queue( .clk (clk), diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 48de1238..eb8e1440 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -90,10 +90,10 @@ module VX_cache_miss_resrv #( wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; reg [MRVQ_SIZE-1:0] make_ready; - genvar curr_e; + genvar i; generate - for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin - assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e] && (addr_table[curr_e] == fill_addr_st1); + for (i = 0; i < MRVQ_SIZE; i=i+1) begin + assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1); end endgenerate @@ -107,7 +107,7 @@ module VX_cache_miss_resrv #( wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2); wire mrvq_pop = miss_resrv_pop && dequeue_possible; - wire update_ready = (|make_ready); + wire update_ready = (| make_ready); always @(posedge clk) begin if (reset) begin diff --git a/hw/rtl/cache/VX_cache_req_queue.v b/hw/rtl/cache/VX_cache_req_queue.v index 5c9e2e66..2325e606 100644 --- a/hw/rtl/cache/VX_cache_req_queue.v +++ b/hw/rtl/cache/VX_cache_req_queue.v @@ -97,8 +97,8 @@ module VX_cache_req_queue #( wire o_empty; - wire use_empty = !(|use_per_valids); - wire out_empty = !(|out_per_valids) || o_empty; + wire use_empty = !(| use_per_valids); + wire out_empty = !(| out_per_valids) || o_empty; wire push_qual = reqq_push && !reqq_full; wire pop_qual = !out_empty && use_empty; diff --git a/hw/rtl/cache/VX_fill_invalidator.v b/hw/rtl/cache/VX_fill_invalidator.v index f64cffb0..e7400b89 100644 --- 
a/hw/rtl/cache/VX_fill_invalidator.v +++ b/hw/rtl/cache/VX_fill_invalidator.v @@ -60,10 +60,10 @@ module VX_fill_invalidator #( reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill; wire matched; - integer fi; + integer i; always @(*) begin - for (fi = 0; fi < FILL_INVALIDAOR_SIZE; fi+=1) begin - matched_fill[fi] = fills_active[fi] && (fills_address[fi] == fill_addr); + for (i = 0; i < FILL_INVALIDAOR_SIZE; i+=1) begin + matched_fill[i] = fills_active[i] && (fills_address[i] == fill_addr); end end diff --git a/hw/rtl/cache/VX_tag_data_structure.v b/hw/rtl/cache/VX_tag_data_structure.v index e30968dd..774aeab2 100644 --- a/hw/rtl/cache/VX_tag_data_structure.v +++ b/hw/rtl/cache/VX_tag_data_structure.v @@ -68,7 +68,7 @@ module VX_tag_data_structure #( assign read_tag = tag [read_addr]; assign read_data = data [read_addr]; - wire going_to_write = (|write_enable); + wire going_to_write = (| write_enable); integer i; always @(posedge clk) begin diff --git a/hw/rtl/interfaces/VX_exec_unit_req_if.v b/hw/rtl/interfaces/VX_exec_unit_req_if.v index 53e400f4..29a55ebd 100644 --- a/hw/rtl/interfaces/VX_exec_unit_req_if.v +++ b/hw/rtl/interfaces/VX_exec_unit_req_if.v @@ -33,10 +33,8 @@ interface VX_exec_unit_req_if (); wire jal; wire [31:0] jal_offset; -`IGNORE_WARNINGS_BEGIN - wire ebreak; + wire is_etype; wire wspawn; -`IGNORE_WARNINGS_END // CSR info wire is_csr; diff --git a/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v b/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v index aa2c937f..6c6ecdfa 100644 --- a/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v +++ b/hw/rtl/interfaces/VX_frE_to_bckE_req_if.v @@ -21,9 +21,7 @@ interface VX_frE_to_bckE_req_if (); wire [2:0] branch_type; wire [19:0] upper_immed; wire [31:0] curr_PC; -`IGNORE_WARNINGS_BEGIN - wire ebreak; -`IGNORE_WARNINGS_END + wire is_etype; wire jalQual; wire jal; wire [31:0] jal_offset; diff --git a/hw/rtl/interfaces/VX_warp_ctl_if.v b/hw/rtl/interfaces/VX_warp_ctl_if.v index b16eb95b..a8169d0c 100644 --- 
a/hw/rtl/interfaces/VX_warp_ctl_if.v +++ b/hw/rtl/interfaces/VX_warp_ctl_if.v @@ -14,7 +14,7 @@ interface VX_warp_ctl_if (); wire [31:0] wspawn_pc; wire [`NUM_WARPS-1:0] wspawn_new_active; - wire ebreak; + wire whalt; // barrier wire is_barrier; diff --git a/hw/rtl/libs/VX_divide.v b/hw/rtl/libs/VX_divide.v index 1e274da4..1098fecf 100644 --- a/hw/rtl/libs/VX_divide.v +++ b/hw/rtl/libs/VX_divide.v @@ -65,16 +65,16 @@ module VX_divide #( reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1]; reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1]; - genvar pipe_stage; - for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages + genvar i; + for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages always @(posedge clock or posedge aclr) begin if (aclr) begin - numer_pipe[pipe_stage+1] <= 0; - denom_pipe[pipe_stage+1] <= 0; + numer_pipe[i+1] <= 0; + denom_pipe[i+1] <= 0; end else if (clken) begin - numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage]; - denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage]; + numer_pipe[i+1] <= numer_pipe[i]; + denom_pipe[i+1] <= denom_pipe[i]; end end end diff --git a/hw/rtl/libs/VX_mult.v b/hw/rtl/libs/VX_mult.v index b4b48288..3d689bfa 100644 --- a/hw/rtl/libs/VX_mult.v +++ b/hw/rtl/libs/VX_mult.v @@ -83,16 +83,16 @@ module VX_mult #( reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1]; reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1]; - genvar pipe_stage; - for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages + genvar i; + for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages always @(posedge clock or posedge aclr) begin if (aclr) begin - dataa_pipe[pipe_stage+1] <= 0; - datab_pipe[pipe_stage+1] <= 0; + dataa_pipe[i+1] <= 0; + datab_pipe[i+1] <= 0; end else if (clken) begin - dataa_pipe[pipe_stage+1] <= dataa_pipe[pipe_stage]; - datab_pipe[pipe_stage+1] <= datab_pipe[pipe_stage]; + dataa_pipe[i+1] <= dataa_pipe[i]; + datab_pipe[i+1] <= datab_pipe[i]; end end end diff --git 
a/hw/rtl/pipe_regs/VX_d_e_reg.v b/hw/rtl/pipe_regs/VX_d_e_reg.v index c44fbca2..41339e4d 100644 --- a/hw/rtl/pipe_regs/VX_d_e_reg.v +++ b/hw/rtl/pipe_regs/VX_d_e_reg.v @@ -19,8 +19,8 @@ module VX_d_e_reg ( .reset (reset), .stall (stall), .flush (flush), - .in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}), - .out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier }) + .in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, 
frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}), + .out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier }) ); endmodule diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 11c40f7a..51ba9e6e 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -141,7 +141,7 @@ void Simulator::wait(uint32_t cycles) { } bool Simulator::is_busy() { - return (0 == vortex_->ebreak); + return vortex_->busy; } void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { @@ -174,7 +174,8 @@ bool Simulator::run() { this->reset(); // execute program - while (!vortex_->ebreak) { + while (vortex_->busy + && !vortex_->ebreak) { this->step(); }