From e2100e9e87760c77a61030773bd03189516d6585 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Mon, 20 Jul 2020 09:38:54 -0400 Subject: [PATCH] pipeline refactoring --- hw/rtl/VX_alu_unit.v | 56 +++++++++- hw/rtl/VX_branch_unit.v | 82 --------------- hw/rtl/VX_commit.v | 16 +-- hw/rtl/VX_decode.v | 27 +++-- hw/rtl/VX_define.vh | 169 ++++++++++++++++-------------- hw/rtl/VX_execute.v | 15 +-- hw/rtl/VX_gpr_mux.v | 16 +-- hw/rtl/VX_gpr_stage.v | 19 +--- hw/rtl/VX_issue.v | 2 - hw/rtl/VX_pipeline.v | 6 -- hw/rtl/VX_scheduler.v | 1 - hw/rtl/VX_writeback.v | 35 ++----- hw/rtl/interfaces/VX_alu_req_if.v | 3 + hw/rtl/interfaces/VX_execute_if.v | 3 +- 14 files changed, 182 insertions(+), 268 deletions(-) delete mode 100644 hw/rtl/VX_branch_unit.v diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 8848318e..f47c90a6 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -10,12 +10,12 @@ module VX_alu_unit #( VX_alu_req_if alu_req_if, // Outputs + VX_branch_ctl_if branch_ctl_if, VX_commit_if alu_commit_if ); wire [`NUM_THREADS-1:0][31:0] alu_result; wire [`NUM_THREADS-1:0][32:0] sub_result; wire [`NUM_THREADS-1:0][32:0] shift_result; - `UNUSED_VAR (shift_result); wire [`ALU_BITS-1:0] alu_op = alu_req_if.alu_op; wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data; @@ -25,8 +25,8 @@ module VX_alu_unit #( for (i = 0; i < `NUM_THREADS; i++) begin - wire [32:0] sub_in1 = {(alu_op != `ALU_SLTU) & alu_in1[i][31], alu_in1[i]}; - wire [32:0] sub_in2 = {(alu_op != `ALU_SLTU) & alu_in2[i][31], alu_in2[i]}; + wire [32:0] sub_in1 = {(alu_op != `ALU_SLTU) & (alu_op != `ALU_BLTU) & (alu_op != `ALU_BGEU) & alu_in1[i][31], alu_in1[i]}; + wire [32:0] sub_in2 = {(alu_op != `ALU_SLTU) & (alu_op != `ALU_BLTU) & (alu_op != `ALU_BGEU) & alu_in2[i][31], alu_in2[i]}; assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2); wire [32:0] shift_in1 = {(alu_op == `ALU_SRA) & alu_in1[i][31], alu_in1[i]}; @@ -48,8 +48,56 @@ module VX_alu_unit #( end end + wire [`NT_BITS-1:0] br_result_index; + + VX_priority_encoder #( + .N(`NUM_THREADS) + ) choose_alu_result ( + .data_in (alu_req_if.valid), + .data_out (br_result_index), + `UNUSED_PIN (valid_out) + ); + + wire [32:0] br_result = sub_result[br_result_index]; + wire br_sign = br_result[32]; + wire br_nzero = (| br_result[31:0]); + + wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.alu_op); + + reg br_taken; + always @(*) begin + case (br_op) + `BR_NE: br_taken = br_nzero; + `BR_EQ: br_taken = ~br_nzero; + `BR_LT, + `BR_LTU: br_taken = br_sign; + `BR_GE, + `BR_GEU: br_taken = ~br_sign; + default: br_taken = 1'b1; + endcase + end + + wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC; + wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset); + + wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR); + wire is_br_valid = `IS_BR_OP(alu_op) && (| alu_req_if.valid); + + wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result; + wire stall = ~alu_commit_if.ready && (| alu_commit_if.valid); + VX_generic_register #( + .N(1 + `NW_BITS + 1 + 32) + ) rsp_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (0), + .in ({is_br_valid, alu_req_if.warp_num, br_taken, br_dest}), + .out ({branch_ctl_if.valid, branch_ctl_if.warp_num, branch_ctl_if.taken, branch_ctl_if.dest}) + ); + VX_generic_register #( .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), ) alu_reg ( @@ -57,7 +105,7 @@ module VX_alu_unit #( .reset (reset), .stall (stall), .flush (0), - .in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_result}), + .in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result}), .out ({alu_commit_if.valid, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data}) ); diff --git a/hw/rtl/VX_branch_unit.v b/hw/rtl/VX_branch_unit.v deleted file mode 100644 index a708e1da..00000000 --- a/hw/rtl/VX_branch_unit.v +++ /dev/null @@ -1,82 +0,0 @@ -`include "VX_define.vh" - -module VX_branch_unit #( - parameter CORE_ID = 0 -) ( - input wire clk, - input wire reset, - - // Inputs - VX_branch_req_if branch_req_if, - - // Outputs - VX_branch_ctl_if branch_ctl_if, - VX_commit_if branch_commit_if -); - - wire [`NT_BITS-1:0] br_result_index; - - VX_priority_encoder #( - .N(`NUM_THREADS) - ) choose_alu_result ( - .data_in (branch_req_if.valid), - .data_out (br_result_index), - `UNUSED_PIN (valid_out) - ); - - wire [`BR_BITS-1:0] br_op = branch_req_if.br_op; - wire [31:0] rs1_data = branch_req_if.rs1_data[br_result_index]; - wire [31:0] rs2_data = branch_req_if.rs2_data[br_result_index]; - - wire [32:0] sub_in1 = {(br_op != `BR_LTU) & (br_op != `BR_GEU) & rs1_data[31], rs1_data}; - wire [32:0] sub_in2 = {(br_op != `BR_LTU) & (br_op != `BR_GEU) & rs2_data[31], rs2_data}; - wire [32:0] sub_res = $signed(sub_in1) - $signed(sub_in2); - - wire sub_sign = sub_res[32]; - wire sub_nzero = (| sub_res[31:0]); - - reg br_taken; - always @(*) begin - case (br_op) - `BR_NE: br_taken = sub_nzero; - `BR_EQ: br_taken = ~sub_nzero; - `BR_LT, - `BR_LTU: br_taken = sub_sign; - `BR_GE, - `BR_GEU: br_taken = ~sub_sign; - default: br_taken = 1'b1; - endcase - end - - wire in_valid = (| branch_req_if.valid); - - wire [31:0] base_addr = (br_op == `BR_JALR) ? rs1_data : branch_req_if.curr_PC; - wire [31:0] br_dest = $signed(base_addr) + $signed(branch_req_if.offset); - - wire stall = (~branch_commit_if.ready && (| branch_commit_if.valid)); - - VX_generic_register #( - .N(1 + `NW_BITS + 1 + 32) - ) rsp_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (0), - .in ({in_valid, branch_req_if.warp_num, br_taken, br_dest}), - .out ({branch_ctl_if.valid, branch_ctl_if.warp_num, branch_ctl_if.taken, branch_ctl_if.dest}) - ); - - VX_generic_register #( - .N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)), - ) wb_reg ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (0), - .in ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.rd, branch_req_if.wb, {`NUM_THREADS{branch_req_if.next_PC}}}), - .out ({branch_commit_if.valid, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.rd, branch_commit_if.wb, branch_commit_if.data}) - ); - - assign branch_req_if.ready = ~stall; - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index 6c0b2fb2..02ae7aa0 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -8,7 +8,6 @@ module VX_commit #( // inputs VX_commit_if alu_commit_if, - VX_commit_if branch_commit_if, VX_commit_if lsu_commit_if, VX_commit_if mul_commit_if, VX_commit_if csr_commit_if, @@ -21,7 +20,6 @@ module VX_commit #( wire [`NUM_EXS-1:0] commited_mask; assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready), - ((| branch_commit_if.valid) && branch_commit_if.ready), ((| lsu_commit_if.valid) && lsu_commit_if.ready), ((| mul_commit_if.valid) && mul_commit_if.ready), ((| csr_commit_if.valid) && csr_commit_if.ready), @@ -64,7 +62,6 @@ module VX_commit #( .reset (reset), .alu_commit_if (alu_commit_if), - .branch_commit_if(branch_commit_if), .lsu_commit_if (lsu_commit_if), .csr_commit_if (csr_commit_if), .mul_commit_if (mul_commit_if), @@ -75,22 +72,19 @@ module VX_commit #( `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin if ((| alu_commit_if.valid) && alu_commit_if.ready) begin - $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data); - end - if ((| branch_commit_if.valid) && branch_commit_if.ready) begin - $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, branch_commit_if.warp_num, branch_commit_if.curr_PC, branch_commit_if.wb, branch_commit_if.rd, branch_commit_if.data); + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.warp_num, alu_commit_if.curr_PC, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data); end if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin - $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data); end if ((| mul_commit_if.valid) && mul_commit_if.ready) begin - $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data); + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data); end if ((| csr_commit_if.valid) && csr_commit_if.ready) begin - $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data); end if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin - $display("%t: Core%0d-commit: warp=%0d, PC=%0h, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data); + $display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data); end end `endif diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 44a84c88..418eb70c 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -92,7 +92,7 @@ module VX_decode #( // BRANCH always @(*) begin - br_op = `BR_OTHER; + br_op = `BR_EQ; case (opcode) `INST_B: begin case (func3) @@ -192,20 +192,20 @@ module VX_decode #( assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC; assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4; - assign decode_tmp_if.ex_type = is_br ? `EX_BR : - is_lsu ? `EX_LSU : - is_csr ? `EX_CSR : - is_mul ? `EX_MUL : - is_gpu ? `EX_GPU : + assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU : + is_csr ? `EX_CSR : + is_mul ? `EX_MUL : + is_gpu ? `EX_GPU : + is_br ? `EX_ALU : (is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU : `EX_NOP; - assign decode_tmp_if.instr_op = is_br ? `OP_BITS'(br_op) : - is_lsu ? `OP_BITS'(lsu_op) : - is_csr ? `OP_BITS'(csr_op) : - is_mul ? `OP_BITS'(mul_op) : - is_gpu ? `OP_BITS'(gpu_op) : - (is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) : + assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) : + is_csr ? `OP_BITS'(csr_op) : + is_mul ? `OP_BITS'(mul_op) : + is_gpu ? `OP_BITS'(gpu_op) : + is_br ? `OP_BITS'({1'b1, br_op}) : + (is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) : 0; assign decode_tmp_if.rd = rd; @@ -219,7 +219,7 @@ module VX_decode #( is_csr ? 32'(u_12) : src2_imm; - assign decode_tmp_if.rs1_is_PC = is_auipc; + assign decode_tmp_if.rs1_is_PC = is_auipc; assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm; @@ -269,7 +269,6 @@ module VX_decode #( // trap unsupported instructions assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER)); - assert(~(~stall && (decode_tmp_if.ex_type == `EX_BR) && `BR_OP(decode_tmp_if.instr_op) == `BR_OTHER)); assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.instr_op) == `CSR_OTHER)); assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.instr_op) == `GPU_OTHER)); end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 775028e5..b1b8759d 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -95,35 +95,12 @@ `define INST_SYS 7'b1110011 `define INST_GPU 7'b1101011 -`define OP_BITS 4 - -`define ALU_ADD 4'h0 -`define ALU_SUB 4'h1 -`define ALU_SLL 4'h2 -`define ALU_SRL 4'h3 -`define ALU_SRA 4'h4 -`define ALU_SLT 4'h5 -`define ALU_SLTU 4'h6 -`define ALU_XOR 4'h7 -`define ALU_OR 4'h8 -`define ALU_AND 4'h9 -`define ALU_LUI 4'hA -`define ALU_AUIPC 4'hB -`define ALU_OTHER 4'hF -`define ALU_BITS 4 -`define ALU_OP(x) x[`ALU_BITS-1:0] - -`define MUL_MUL 3'h0 -`define MUL_MULH 3'h1 -`define MUL_MULHSU 3'h2 -`define MUL_MULHU 3'h3 -`define MUL_DIV 3'h4 -`define MUL_DIVU 3'h5 -`define MUL_REM 3'h6 -`define MUL_REMU 3'h7 -`define MUL_BITS 3 -`define MUL_OP(x) x[`MUL_BITS-1:0] -`define IS_DIV_OP(x) x[2] +`define BYTEEN_SB 3'h0 +`define BYTEEN_SH 3'h1 +`define BYTEEN_SW 3'h2 +`define BYTEEN_UB 3'h4 +`define BYTEEN_UH 3'h5 +`define BYTEEN_BITS 3 `define BR_EQ 4'h0 `define BR_NE 4'h1 @@ -138,16 +115,63 @@ `define BR_MRET 4'hA `define BR_SRET 4'hB `define BR_DRET 4'hC -`define BR_OTHER 4'hF `define BR_BITS 4 -`define BR_OP(x) x[`BR_BITS-1:0] -`define BYTEEN_SB 3'h0 -`define BYTEEN_SH 3'h1 -`define BYTEEN_SW 3'h2 -`define BYTEEN_UB 3'h4 -`define BYTEEN_UH 3'h5 -`define BYTEEN_BITS 3 +`define OP_BITS 5 + +`define ALU_ADD 5'h00 +`define ALU_SUB 5'h01 +`define ALU_SLL 5'h02 +`define ALU_SRL 5'h03 +`define ALU_SRA 5'h04 +`define ALU_SLT 5'h05 +`define ALU_SLTU 5'h06 +`define ALU_XOR 5'h07 +`define ALU_OR 5'h08 +`define ALU_AND 5'h09 +`define ALU_LUI 5'h0A +`define ALU_AUIPC 5'h0B +`define ALU_BEQ {1'b1, `BR_EQ} +`define ALU_BNE {1'b1, `BR_NE} +`define ALU_BLT {1'b1, `BR_LT} +`define ALU_BGE {1'b1, `BR_GE} +`define ALU_BLTU {1'b1, `BR_LTU} +`define ALU_BGEU {1'b1, `BR_GEU} +`define ALU_JAL {1'b1, `BR_JAL} +`define ALU_JALR {1'b1, `BR_JALR} +`define ALU_ECALL {1'b1, `BR_ECALL} +`define ALU_EBREAK {1'b1, `BR_EBREAK} +`define ALU_MRET {1'b1, `BR_MRET} +`define ALU_SRET {1'b1, `BR_SRET} +`define ALU_DRET {1'b1, `BR_DRET} +`define ALU_OTHER 5'h1F +`define ALU_BITS 5 +`define ALU_OP(x) x[`ALU_BITS-1:0] +`define BR_OP(x) x[`BR_BITS-1:0] +`define IS_BR_OP(x) x[4] + +`define MUL_MUL 3'h0 +`define MUL_MULH 3'h1 +`define MUL_MULHSU 3'h2 +`define MUL_MULHU 3'h3 +`define MUL_DIV 3'h4 +`define MUL_DIVU 3'h5 +`define MUL_REM 3'h6 +`define MUL_REMU 3'h7 +`define MUL_BITS 3 +`define MUL_OP(x) x[`MUL_BITS-1:0] +`define IS_DIV_OP(x) x[2] + +`define LSU_LB {1'b0, `BYTEEN_SB} +`define LSU_LH {1'b0, `BYTEEN_SH} +`define LSU_LW {1'b0, `BYTEEN_SW} +`define LSU_LBU {1'b0, `BYTEEN_UB} +`define LSU_LHU {1'b0, `BYTEEN_UH} +`define LSU_SB {1'b1, `BYTEEN_SB} +`define LSU_SH {1'b1, `BYTEEN_SH} +`define LSU_SW {1'b1, `BYTEEN_SW} +`define LSU_SBU {1'b1, `BYTEEN_UB} +`define LSU_SHU {1'b1, `BYTEEN_UH} `define LSU_BITS 4 `define LSU_RW(x) x[3] `define LSU_BE(x) x[2:0] @@ -170,14 +194,13 @@ `define EX_NOP 3'h0 `define EX_ALU 3'h1 -`define EX_BR 3'h2 -`define EX_MUL 3'h3 -`define EX_LSU 3'h4 -`define EX_CSR 3'h5 -`define EX_GPU 3'h6 +`define EX_MUL 3'h2 +`define EX_LSU 3'h3 +`define EX_CSR 3'h4 +`define EX_GPU 3'h5 `define EX_BITS 3 -`define NUM_EXS 6 +`define NUM_EXS 5 `define NE_BITS `LOG2UP(`NUM_EXS) `define WB_NO 2'h0 @@ -371,8 +394,7 @@ task print_ex_type; input [`EX_BITS-1:0] ex; begin case (ex) - `EX_ALU: $write("ALU"); - `EX_BR: $write("BR"); + `EX_ALU: $write("ALU"); `EX_LSU: $write("LSU"); `EX_CSR: $write("CSR"); `EX_MUL: $write("MUL"); @@ -401,32 +423,27 @@ task print_instr_op; `ALU_AND: $write("AND"); `ALU_LUI: $write("LUI"); `ALU_AUIPC: $write("AUIPC"); + `ALU_BEQ: $write("EQ"); + `ALU_BNE: $write("NE"); + `ALU_BLT: $write("LT"); + `ALU_BGE: $write("GE"); + `ALU_BLTU: $write("LTU"); + `ALU_BGEU: $write("GEU"); + `ALU_JAL: $write("JAL"); + `ALU_JALR: $write("JALR"); + `ALU_ECALL: $write("ECALL"); + `ALU_EBREAK:$write("EBREAK"); + `ALU_MRET: $write("MRET"); + `ALU_SRET: $write("SRET"); + `ALU_DRET: $write("DRET"); default: $write("?"); endcase end - `EX_BR: begin - case (`BR_BITS'(op)) - `BR_EQ: $write("EQ"); - `BR_NE: $write("NE"); - `BR_LT: $write("LT"); - `BR_GE: $write("GE"); - `BR_LTU: $write("LTU"); - `BR_GEU: $write("GEU"); - `BR_JAL: $write("JAL"); - `BR_JALR: $write("JALR"); - `BR_ECALL: $write("ECALL"); - `BR_EBREAK: $write("EBREAK"); - `BR_MRET: $write("MRET"); - `BR_SRET: $write("SRET"); - `BR_DRET: $write("DRET"); - default: $write("?"); - endcase - end `EX_MUL: begin case (`MUL_BITS'(op)) `MUL_MUL: $write("MUL"); `MUL_MULH: $write("MULH"); - `MUL_MULHSU: $write("MULHSU"); + `MUL_MULHSU:$write("MULHSU"); `MUL_MULHU: $write("MULHU"); `MUL_DIV: $write("DIV"); `MUL_DIVU: $write("DIVU"); @@ -437,17 +454,17 @@ task print_instr_op; end `EX_LSU: begin case (`LSU_BITS'(op)) - 4'b0000: $write("LB"); - 4'b0001: $write("LH"); - 4'b0010: $write("LW"); - 4'b0100: $write("LBU"); - 4'b0101: $write("LHU"); - 4'b1000: $write("SB"); - 4'b1001: $write("SH"); - 4'b1010: $write("SW"); - 4'b1100: $write("SBU"); - 4'b1101: $write("SHU"); - default: $write("?"); + `LSU_LB: $write("LB"); + `LSU_LH: $write("LH"); + `LSU_LW: $write("LW"); + `LSU_LBU: $write("LBU"); + `LSU_LHU: $write("LHU"); + `LSU_SB: $write("SB"); + `LSU_SH: $write("SH"); + `LSU_SW: $write("SW"); + `LSU_SBU: $write("SBU"); + `LSU_SHU: $write("SHU"); + default: $write("?"); endcase end `EX_CSR: begin @@ -461,7 +478,7 @@ task print_instr_op; `EX_GPU: begin case (`GPU_BITS'(op)) `GPU_TMC: $write("TMC"); - `GPU_WSPAWN: $write("WSPAWN"); + `GPU_WSPAWN:$write("WSPAWN"); `GPU_SPLIT: $write("SPLIT"); `GPU_JOIN: $write("JOIN"); `GPU_BAR: $write("BAR"); diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index 166e5c53..871fcaaf 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -22,7 +22,6 @@ module VX_execute #( // inputs VX_alu_req_if alu_req_if, - VX_branch_req_if branch_req_if, VX_lsu_req_if lsu_req_if, VX_csr_req_if csr_req_if, VX_mul_req_if mul_req_if, @@ -32,7 +31,6 @@ module VX_execute #( VX_branch_ctl_if branch_ctl_if, VX_warp_ctl_if warp_ctl_if, VX_commit_if alu_commit_if, - VX_commit_if branch_commit_if, VX_commit_if lsu_commit_if, VX_commit_if csr_commit_if, VX_commit_if mul_commit_if, @@ -47,17 +45,8 @@ module VX_execute #( .clk (clk), .reset (reset), .alu_req_if (alu_req_if), - .alu_commit_if (alu_commit_if) - ); - - VX_branch_unit #( - .CORE_ID(CORE_ID) - ) branch_unit ( - .clk (clk), - .reset (reset), - .branch_req_if (branch_req_if), .branch_ctl_if (branch_ctl_if), - .branch_commit_if(branch_commit_if) + .alu_commit_if (alu_commit_if) ); VX_lsu_unit #( @@ -101,7 +90,7 @@ module VX_execute #( .gpu_commit_if (gpu_commit_if) ); - assign ebreak = (| branch_req_if.valid) && (branch_req_if.br_op == `BR_EBREAK || branch_req_if.br_op == `BR_ECALL); + assign ebreak = (| alu_req_if.valid) && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL); `SCOPE_ASSIGN(scope_decode_valid, decode_if.valid); `SCOPE_ASSIGN(scope_decode_warp_num, decode_if.warp_num); diff --git a/hw/rtl/VX_gpr_mux.v b/hw/rtl/VX_gpr_mux.v index 4b5d9615..9de519bc 100644 --- a/hw/rtl/VX_gpr_mux.v +++ b/hw/rtl/VX_gpr_mux.v @@ -8,7 +8,6 @@ module VX_gpr_mux ( // outputs VX_alu_req_if alu_req_if, - VX_branch_req_if branch_req_if, VX_lsu_req_if lsu_req_if, VX_csr_req_if csr_req_if, VX_mul_req_if mul_req_if, @@ -16,7 +15,6 @@ module VX_gpr_mux ( ); wire[`NUM_THREADS-1:0] is_alu = {`NUM_THREADS{execute_if.ex_type == `EX_ALU}}; - wire[`NUM_THREADS-1:0] is_br = {`NUM_THREADS{execute_if.ex_type == `EX_BR}}; wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{execute_if.ex_type == `EX_LSU}}; wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{execute_if.ex_type == `EX_CSR}}; wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{execute_if.ex_type == `EX_MUL}}; @@ -31,18 +29,8 @@ module VX_gpr_mux ( assign alu_req_if.wb = execute_if.wb; assign alu_req_if.rs1_data = rs1_data; assign alu_req_if.rs2_data = rs2_data; - - // BR unit - assign branch_req_if.valid = execute_if.valid & is_br; - assign branch_req_if.warp_num = execute_if.warp_num; - assign branch_req_if.curr_PC = execute_if.curr_PC; - assign branch_req_if.br_op = `BR_OP(execute_if.instr_op); - assign branch_req_if.offset = execute_if.imm; - assign branch_req_if.next_PC = execute_if.next_PC; - assign branch_req_if.rs1_data = rs1_data; - assign branch_req_if.rs2_data = rs2_data; - assign branch_req_if.rd = execute_if.rd; - assign branch_req_if.wb = execute_if.wb; + assign alu_req_if.offset = execute_if.imm; + assign alu_req_if.next_PC = execute_if.next_PC; // LSU unit assign lsu_req_if.valid = execute_if.valid & is_lsu; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index bfa46e58..4f04fd75 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -12,7 +12,6 @@ module VX_gpr_stage #( // outputs VX_alu_req_if alu_req_if, - VX_branch_req_if branch_req_if, VX_lsu_req_if lsu_req_if, VX_csr_req_if csr_req_if, VX_mul_req_if mul_req_if, @@ -53,7 +52,6 @@ module VX_gpr_stage #( endgenerate VX_alu_req_if alu_req_tmp_if(); - VX_branch_req_if branch_req_tmp_if(); VX_lsu_req_if lsu_req_tmp_if(); VX_csr_req_if csr_req_tmp_if(); VX_mul_req_if mul_req_tmp_if(); @@ -64,7 +62,6 @@ module VX_gpr_stage #( .rs1_data (rs1_data), .rs2_data (rs2_data), .alu_req_if (alu_req_if), - .branch_req_if (branch_req_tmp_if), .lsu_req_if (lsu_req_tmp_if), .csr_req_if (csr_req_tmp_if), .mul_req_if (mul_req_tmp_if), @@ -72,7 +69,6 @@ module VX_gpr_stage #( ); wire stall_alu = ~alu_req_if.ready && (| alu_req_if.valid); - wire stall_br = ~branch_req_if.ready && (| branch_req_if.valid); wire stall_lsu = ~lsu_req_if.ready && (| lsu_req_if.valid); wire stall_csr = ~csr_req_if.ready && (| csr_req_if.valid); wire stall_mul = ~mul_req_if.ready && (| mul_req_if.valid); @@ -89,17 +85,6 @@ module VX_gpr_stage #( .out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.rd, alu_req_if.wb}) ); - VX_generic_register #( - .N(`NUM_THREADS +`NW_BITS + 32 + 32 + `BR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + `NR_BITS + `WB_BITS) - ) br_reg ( - .clk (clk), - .reset (reset), - .stall (stall_br), - .flush (0), - .in ({branch_req_tmp_if.valid, branch_req_tmp_if.warp_num, branch_req_tmp_if.curr_PC, branch_req_tmp_if.next_PC, branch_req_tmp_if.br_op, branch_req_tmp_if.rs1_data, branch_req_tmp_if.rs2_data, branch_req_tmp_if.offset, branch_req_tmp_if.rd, branch_req_tmp_if.wb}), - .out ({branch_req_if.valid, branch_req_if.warp_num, branch_req_if.curr_PC, branch_req_if.next_PC, branch_req_if.br_op, branch_req_if.rs1_data, branch_req_if.rs2_data, branch_req_if.offset, branch_req_if.rd, branch_req_if.wb}) - ); - VX_generic_register #( .N(`NUM_THREADS + `NW_BITS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 1 + `BYTEEN_BITS + `NR_BITS + `WB_BITS) ) lsu_reg ( @@ -145,7 +130,6 @@ module VX_gpr_stage #( ); assign execute_if.alu_ready = ~stall_alu; - assign execute_if.br_ready = ~stall_br; assign execute_if.lsu_ready = ~stall_lsu; assign execute_if.csr_ready = ~stall_csr; assign execute_if.mul_ready = ~stall_mul; @@ -159,8 +143,7 @@ module VX_gpr_stage #( $display("%t: Core%0d-GPR: warp=%0d, PC=%0h, a=%0h, b=%0h", $time, CORE_ID, execute_if.warp_num, execute_if.curr_PC, rs1_data, rs2_data); // scheduler ensures the destination execute unit is ready (garanteed by the scheduler) - assert((execute_if.ex_type != `EX_ALU) || alu_req_if.ready); - assert((execute_if.ex_type != `EX_BR) || branch_req_if.ready); + assert((execute_if.ex_type != `EX_ALU) || alu_req_if.ready); assert((execute_if.ex_type != `EX_LSU) || lsu_req_if.ready); assert((execute_if.ex_type != `EX_CSR) || csr_req_if.ready); assert((execute_if.ex_type != `EX_MUL) || mul_req_if.ready); diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 125b070b..446453fb 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -10,7 +10,6 @@ module VX_issue #( VX_wb_if writeback_if, VX_alu_req_if alu_req_if, - VX_branch_req_if branch_req_if, VX_lsu_req_if lsu_req_if, VX_csr_req_if csr_req_if, VX_mul_req_if mul_req_if, @@ -39,7 +38,6 @@ module VX_issue #( .writeback_if (writeback_if), .alu_req_if (alu_req_if), - .branch_req_if (branch_req_if), .lsu_req_if (lsu_req_if), .csr_req_if (csr_req_if), .mul_req_if (mul_req_if), diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 2f007b35..a548e0db 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -107,7 +107,6 @@ module VX_pipeline #( VX_warp_ctl_if warp_ctl_if(); VX_ifetch_rsp_if ifetch_rsp_if(); VX_alu_req_if alu_req_if(); - VX_branch_req_if branch_req_if(); VX_lsu_req_if lsu_req_if(); VX_csr_req_if csr_req_if(); VX_mul_req_if mul_req_if(); @@ -116,7 +115,6 @@ module VX_pipeline #( VX_wstall_if wstall_if(); VX_join_if join_if(); VX_commit_if alu_commit_if(); - VX_commit_if branch_commit_if(); VX_commit_if lsu_commit_if(); VX_commit_if csr_commit_if(); VX_commit_if mul_commit_if(); @@ -158,7 +156,6 @@ module VX_pipeline #( .writeback_if (writeback_if), .alu_req_if (alu_req_if), - .branch_req_if (branch_req_if), .lsu_req_if (lsu_req_if), .csr_req_if (csr_req_if), .mul_req_if (mul_req_if), @@ -181,7 +178,6 @@ module VX_pipeline #( .perf_cntrs_if (perf_cntrs_if), .alu_req_if (alu_req_if), - .branch_req_if (branch_req_if), .lsu_req_if (lsu_req_if), .csr_req_if (csr_req_if), .mul_req_if (mul_req_if), @@ -190,7 +186,6 @@ module VX_pipeline #( .warp_ctl_if (warp_ctl_if), .branch_ctl_if (branch_ctl_if), .alu_commit_if (alu_commit_if), - .branch_commit_if(branch_commit_if), .lsu_commit_if (lsu_commit_if), .csr_commit_if (csr_commit_if), .mul_commit_if (mul_commit_if), @@ -206,7 +201,6 @@ module VX_pipeline #( .reset (reset), .alu_commit_if (alu_commit_if), - .branch_commit_if(branch_commit_if), .lsu_commit_if (lsu_commit_if), .csr_commit_if (csr_commit_if), .mul_commit_if (mul_commit_if), diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index 0b90695c..29c77d52 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -29,7 +29,6 @@ module VX_scheduler #( wire ex_stalled = (| decode_if.valid) && ((!execute_if.alu_ready && (decode_if.ex_type == `EX_ALU)) - || (!execute_if.br_ready && (decode_if.ex_type == `EX_BR)) || (!execute_if.lsu_ready && (decode_if.ex_type == `EX_LSU)) || (!execute_if.csr_ready && (decode_if.ex_type == `EX_CSR)) || (!execute_if.mul_ready && (decode_if.ex_type == `EX_MUL)) diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 0d4cef3f..66797e94 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -8,7 +8,6 @@ module VX_writeback #( // inputs VX_commit_if alu_commit_if, - VX_commit_if branch_commit_if, VX_commit_if lsu_commit_if, VX_commit_if mul_commit_if, VX_commit_if csr_commit_if, @@ -17,7 +16,6 @@ module VX_writeback #( VX_wb_if writeback_if ); - wire br_valid = (| branch_commit_if.valid) && (branch_commit_if.wb != `WB_NO); wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO); wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO); wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO); @@ -25,30 +23,25 @@ module VX_writeback #( VX_wb_if writeback_tmp_if(); - assign writeback_tmp_if.valid = br_valid ? branch_commit_if.valid : - lsu_valid ? lsu_commit_if.valid : + assign writeback_tmp_if.valid = lsu_valid ? lsu_commit_if.valid : mul_valid ? mul_commit_if.valid : alu_valid ? alu_commit_if.valid : csr_valid ? csr_commit_if.valid : 0; - assign writeback_tmp_if.warp_num = br_valid ? branch_commit_if.warp_num : - lsu_valid ? lsu_commit_if.warp_num : + assign writeback_tmp_if.warp_num = lsu_valid ? lsu_commit_if.warp_num : mul_valid ? mul_commit_if.warp_num : alu_valid ? alu_commit_if.warp_num : - csr_valid ? csr_commit_if.warp_num : - + csr_valid ? csr_commit_if.warp_num : 0; - assign writeback_tmp_if.data = br_valid ? branch_commit_if.data : - lsu_valid ? lsu_commit_if.data : + assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data : mul_valid ? mul_commit_if.data : alu_valid ? alu_commit_if.data : csr_valid ? csr_commit_if.data : 0; - assign writeback_tmp_if.rd = br_valid ? branch_commit_if.rd : - lsu_valid ? lsu_commit_if.rd : + assign writeback_tmp_if.rd = lsu_valid ? lsu_commit_if.rd : mul_valid ? mul_commit_if.rd : alu_valid ? alu_commit_if.rd : csr_valid ? csr_commit_if.rd : @@ -67,11 +60,10 @@ module VX_writeback #( .out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data}) ); - assign branch_commit_if.ready = !stall; - assign lsu_commit_if.ready = !stall && !br_valid; - assign mul_commit_if.ready = !stall && !br_valid && !lsu_valid; - assign alu_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid; - assign csr_commit_if.ready = !stall && !br_valid && !lsu_valid && !mul_valid && !alu_valid; + assign lsu_commit_if.ready = !stall; + assign mul_commit_if.ready = !stall && !lsu_valid; + assign alu_commit_if.ready = !stall && !lsu_valid && !mul_valid; + assign csr_commit_if.ready = !stall && !lsu_valid && !mul_valid && !alu_valid; // special workaround to control RISC-V benchmarks termination on Verilator reg [31:0] last_data_wb /* verilator public */; @@ -81,11 +73,4 @@ module VX_writeback #( end end -endmodule - - - - - - - +endmodule \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_alu_req_if.v b/hw/rtl/interfaces/VX_alu_req_if.v index 42e427d5..ce90f96f 100644 --- a/hw/rtl/interfaces/VX_alu_req_if.v +++ b/hw/rtl/interfaces/VX_alu_req_if.v @@ -13,6 +13,9 @@ interface VX_alu_req_if (); wire [`NUM_THREADS-1:0][31:0] rs1_data; wire [`NUM_THREADS-1:0][31:0] rs2_data; + + wire [31:0] offset; + wire [31:0] next_PC; wire [`NR_BITS-1:0] rd; wire [`WB_BITS-1:0] wb; diff --git a/hw/rtl/interfaces/VX_execute_if.v b/hw/rtl/interfaces/VX_execute_if.v index 8188c0e7..4f698c76 100644 --- a/hw/rtl/interfaces/VX_execute_if.v +++ b/hw/rtl/interfaces/VX_execute_if.v @@ -21,8 +21,7 @@ interface VX_execute_if (); wire [`WB_BITS-1:0] wb; - wire alu_ready; - wire br_ready; + wire alu_ready; wire mul_ready; wire lsu_ready; wire csr_ready;