diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 7934a4b3..205ce8da 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -90,11 +90,6 @@ module VX_alu_unit #( wire is_less = cmp_result[32]; wire is_equal = ~(| cmp_result[31:0]); - wire br_neg = `INST_BR_NEG(br_op); - wire br_less = `INST_BR_LESS(br_op); - wire br_static = `INST_BR_STATIC(br_op); - wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static; - // output wire result_valid; @@ -178,24 +173,33 @@ module VX_alu_unit #( `endif + wire [`INST_BR_BITS-1:0] br_op_r; + wire is_less_r; + wire is_equal_r; wire is_br_op_r; assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid; VX_pipe_register #( - .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32), + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (!stall_out), - .data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}), - .data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest}) + .data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_op, is_less, is_equal, br_dest}), + .data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, is_less_r, is_equal_r, branch_ctl_if.dest}) ); assign alu_commit_if.eop = 1'b1; + `UNUSED_VAR (br_op_r) + wire br_neg = `INST_BR_NEG(br_op_r); + wire br_less = `INST_BR_LESS(br_op_r); + wire br_static = `INST_BR_STATIC(br_op_r); + assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_r; + assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static; assign branch_ctl_if.wid = alu_commit_if.wid; // can accept new request? diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 17bc317c..7b80e084 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -22,91 +22,67 @@ module VX_gpr_stage #( // ensure r0 never gets written, which can happen before the reset wire write_enable = writeback_if.valid && (writeback_if.rd != 0); - wire [(`NUM_THREADS * 4)-1:0] wren; + wire [`NUM_THREADS-1:0] wren; for (genvar i = 0; i < `NUM_THREADS; ++i) begin - assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}}; + assign wren[i] = write_enable && writeback_if.tmask[i]; end - reg [`NUM_THREADS-1:0][31:0] last_wdata; - reg [$clog2(RAM_SIZE)-1:0] last_waddr; - reg [`NUM_THREADS-1:0] last_wmask; - - always @(posedge clk) begin - last_wdata <= writeback_if.data; - last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask; - last_waddr <= waddr; - end - - wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2; wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2; - assign waddr = {writeback_if.wid, writeback_if.rd}; assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1}; assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2}; - VX_dp_ram #( - .DATAW (32 * `NUM_THREADS), - .SIZE (RAM_SIZE), - .BYTEENW (`NUM_THREADS * 4), - .INIT_ENABLE (1), - .INIT_VALUE (0), - .NO_RWCHECK (1) - ) dp_ram1 ( - .clk (clk), - .wren (wren), - .waddr (waddr), - .wdata (writeback_if.data), - .rden (1'b1), - .raddr (raddr1), - .rdata (rdata1) - ); - - VX_dp_ram #( - .DATAW (32 * `NUM_THREADS), - .SIZE (RAM_SIZE), - .BYTEENW (`NUM_THREADS * 4), - .INIT_ENABLE (1), - .INIT_VALUE (0), - .NO_RWCHECK (1) - ) dp_ram2 ( - .clk (clk), - .wren (wren), - .waddr (waddr), - .wdata (writeback_if.data), - .rden (1'b1), - .raddr (raddr2), - .rdata (rdata2) - ); - for (genvar i = 0; i < `NUM_THREADS; ++i) begin - assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i]; - assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i]; + VX_dp_ram #( + .DATAW (32), + .SIZE (RAM_SIZE), + .INIT_ENABLE (1), + .INIT_VALUE (0) + ) dp_ram1 ( + .clk (clk), + .wren (wren[i]), + .waddr (waddr), + .wdata (writeback_if.data[i]), + .rden (1'b1), + .raddr (raddr1), + .rdata (gpr_rsp_if.rs1_data[i]) + ); + + VX_dp_ram #( + .DATAW (32), + .SIZE (RAM_SIZE), + .INIT_ENABLE (1), + .INIT_VALUE (0) + ) dp_ram2 ( + .clk (clk), + .wren (wren[i]), + .waddr (waddr), + .wdata (writeback_if.data[i]), + .rden (1'b1), + .raddr (raddr2), + .rdata (gpr_rsp_if.rs2_data[i]) + ); end `ifdef EXT_F_ENABLE - wire [`NUM_THREADS-1:0][31:0] rdata3; wire [$clog2(RAM_SIZE)-1:0] raddr3; assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3}; - VX_dp_ram #( - .DATAW (32 * `NUM_THREADS), - .SIZE (RAM_SIZE), - .BYTEENW (`NUM_THREADS * 4), - .INIT_ENABLE (1), - .INIT_VALUE (0), - .NO_RWCHECK (1) - ) dp_ram3 ( - .clk (clk), - .wren (wren), - .waddr (waddr), - .wdata (writeback_if.data), - .rden (1'b1), - .raddr (raddr3), - .rdata (rdata3) - ); - - for (genvar i = 0; i < `NUM_THREADS; i++) begin - assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i]; + for (genvar i = 0; i < `NUM_THREADS; ++i) begin + VX_dp_ram #( + .DATAW (32), + .SIZE (RAM_SIZE), + .INIT_ENABLE (1), + .INIT_VALUE (0) + ) dp_ram3 ( + .clk (clk), + .wren (wren[i]), + .waddr (waddr), + .wdata (writeback_if.data[i]), + .rden (1'b1), + .raddr (raddr3), + .rdata (gpr_rsp_if.rs3_data[i]) + ); end `else `UNUSED_VAR (gpr_req_if.rs3) diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 8aaa02bb..5deb5e72 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -38,8 +38,8 @@ module VX_ibuffer #( wire is_head_ptr = empty_r[i] || (alm_empty_r[i] && reading); VX_elastic_buffer #( - .DATAW (DATAW), - .SIZE (`IBUF_SIZE), + .DATAW (DATAW), + .SIZE (`IBUF_SIZE), .OUTPUT_REG (`IBUF_SIZE > 2) ) queue ( .clk (clk), @@ -98,6 +98,8 @@ module VX_ibuffer #( reg [DATAW-1:0] deq_instr, deq_instr_n; reg [NWARPSW-1:0] num_warps; + `UNUSED_VAR (deq_instr) + // calculate valid table always @(*) begin valid_table_n = valid_table; @@ -147,11 +149,10 @@ module VX_ibuffer #( valid_table <= 0; deq_valid <= 0; num_warps <= 0; - deq_wid_rr <= 0; end else begin valid_table <= valid_table_n; deq_valid <= deq_valid_n; - deq_wid_rr <= deq_wid_rr_n; + if (warp_added && !warp_removed) begin num_warps <= num_warps + NWARPSW'(1); @@ -160,8 +161,9 @@ module VX_ibuffer #( end end - deq_wid <= deq_wid_n; - deq_instr <= deq_instr_n; + deq_wid <= deq_wid_n; + deq_wid_rr <= deq_wid_rr_n; + deq_instr <= deq_instr_n; end assign decode_if.ready = ~q_full[decode_if.wid]; @@ -183,7 +185,6 @@ module VX_ibuffer #( assign ibuffer_if.valid = deq_valid; assign ibuffer_if.wid = deq_wid; - assign ibuffer_if.wid_n = deq_wid_n; assign {ibuffer_if.tmask, ibuffer_if.PC, ibuffer_if.ex_type, @@ -195,8 +196,10 @@ module VX_ibuffer #( ibuffer_if.rs2, ibuffer_if.rs3, ibuffer_if.imm, - ibuffer_if.use_PC, - ibuffer_if.use_imm, - ibuffer_if.used_regs} = deq_instr; + ibuffer_if.use_PC, + ibuffer_if.use_imm} = deq_instr[DATAW-1:`NUM_REGS]; + + assign ibuffer_if.used_regs_n = deq_instr_n[`NUM_REGS-1:0]; + assign ibuffer_if.wid_n = deq_wid_n; endmodule \ No newline at end of file diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index 6b0423d7..662b6bcb 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -27,7 +27,7 @@ module VX_instr_demux ( wire gpu_req_ready; VX_lzc #( - .WIDTH (`NUM_THREADS) + .N (`NUM_THREADS) ) tid_select ( .in_i (ibuffer_if.tmask), .cnt_o (tid), diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 12623e3d..98a50892 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -283,13 +283,20 @@ module VX_mem_unit # ( ); end else begin // core to D-cache request - assign dcache_req_tmp_if.valid = dcache_req_if.valid; - assign dcache_req_tmp_if.addr = dcache_req_if.addr; - assign dcache_req_tmp_if.rw = dcache_req_if.rw; - assign dcache_req_tmp_if.byteen = dcache_req_if.byteen; - assign dcache_req_tmp_if.data = dcache_req_if.data; - assign dcache_req_tmp_if.tag = dcache_req_if.tag; - assign dcache_req_if.ready = dcache_req_tmp_if.ready; + for (genvar i = 0; i < `DNUM_REQS; ++i) begin + VX_skid_buffer #( + .DATAW ((32-`CLOG2(`DWORD_SIZE)) + 1 + `DWORD_SIZE + (8*`DWORD_SIZE) + `DCORE_TAG_WIDTH) + ) req_buf ( + .clk (clk), + .reset (reset), + .valid_in (dcache_req_if.valid[i]), + .data_in ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}), + .ready_in (dcache_req_if.ready[i]), + .valid_out (dcache_req_tmp_if.valid[i]), + .data_out ({dcache_req_tmp_if.addr[i], dcache_req_tmp_if.rw[i], dcache_req_tmp_if.byteen[i], dcache_req_tmp_if.data[i], dcache_req_tmp_if.tag[i]}), + .ready_out (dcache_req_tmp_if.ready[i]) + ); + end // D-cache to core reponse assign dcache_rsp_if.valid = dcache_rsp_tmp_if.valid; diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index d54e1982..94304c49 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -14,7 +14,7 @@ module VX_scoreboard #( reg [`NUM_REGS-1:0] deq_inuse_regs; - assign delay = |(deq_inuse_regs & ibuffer_if.used_regs); + assign delay = (| deq_inuse_regs); wire reserve_reg = ibuffer_if.valid && ibuffer_if.ready && ibuffer_if.wb; @@ -36,7 +36,7 @@ module VX_scoreboard #( end else begin inuse_regs <= inuse_regs_n; end - deq_inuse_regs <= inuse_regs_n[ibuffer_if.wid_n]; + deq_inuse_regs <= inuse_regs_n[ibuffer_if.wid_n] & ibuffer_if.used_regs_n; end reg [31:0] deadlock_ctr; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index f5497e0c..124d3047 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -118,9 +118,8 @@ module VX_bank #( wire creq_valid, creq_ready; VX_elastic_buffer #( - .DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)), - .SIZE (CREQ_SIZE), - .OUTPUT_REG (CREQ_SIZE > 2) + .DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH)), + .SIZE (CREQ_SIZE) ) core_req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/interfaces/VX_ibuffer_if.v b/hw/rtl/interfaces/VX_ibuffer_if.v index d2277e48..87b2c15b 100644 --- a/hw/rtl/interfaces/VX_ibuffer_if.v +++ b/hw/rtl/interfaces/VX_ibuffer_if.v @@ -7,7 +7,6 @@ interface VX_ibuffer_if (); wire valid; wire [`NW_BITS-1:0] wid; - wire [`NW_BITS-1:0] wid_n; wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; wire [`EX_BITS-1:0] ex_type; @@ -21,9 +20,11 @@ interface VX_ibuffer_if (); wire [31:0] imm; wire use_PC; wire use_imm; - wire [`NUM_REGS-1:0] used_regs; wire ready; + wire [`NUM_REGS-1:0] used_regs_n; + wire [`NW_BITS-1:0] wid_n; + endinterface `endif \ No newline at end of file