pipeline refactoring: centralized issue buffer
This commit is contained in:
@@ -8,6 +8,7 @@ module VX_issue #(
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_commit_is_if commit_is_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
@@ -19,6 +20,7 @@ module VX_issue #(
|
||||
VX_gpr_data_if gpr_data_if();
|
||||
wire schedule_delay;
|
||||
wire gpr_delay;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
||||
|
||||
wire alu_busy = ~alu_req_if.ready;
|
||||
wire lsu_busy = ~lsu_req_if.ready;
|
||||
@@ -33,14 +35,16 @@ module VX_issue #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.writeback_if (writeback_if),
|
||||
.commit_is_if (commit_is_if),
|
||||
.gpr_busy (gpr_delay),
|
||||
.alu_busy (alu_busy),
|
||||
.lsu_busy (lsu_busy),
|
||||
.csr_busy (csr_busy),
|
||||
.mul_busy (mul_busy),
|
||||
.fpu_busy (fpu_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.issue_tag (issue_tag),
|
||||
.schedule_delay (schedule_delay),
|
||||
`UNUSED_PIN (is_empty)
|
||||
);
|
||||
@@ -57,123 +61,54 @@ module VX_issue #(
|
||||
.gpr_delay (gpr_delay)
|
||||
);
|
||||
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
VX_decode_if decode_tmp_if();
|
||||
VX_gpr_data_if gpr_data_tmp_if();
|
||||
|
||||
wire stall = ~alu_req_if.ready || schedule_delay;
|
||||
wire flush = alu_req_if.ready && schedule_delay;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) decode_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm, gpr_data_if.rs1_data, gpr_data_if.rs2_data, gpr_data_if.rs3_data}),
|
||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data})
|
||||
);
|
||||
|
||||
VX_issue_demux issue_demux (
|
||||
.decode_if (decode_if),
|
||||
.gpr_data_if (gpr_data_if),
|
||||
.alu_req_if (alu_req_tmp_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.fpu_req_if (fpu_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
.decode_if (decode_tmp_if),
|
||||
.gpr_data_if (gpr_data_tmp_if),
|
||||
.issue_tag (issue_tmp_tag),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
wire stall_alu = ~alu_req_if.ready || schedule_delay;
|
||||
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
|
||||
wire stall_csr = ~csr_req_if.ready || schedule_delay;
|
||||
wire stall_mul = ~mul_req_if.ready || schedule_delay;
|
||||
wire stall_fpu = ~fpu_req_if.ready || schedule_delay;
|
||||
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
|
||||
|
||||
wire flush_alu = alu_req_if.ready && schedule_delay;
|
||||
wire flush_lsu = lsu_req_if.ready && schedule_delay;
|
||||
wire flush_csr = csr_req_if.ready && schedule_delay;
|
||||
wire flush_mul = mul_req_if.ready && schedule_delay;
|
||||
wire flush_fpu = fpu_req_if.ready && schedule_delay;
|
||||
wire flush_gpu = gpu_req_if.ready && schedule_delay;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_alu),
|
||||
.flush (flush_alu),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
||||
.out ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.alu_op, alu_req_if.wb, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_lsu),
|
||||
.flush (flush_lsu),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.wb, lsu_req_tmp_if.rd, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.wb, lsu_req_if.rd, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + 1 + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_csr),
|
||||
.flush (flush_csr),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
||||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_mul),
|
||||
.flush (flush_mul),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
||||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
|
||||
) fpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_fpu),
|
||||
.flush (flush_fpu),
|
||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rd_is_fp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
|
||||
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_gpu),
|
||||
.flush (flush_gpu),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if ((| alu_req_tmp_if.valid) && ~stall_alu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.alu_op, alu_req_tmp_if.wb, alu_req_tmp_if.rd, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC);
|
||||
if (alu_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||
end
|
||||
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
|
||||
if (lsu_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, lsu_req_if.rw, decode_tmp_if.rd, decode_tmp_if.wb, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset);
|
||||
end
|
||||
if ((| fpu_req_tmp_if.valid) && ~stall_fpu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data);
|
||||
if (csr_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
end
|
||||
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
|
||||
if (mul_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if ((| csr_req_tmp_if.valid) && ~stall_csr) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, op=%0d, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.csr_op, csr_req_tmp_if.wb, csr_req_tmp_if.rd, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask);
|
||||
if (fpu_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%d, rd=%0d, frm=%0h, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if ((| gpu_req_tmp_if.valid) && ~stall_gpu) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, op=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data);
|
||||
if (gpu_req_if.valid && ~stall) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
Reference in New Issue
Block a user