pipeline optimization: fixed GPR fanout delay to execute units
This commit is contained in:
@@ -29,8 +29,8 @@ module VX_alu_unit #(
|
|||||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
|
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
@@ -40,7 +40,7 @@ module VX_alu_unit #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ module VX_csr_data #(
|
|||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||||
VX_csr_to_issue_if csr_to_issue_if,
|
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||||
|
|
||||||
input wire read_enable,
|
input wire read_enable,
|
||||||
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||||
@@ -144,6 +144,6 @@ module VX_csr_data #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
assign read_data = read_data_r;
|
assign read_data = read_data_r;
|
||||||
assign csr_to_issue_if.frm = csr_frm[csr_to_issue_if.wid];
|
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid];
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -7,7 +7,7 @@ module VX_csr_unit #(
|
|||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||||
VX_csr_to_issue_if csr_to_issue_if,
|
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||||
|
|
||||||
VX_csr_io_req_if csr_io_req_if,
|
VX_csr_io_req_if csr_io_req_if,
|
||||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||||
@@ -47,7 +47,7 @@ module VX_csr_unit #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.cmt_to_csr_if (cmt_to_csr_if),
|
.cmt_to_csr_if (cmt_to_csr_if),
|
||||||
.csr_to_issue_if(csr_to_issue_if),
|
.csr_to_fpu_if (csr_to_fpu_if),
|
||||||
.read_enable (csr_pipe_req_if.valid),
|
.read_enable (csr_pipe_req_if.valid),
|
||||||
.read_addr (csr_pipe_req_if.csr_addr),
|
.read_addr (csr_pipe_req_if.csr_addr),
|
||||||
.read_wid (csr_pipe_req_if.wid),
|
.read_wid (csr_pipe_req_if.wid),
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ module VX_execute #(
|
|||||||
VX_gpu_req_if gpu_req_if,
|
VX_gpu_req_if gpu_req_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_csr_to_issue_if csr_to_issue_if,
|
|
||||||
VX_branch_ctl_if branch_ctl_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
VX_exu_to_cmt_if alu_commit_if,
|
VX_exu_to_cmt_if alu_commit_if,
|
||||||
@@ -41,6 +40,7 @@ module VX_execute #(
|
|||||||
input wire busy,
|
input wire busy,
|
||||||
output wire ebreak
|
output wire ebreak
|
||||||
);
|
);
|
||||||
|
VX_csr_to_fpu_if csr_to_fpu_if();
|
||||||
|
|
||||||
VX_alu_unit #(
|
VX_alu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -70,7 +70,7 @@ module VX_execute #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.cmt_to_csr_if (cmt_to_csr_if),
|
.cmt_to_csr_if (cmt_to_csr_if),
|
||||||
.csr_to_issue_if(csr_to_issue_if),
|
.csr_to_fpu_if (csr_to_fpu_if),
|
||||||
.csr_io_req_if (csr_io_req_if),
|
.csr_io_req_if (csr_io_req_if),
|
||||||
.csr_io_rsp_if (csr_io_rsp_if),
|
.csr_io_rsp_if (csr_io_rsp_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
@@ -104,7 +104,8 @@ module VX_execute #(
|
|||||||
) fpu_unit (
|
) fpu_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.fpu_req_if (fpu_req_if),
|
.fpu_req_if (fpu_req_if),
|
||||||
|
.csr_to_fpu_if (csr_to_fpu_if),
|
||||||
.fpu_commit_if (fpu_commit_if)
|
.fpu_commit_if (fpu_commit_if)
|
||||||
);
|
);
|
||||||
`else
|
`else
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ module VX_fpu_unit #(
|
|||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_fpu_req_if fpu_req_if,
|
VX_fpu_req_if fpu_req_if,
|
||||||
|
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_fpu_to_cmt_if fpu_commit_if
|
VX_fpu_to_cmt_if fpu_commit_if
|
||||||
@@ -56,6 +57,10 @@ module VX_fpu_unit #(
|
|||||||
|
|
||||||
wire valid_in = fpu_req_if.valid && ~fpuq_full;
|
wire valid_in = fpu_req_if.valid && ~fpuq_full;
|
||||||
|
|
||||||
|
// resolve dynamic FRM
|
||||||
|
assign csr_to_fpu_if.wid = fpu_req_if.wid;
|
||||||
|
wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.op_mod;
|
||||||
|
|
||||||
`ifdef FPU_FAST
|
`ifdef FPU_FAST
|
||||||
|
|
||||||
VX_fp_fpga #(
|
VX_fp_fpga #(
|
||||||
@@ -70,7 +75,7 @@ module VX_fpu_unit #(
|
|||||||
.tag_in (tag_in),
|
.tag_in (tag_in),
|
||||||
|
|
||||||
.op_type (fpu_req_if.op_type),
|
.op_type (fpu_req_if.op_type),
|
||||||
.frm (fpu_req_if.frm),
|
.frm (fpu_frm),
|
||||||
|
|
||||||
.dataa (fpu_req_if.rs1_data),
|
.dataa (fpu_req_if.rs1_data),
|
||||||
.datab (fpu_req_if.rs2_data),
|
.datab (fpu_req_if.rs2_data),
|
||||||
@@ -104,7 +109,7 @@ module VX_fpu_unit #(
|
|||||||
.tag_in (tag_in),
|
.tag_in (tag_in),
|
||||||
|
|
||||||
.op_type (fpu_req_if.op_type),
|
.op_type (fpu_req_if.op_type),
|
||||||
.frm (fpu_req_if.frm),
|
.frm (fpu_frm),
|
||||||
|
|
||||||
.dataa (fpu_req_if.rs1_data),
|
.dataa (fpu_req_if.rs1_data),
|
||||||
.datab (fpu_req_if.rs2_data),
|
.datab (fpu_req_if.rs2_data),
|
||||||
|
|||||||
@@ -40,27 +40,25 @@ module VX_gpr_bypass #(
|
|||||||
buffer2 <= 0;
|
buffer2 <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
delayed_push <= push;
|
delayed_push <= push;
|
||||||
assert(!use_buffer2 || use_buffer);
|
assert(!use_buffer2 || use_buffer);
|
||||||
if (pop) begin
|
if (pop) begin
|
||||||
if (use_buffer) begin
|
buffer <= buffer2;
|
||||||
buffer <= buffer2;
|
use_buffer <= use_buffer2;
|
||||||
use_buffer <= use_buffer2;
|
use_buffer2 <= 0;
|
||||||
use_buffer2 <= 0;
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
if (delayed_push) begin
|
if (delayed_push) begin
|
||||||
if (use_buffer) begin
|
if (use_buffer) begin
|
||||||
assert(!use_buffer2); // queue full!
|
assert(!use_buffer2); // full!
|
||||||
|
use_buffer <= 1;
|
||||||
if (pop) begin
|
if (pop) begin
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
end else begin
|
end else begin
|
||||||
buffer2 <= data_in;
|
buffer2 <= data_in;
|
||||||
use_buffer2 <= 1;
|
use_buffer2 <= 1;
|
||||||
end
|
end
|
||||||
use_buffer <= 1;
|
|
||||||
end else if (!pop) begin
|
end else if (!pop) begin
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
use_buffer <= 1;
|
use_buffer <= 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ module VX_instr_demux (
|
|||||||
// inputs
|
// inputs
|
||||||
VX_decode_if execute_if,
|
VX_decode_if execute_if,
|
||||||
VX_gpr_rsp_if gpr_rsp_if,
|
VX_gpr_rsp_if gpr_rsp_if,
|
||||||
VX_csr_to_issue_if csr_to_issue_if,
|
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
@@ -34,78 +33,47 @@ module VX_instr_demux (
|
|||||||
wire alu_req_ready;
|
wire alu_req_ready;
|
||||||
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
|
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_opd_collect #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS)
|
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS),
|
||||||
) alu_reg (
|
.OPDSW (2 * `NUM_THREADS * 32),
|
||||||
|
.PASSTHRU (1) // ALU has no backpressure
|
||||||
|
) alu_opc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.ready_in (alu_req_ready),
|
.ready_in (alu_req_ready),
|
||||||
.valid_in (alu_req_valid),
|
.valid_in (alu_req_valid),
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
|
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid}),
|
||||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}),
|
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||||
|
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||||
.ready_out (alu_req_if.ready),
|
.ready_out (alu_req_if.ready),
|
||||||
.valid_out (alu_req_if.valid)
|
.valid_out (alu_req_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpr_bypass #(
|
|
||||||
.DATAW (2 * `NUM_THREADS * 32),
|
|
||||||
.PASSTHRU (1) // ALU has no back-pressure, bypass not needed
|
|
||||||
) alu_bypass (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.push (alu_req_valid && alu_req_ready),
|
|
||||||
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
|
||||||
.data_out ({alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
|
||||||
.pop (alu_req_if.valid && alu_req_if.ready)
|
|
||||||
);
|
|
||||||
|
|
||||||
// lsu unit
|
// lsu unit
|
||||||
|
|
||||||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||||
wire lsu_req_ready;
|
wire lsu_req_ready;
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_opd_collect #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1)
|
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1),
|
||||||
) lsu_reg (
|
.OPDSW (2 * `NUM_THREADS * 32)
|
||||||
|
) lsu_opc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.ready_in (lsu_req_ready),
|
.ready_in (lsu_req_ready),
|
||||||
.valid_in (lsu_req_valid),
|
.valid_in (lsu_req_valid),
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}),
|
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_RW(execute_if.op_type), `LSU_BE(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb}),
|
||||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}),
|
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||||
|
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||||
.ready_out (lsu_req_if.ready),
|
.ready_out (lsu_req_if.ready),
|
||||||
.valid_out (lsu_req_if.valid)
|
.valid_out (lsu_req_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpr_bypass #(
|
|
||||||
.DATAW ((2 * `NUM_THREADS * 32))
|
|
||||||
) lsu_bypass (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.push (lsu_req_valid && lsu_req_ready),
|
|
||||||
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
|
||||||
.data_out ({lsu_req_if.base_addr, lsu_req_if.store_data}),
|
|
||||||
.pop (lsu_req_if.valid && lsu_req_if.ready)
|
|
||||||
);
|
|
||||||
|
|
||||||
// csr unit
|
// csr unit
|
||||||
|
|
||||||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||||
wire csr_req_ready;
|
wire csr_req_ready;
|
||||||
|
|
||||||
VX_skid_buffer #(
|
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1)
|
|
||||||
) csr_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.ready_in (csr_req_ready),
|
|
||||||
.valid_in (csr_req_valid),
|
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
|
|
||||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}),
|
|
||||||
.ready_out (csr_req_if.ready),
|
|
||||||
.valid_out (csr_req_if.valid)
|
|
||||||
);
|
|
||||||
|
|
||||||
reg tmp_rs2_is_imm;
|
reg tmp_rs2_is_imm;
|
||||||
reg [`NR_BITS-1:0] tmp_rs1;
|
reg [`NR_BITS-1:0] tmp_rs1;
|
||||||
|
|
||||||
@@ -116,15 +84,19 @@ module VX_instr_demux (
|
|||||||
|
|
||||||
wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0];
|
wire [31:0] csr_req_mask = tmp_rs2_is_imm ? 32'(tmp_rs1) : gpr_rsp_if.rs1_data[0];
|
||||||
|
|
||||||
VX_gpr_bypass #(
|
VX_opd_collect #(
|
||||||
.DATAW (32)
|
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1),
|
||||||
) csr_bypass (
|
.OPDSW (32)
|
||||||
|
) csr_opc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.push (csr_req_valid && csr_req_ready),
|
.ready_in (csr_req_ready),
|
||||||
.data_in (csr_req_mask),
|
.valid_in (csr_req_valid),
|
||||||
.data_out (csr_req_if.csr_mask),
|
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
|
||||||
.pop (csr_req_if.valid && csr_req_if.ready)
|
.opds_in ({csr_req_mask}),
|
||||||
|
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io, csr_req_if.csr_mask}),
|
||||||
|
.ready_out (csr_req_if.ready),
|
||||||
|
.valid_out (csr_req_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
// mul unit
|
// mul unit
|
||||||
@@ -133,29 +105,20 @@ module VX_instr_demux (
|
|||||||
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
|
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
|
||||||
wire mul_req_ready;
|
wire mul_req_ready;
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_opd_collect #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1)
|
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1),
|
||||||
) mul_reg (
|
.OPDSW (2 * `NUM_THREADS * 32)
|
||||||
|
) mul_opc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.ready_in (mul_req_ready),
|
.ready_in (mul_req_ready),
|
||||||
.valid_in (mul_req_valid),
|
.valid_in (mul_req_valid),
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
|
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
|
||||||
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb}),
|
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||||
|
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
|
||||||
.ready_out (mul_req_if.ready),
|
.ready_out (mul_req_if.ready),
|
||||||
.valid_out (mul_req_if.valid)
|
.valid_out (mul_req_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpr_bypass #(
|
|
||||||
.DATAW ((2 * `NUM_THREADS * 32))
|
|
||||||
) mul_bypass (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.push (mul_req_valid && mul_req_ready),
|
|
||||||
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
|
||||||
.data_out ({mul_req_if.rs1_data, mul_req_if.rs2_data}),
|
|
||||||
.pop (mul_req_if.valid && mul_req_if.ready)
|
|
||||||
);
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// fpu unit
|
// fpu unit
|
||||||
@@ -164,33 +127,20 @@ module VX_instr_demux (
|
|||||||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||||
wire fpu_req_ready;
|
wire fpu_req_ready;
|
||||||
|
|
||||||
// resolve dynamic FRM
|
VX_opd_collect #(
|
||||||
assign csr_to_issue_if.wid = execute_if.wid;
|
.INSTW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1),
|
||||||
wire [`FRM_BITS-1:0] fpu_frm = (execute_if.op_mod == `FRM_DYN) ? csr_to_issue_if.frm : execute_if.op_mod;
|
.OPDSW (3 * `NUM_THREADS * 32)
|
||||||
|
) fpu_opc (
|
||||||
VX_skid_buffer #(
|
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `FRM_BITS + `NR_BITS + 1)
|
|
||||||
) fpu_reg (
|
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.ready_in (fpu_req_ready),
|
.ready_in (fpu_req_ready),
|
||||||
.valid_in (fpu_req_valid),
|
.valid_in (fpu_req_valid),
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), fpu_frm, execute_if.rd, execute_if.wb}),
|
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb}),
|
||||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}),
|
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||||
|
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||||
.ready_out (fpu_req_if.ready),
|
.ready_out (fpu_req_if.ready),
|
||||||
.valid_out (fpu_req_if.valid)
|
.valid_out (fpu_req_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpr_bypass #(
|
|
||||||
.DATAW ((3 * `NUM_THREADS * 32))
|
|
||||||
) fpu_bypass (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.push (fpu_req_valid && fpu_req_ready),
|
|
||||||
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
|
||||||
.data_out ({fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
|
||||||
.pop (fpu_req_if.valid && fpu_req_if.ready)
|
|
||||||
);
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// gpu unit
|
// gpu unit
|
||||||
@@ -198,30 +148,21 @@ module VX_instr_demux (
|
|||||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||||
wire gpu_req_ready;
|
wire gpu_req_ready;
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_opd_collect #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1)
|
.INSTW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1),
|
||||||
) gpu_reg (
|
.OPDSW (`NUM_THREADS * 32 + 32)
|
||||||
|
) gpu_opc (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.ready_in (gpu_req_ready),
|
.ready_in (gpu_req_ready),
|
||||||
.valid_in (gpu_req_valid),
|
.valid_in (gpu_req_valid),
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
|
.inst_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb}),
|
||||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb}),
|
.opds_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||||
|
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||||
.ready_out (gpu_req_if.ready),
|
.ready_out (gpu_req_if.ready),
|
||||||
.valid_out (gpu_req_if.valid)
|
.valid_out (gpu_req_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_gpr_bypass #(
|
|
||||||
.DATAW ((`NUM_THREADS * 32) + 32)
|
|
||||||
) gpu_bypass (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.push (gpu_req_valid && gpu_req_ready),
|
|
||||||
.data_in ({gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
|
||||||
.data_out ({gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
|
||||||
.pop (gpu_req_if.valid && gpu_req_if.ready)
|
|
||||||
);
|
|
||||||
|
|
||||||
// can take next request?
|
// can take next request?
|
||||||
assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU))
|
assign execute_if.ready = (alu_req_ready && (execute_if.ex_type == `EX_ALU))
|
||||||
|| (lsu_req_ready && (execute_if.ex_type == `EX_LSU))
|
|| (lsu_req_ready && (execute_if.ex_type == `EX_LSU))
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ module VX_issue #(
|
|||||||
|
|
||||||
VX_decode_if decode_if,
|
VX_decode_if decode_if,
|
||||||
VX_writeback_if writeback_if,
|
VX_writeback_if writeback_if,
|
||||||
VX_csr_to_issue_if csr_to_issue_if,
|
|
||||||
|
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
@@ -71,7 +70,7 @@ module VX_issue #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
`UNUSED_VAR (gpr_rsp_if.valid);
|
`UNUSED_VAR (gpr_rsp_if.valid);
|
||||||
|
|
||||||
assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay;
|
assign execute_if.valid = ibuf_deq_if.valid && gpr_req_if.ready && ~scoreboard_delay;
|
||||||
assign execute_if.wid = ibuf_deq_if.wid;
|
assign execute_if.wid = ibuf_deq_if.wid;
|
||||||
assign execute_if.tmask = ibuf_deq_if.tmask;
|
assign execute_if.tmask = ibuf_deq_if.tmask;
|
||||||
@@ -91,7 +90,6 @@ module VX_issue #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.execute_if (execute_if),
|
.execute_if (execute_if),
|
||||||
.gpr_rsp_if (gpr_rsp_if),
|
.gpr_rsp_if (gpr_rsp_if),
|
||||||
.csr_to_issue_if(csr_to_issue_if),
|
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
@@ -136,22 +134,22 @@ module VX_issue #(
|
|||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.PC, alu_req_if.tmask, alu_req_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.PC, lsu_req_if.tmask, lsu_req_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||||
end
|
end
|
||||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||||
end
|
end
|
||||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||||
end
|
end
|
||||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.PC, gpu_req_if.tmask, gpu_req_if.rd, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
62
hw/rtl/VX_opd_collect.v
Normal file
62
hw/rtl/VX_opd_collect.v
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
`include "VX_platform.vh"
|
||||||
|
|
||||||
|
// Operand collector.
//
// Pairs an instruction word (inst_in) with its operand data (opds_in) and
// presents both on a single registered valid/ready output.
//
//   * skid_buffer : holds the instruction word and owns the input handshake
//                   (valid_in / ready_in).
//   * gpr_bypass  : holds the operand data; pushed when the input handshake
//                   fires and popped when the skid buffer's output handshake
//                   fires.
//                   NOTE(review): the bypass appears to sample data_in the
//                   cycle after `push` - confirm against VX_gpr_bypass.
//   * pipe_reg    : output register stage carrying {valid, inst, operands}.
//
// Parameters:
//   INSTW    - width of the instruction word, in bits.
//   OPDSW    - width of the operand bundle, in bits.
//   PASSTHRU - forwarded unchanged to VX_gpr_bypass.
//
// data_out is driven from the register input {inst, operands}, i.e. the
// instruction word is wired to the upper INSTW bits and the operands to the
// lower OPDSW bits (assuming VX_generic_register copies `in` to `out`
// bit-for-bit - TODO confirm).
module VX_opd_collect #(
    parameter INSTW    = 1,
    parameter OPDSW    = 1,
    parameter PASSTHRU = 0
) (
    input  wire                   clk,
    input  wire                   reset,

    // upstream handshake: instruction word plus operand data
    input  wire                   valid_in,
    output wire                   ready_in,
    input  wire [INSTW-1:0]       inst_in,
    input  wire [OPDSW-1:0]       opds_in,

    // downstream handshake: {inst, operands}
    output wire [INSTW+OPDSW-1:0] data_out,
    output wire                   valid_out,
    input  wire                   ready_out
);
    // Instruction word and handshake at the skid buffer's output.
    wire [INSTW-1:0] skid_inst;
    wire             skid_valid;
    wire             skid_ready;

    // Operand data at the bypass output.
    wire [OPDSW-1:0] bypass_opds;

    // The output register holds its contents while the consumer is not
    // accepting a valid entry.
    wire out_stall;
    assign out_stall = valid_out && ~ready_out;

    // The upstream stages may advance whenever the output register is free
    // to load.
    assign skid_ready = ~out_stall;

    VX_skid_buffer #(
        .DATAW (INSTW)
    ) skid_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (valid_in),
        .ready_in  (ready_in),
        .data_in   (inst_in),
        .data_out  (skid_inst),
        .valid_out (skid_valid),
        .ready_out (skid_ready)
    );

    VX_gpr_bypass #(
        .DATAW    (OPDSW),
        .PASSTHRU (PASSTHRU)
    ) gpr_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (valid_in && ready_in),     // instruction accepted upstream
        .pop      (skid_valid && skid_ready), // instruction moves to pipe_reg
        .data_in  (opds_in),
        .data_out (bypass_opds)
    );

    VX_generic_register #(
        .N (1 + INSTW + OPDSW)
    ) pipe_reg (
        .clk   (clk),
        .reset (reset),
        .stall (out_stall),
        .flush (1'b0),
        .in    ({skid_valid, skid_inst, bypass_opds}),
        .out   ({valid_out,  data_out})
    );

endmodule
|
||||||
@@ -98,7 +98,6 @@ module VX_pipeline #(
|
|||||||
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
||||||
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
||||||
|
|
||||||
VX_csr_to_issue_if csr_to_issue_if();
|
|
||||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||||
VX_decode_if decode_if();
|
VX_decode_if decode_if();
|
||||||
VX_branch_ctl_if branch_ctl_if();
|
VX_branch_ctl_if branch_ctl_if();
|
||||||
@@ -157,7 +156,6 @@ module VX_pipeline #(
|
|||||||
|
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.csr_to_issue_if(csr_to_issue_if),
|
|
||||||
|
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
@@ -181,7 +179,6 @@ module VX_pipeline #(
|
|||||||
.csr_io_req_if (csr_io_req_if),
|
.csr_io_req_if (csr_io_req_if),
|
||||||
.csr_io_rsp_if (csr_io_rsp_if),
|
.csr_io_rsp_if (csr_io_rsp_if),
|
||||||
|
|
||||||
.csr_to_issue_if(csr_to_issue_if),
|
|
||||||
.cmt_to_csr_if (cmt_to_csr_if),
|
.cmt_to_csr_if (cmt_to_csr_if),
|
||||||
|
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
`ifndef VX_CSR_TO_ISSUE_IF
|
`ifndef VX_CSR_TO_FPU_IF
|
||||||
`define VX_CSR_TO_ISSUE_IF
|
`define VX_CSR_TO_FPU_IF
|
||||||
|
|
||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
interface VX_csr_to_issue_if ();
|
interface VX_csr_to_fpu_if ();
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] wid;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [`FRM_BITS-1:0] frm;
|
wire [`FRM_BITS-1:0] frm;
|
||||||
@@ -15,7 +15,7 @@ interface VX_fpu_req_if ();
|
|||||||
wire [`NUM_THREADS-1:0] tmask;
|
wire [`NUM_THREADS-1:0] tmask;
|
||||||
wire [31:0] PC;
|
wire [31:0] PC;
|
||||||
wire [`FPU_BITS-1:0] op_type;
|
wire [`FPU_BITS-1:0] op_type;
|
||||||
wire [`FRM_BITS-1:0] frm;
|
wire [`MOD_BITS-1:0] op_mod;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||||
|
|||||||
@@ -17,24 +17,26 @@ module VX_skid_buffer #(
|
|||||||
reg valid_out_r;
|
reg valid_out_r;
|
||||||
reg use_buffer;
|
reg use_buffer;
|
||||||
|
|
||||||
|
wire push = valid_in && ready_in;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
data_out_r <= 0;
|
data_out_r <= 0;
|
||||||
buffer <= 0;
|
buffer <= 0;
|
||||||
use_buffer <= 0;
|
use_buffer <= 0;
|
||||||
valid_out_r <= 0;
|
valid_out_r <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
if (valid_in && ready_in && valid_out && !ready_out) begin
|
|
||||||
assert(!use_buffer);
|
|
||||||
use_buffer <= 1;
|
|
||||||
end
|
|
||||||
if (ready_out) begin
|
if (ready_out) begin
|
||||||
use_buffer <= 0;
|
use_buffer <= 0;
|
||||||
end
|
end
|
||||||
if (valid_in && ready_in) begin
|
if (push) begin
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
|
if (valid_out_r && !ready_out) begin
|
||||||
|
assert(!use_buffer);
|
||||||
|
use_buffer <= 1;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
if (!valid_out || ready_out) begin
|
if (!valid_out_r || ready_out) begin
|
||||||
valid_out_r <= valid_in || use_buffer;
|
valid_out_r <= valid_in || use_buffer;
|
||||||
data_out_r <= use_buffer ? buffer : data_in;
|
data_out_r <= use_buffer ? buffer : data_in;
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user