// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

`include "VX_define.vh"

// VX_muldiv_unit
//
// Integer multiply / divide execution unit.
//
// An incoming request on `execute_if` is steered by `is_mulx_op` to either
// the multiply path or the divide path. Each path carries the request tag
// (uuid, wid, tmask, PC, rd, wb, pid, sop, eop) alongside the computation,
// and the two result streams are merged onto `commit_if` by a 2-input
// VX_stream_arb.
//
// Parameters:
//   CORE_ID   - not used inside this module (marked with `UNUSED_PARAM).
//   NUM_LANES - number of lanes processed per request; sizes the operand
//               and result arrays and the thread-mask field of the tag.
//
// Ports:
//   clk, reset - clock and reset forwarded to every sub-module.
//   execute_if - request input (valid/ready + data).
//   commit_if  - result output (valid/ready + data).
//
// Compile-time variants:
//   IMUL_DPI / IDIV_DPI - compute results through the dpi_imul / dpi_idiv
//                         DPI calls and delay them in a shift register.
//   XLEN_64             - enables the "W" (32-bit operand) handling and, for
//                         the non-DPI multiplier, selects VX_serial_mul
//                         instead of the pipelined VX_multiplier.
module VX_muldiv_unit #(
    parameter CORE_ID   = 0,
    parameter NUM_LANES = 1
) (
    input wire clk,
    input wire reset,

    // Inputs
    VX_execute_if.slave execute_if,

    // Outputs
    VX_commit_if.master commit_if
);
    `UNUSED_PARAM (CORE_ID)
    localparam PID_BITS  = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH = `UP(PID_BITS);
    // Tag width: uuid + wid + tmask + PC + rd + wb + pid + sop + eop
    localparam TAGW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + PID_WIDTH + 1 + 1;

    `UNUSED_VAR (execute_if.data.rs3_data)

    wire [`INST_M_BITS-1:0] muldiv_op = `INST_M_BITS'(execute_if.data.op_type);

    wire is_mulx_op   = `INST_M_IS_MULX(muldiv_op);
    wire is_signed_op = `INST_M_SIGNED(muldiv_op);
`ifdef XLEN_64
    wire is_alu_w = `INST_ALU_IS_W(execute_if.data.op_mod);
`else
    wire is_alu_w = 0;
`endif

    ///////////////////////////////////////////////////////////////////////////
    // Multiply path
    ///////////////////////////////////////////////////////////////////////////

    wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_out;
    wire [`UUID_WIDTH-1:0] mul_uuid_out;
    wire [`NW_WIDTH-1:0] mul_wid_out;
    wire [NUM_LANES-1:0] mul_tmask_out;
    wire [`XLEN-1:0] mul_PC_out;
    wire [`NR_BITS-1:0] mul_rd_out;
    wire mul_wb_out;
    wire [PID_WIDTH-1:0] mul_pid_out;
    wire mul_sop_out, mul_eop_out;

    wire mul_valid_in = execute_if.valid && is_mulx_op;
    wire mul_ready_in;
    wire mul_valid_out;
    wire mul_ready_out;

    wire is_mulh_in      = `INST_M_IS_MULH(muldiv_op);
    wire is_signed_mul_a = `INST_M_SIGNED_A(muldiv_op);
    wire is_signed_mul_b = is_signed_op;

`ifdef IMUL_DPI

    wire [NUM_LANES-1:0][`XLEN-1:0] mul_result_tmp;

    wire mul_fire_in = mul_valid_in && mul_ready_in;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        // Written by the DPI call inside the always block below, so these
        // must be variables rather than nets.
        reg [`XLEN-1:0] mul_resultl, mul_resulth;
        // For W operations only the low 32 bits of each operand are kept.
        wire [`XLEN-1:0] mul_in1 = is_alu_w ? (execute_if.data.rs1_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs1_data[i];
        wire [`XLEN-1:0] mul_in2 = is_alu_w ? (execute_if.data.rs2_data[i] & `XLEN'hFFFFFFFF) : execute_if.data.rs2_data[i];
        always @(*) begin
            dpi_imul (mul_fire_in, is_signed_mul_a, is_signed_mul_b, mul_in1, mul_in2, mul_resultl, mul_resulth);
        end
        // MULH* returns the high half; W operations sign-extend the low
        // 32 bits of the low half; otherwise the low half is returned.
        assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : (is_alu_w ? `XLEN'($signed(mul_resultl[31:0])) : mul_resultl);
    end

    // Delays {valid, tag, result} by `LATENCY_IMUL stages; only the valid
    // bit is reset (RESETW = 1).
    VX_shift_register #(
        .DATAW  (1 + TAGW + (NUM_LANES * `XLEN)),
        .DEPTH  (`LATENCY_IMUL),
        .RESETW (1)
    ) mul_shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (mul_ready_in),
        .data_in  ({mul_valid_in,  execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),
        .data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out})
    );

    // The pipeline advances when the output is consumed or holds no valid entry.
    assign mul_ready_in = mul_ready_out || ~mul_valid_out;

`else

    // Full-width signed product of the two (XLEN+1)-bit operands.
    wire [NUM_LANES-1:0][2*(`XLEN+1)-1:0] mul_result_tmp;
    wire is_mulh_out;
    wire is_mul_w_out;

`ifdef XLEN_64

    wire [NUM_LANES-1:0][`XLEN:0] mul_in1;
    wire [NUM_LANES-1:0][`XLEN:0] mul_in2;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        // W operations: sign-extend the low 32 bits to XLEN+1 bits.
        // Otherwise: prepend one extra bit that copies the operand MSB when
        // the operand is signed and is zero when it is unsigned, so a single
        // signed multiplier covers both cases.
        assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]} : {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
        assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]} : {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};
    end

    wire mul_strobe;
    wire mul_busy;

    // Couples the valid/ready handshake to the strobe/busy interface of the
    // serial multiplier (protocol details live in VX_elastic_adapter).
    VX_elastic_adapter mul_elastic_adapter (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (mul_valid_in),
        .ready_in  (mul_ready_in),
        .valid_out (mul_valid_out),
        .ready_out (mul_ready_out),
        .strobe    (mul_strobe),
        .busy      (mul_busy)
    );

    VX_serial_mul #(
        .A_WIDTH (`XLEN+1),
        .LANES   (NUM_LANES),
        .SIGNED  (1)
    ) serial_mul (
        .clk     (clk),
        .reset   (reset),

        .strobe  (mul_strobe),
        .busy    (mul_busy),

        .dataa   (mul_in1),
        .datab   (mul_in2),
        .result  (mul_result_tmp)
    );

    // Tag of the request currently in the serial multiplier, captured when
    // the request is accepted. The two extra bits are is_mulh and is_alu_w.
    reg [TAGW+2-1:0] mul_tag_r;
    always @(posedge clk) begin
        if (mul_valid_in && mul_ready_in) begin
            mul_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_mulh_in, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
        end
    end

    assign {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out, is_mul_w_out, mul_pid_out, mul_sop_out, mul_eop_out} = mul_tag_r;

`else

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        // Prepend one extra bit that copies the operand MSB when the operand
        // is signed and is zero when it is unsigned.
        wire [`XLEN:0] mul_in1 = {is_signed_mul_a && execute_if.data.rs1_data[i][`XLEN-1], execute_if.data.rs1_data[i]};
        wire [`XLEN:0] mul_in2 = {is_signed_mul_b && execute_if.data.rs2_data[i][`XLEN-1], execute_if.data.rs2_data[i]};

        VX_multiplier #(
            .A_WIDTH (`XLEN+1),
            .B_WIDTH (`XLEN+1),
            .R_WIDTH (2*(`XLEN+1)),
            .SIGNED  (1),
            .LATENCY (`LATENCY_IMUL)
        ) multiplier (
            .clk    (clk),
            .enable (mul_ready_in),
            .dataa  (mul_in1),
            .datab  (mul_in2),
            .result (mul_result_tmp[i])
        );
    end

    // Delays {valid, tag, is_mulh, is_alu_w} by the same `LATENCY_IMUL depth
    // and enable as the multipliers above.
    VX_shift_register #(
        .DATAW  (1 + TAGW + 1 + 1),
        .DEPTH  (`LATENCY_IMUL),
        .RESETW (1)
    ) mul_shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (mul_ready_in),
        .data_in  ({mul_valid_in,  execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, is_mulh_in, is_alu_w}),
        .data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, is_mulh_out, is_mul_w_out})
    );

    // The pipeline advances when the output is consumed or holds no valid entry.
    assign mul_ready_in = mul_ready_out || ~mul_valid_out;

`endif

    // Select the architectural result from the full-width product.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
    `ifdef XLEN_64
        assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
                                   (is_mul_w_out ? `XLEN'($signed(mul_result_tmp[i][31:0])) :
                                                   mul_result_tmp[i][`XLEN-1:0]);
    `else
        assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] : mul_result_tmp[i][`XLEN-1:0];
        `UNUSED_VAR (is_mul_w_out)
    `endif
    end

`endif

    ///////////////////////////////////////////////////////////////////////////
    // Divide / remainder path
    ///////////////////////////////////////////////////////////////////////////

    wire [NUM_LANES-1:0][`XLEN-1:0] div_result_out;
    wire [`UUID_WIDTH-1:0] div_uuid_out;
    wire [`NW_WIDTH-1:0] div_wid_out;
    wire [NUM_LANES-1:0] div_tmask_out;
    wire [`XLEN-1:0] div_PC_out;
    wire [`NR_BITS-1:0] div_rd_out;
    wire div_wb_out;
    wire [PID_WIDTH-1:0] div_pid_out;
    wire div_sop_out, div_eop_out;

    wire is_rem_op = `INST_M_IS_REM(muldiv_op);

    wire div_valid_in = execute_if.valid && ~is_mulx_op;
    wire div_ready_in;
    wire div_valid_out;
    wire div_ready_out;

    wire [NUM_LANES-1:0][`XLEN-1:0] div_in1;
    wire [NUM_LANES-1:0][`XLEN-1:0] div_in2;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
    `ifdef XLEN_64
        // W operations: extend the low 32 bits to XLEN, replicating bit 31
        // for signed operations and filling with zeros for unsigned ones.
        assign div_in1[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs1_data[i][31]}}, execute_if.data.rs1_data[i][31:0]}: execute_if.data.rs1_data[i];
        assign div_in2[i] = is_alu_w ? {{(`XLEN-32){is_signed_op && execute_if.data.rs2_data[i][31]}}, execute_if.data.rs2_data[i][31:0]}: execute_if.data.rs2_data[i];
    `else
        assign div_in1[i] = execute_if.data.rs1_data[i];
        assign div_in2[i] = execute_if.data.rs2_data[i];
    `endif
    end

`ifdef IDIV_DPI

    wire [NUM_LANES-1:0][`XLEN-1:0] div_result_in;
    wire div_fire_in = div_valid_in && div_ready_in;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        // Written by the DPI call inside the always block below, so these
        // must be variables rather than nets.
        reg [`XLEN-1:0] div_quotient, div_remainder;
        always @(*) begin
            dpi_idiv (div_fire_in, is_signed_op, div_in1[i], div_in2[i], div_quotient, div_remainder);
        end
        // Pick remainder or quotient; W operations sign-extend the low 32 bits.
        assign div_result_in[i] = is_rem_op ? (is_alu_w ? `XLEN'($signed(div_remainder[31:0])) : div_remainder) :
                                              (is_alu_w ? `XLEN'($signed(div_quotient[31:0])) : div_quotient);
    end

    // NOTE(review): the divide DPI path uses `LATENCY_IMUL for its depth;
    // confirm whether a divide-specific latency was intended here.
    VX_shift_register #(
        .DATAW  (1 + TAGW + (NUM_LANES * `XLEN)),
        .DEPTH  (`LATENCY_IMUL),
        .RESETW (1)
    ) div_shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (div_ready_in),
        .data_in  ({div_valid_in,  execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, div_result_in}),
        .data_out ({div_valid_out, div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out})
    );

    // The pipeline advances when the output is consumed or holds no valid entry.
    assign div_ready_in = div_ready_out || ~div_valid_out;

`else

    wire [NUM_LANES-1:0][`XLEN-1:0] div_quotient, div_remainder;
    wire is_rem_op_out;
    wire is_div_w_out;
    wire div_strobe;
    wire div_busy;

    // Couples the valid/ready handshake to the strobe/busy interface of the
    // serial divider (protocol details live in VX_elastic_adapter).
    VX_elastic_adapter div_elastic_adapter (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (div_valid_in),
        .ready_in  (div_ready_in),
        .valid_out (div_valid_out),
        .ready_out (div_ready_out),
        .strobe    (div_strobe),
        .busy      (div_busy)
    );

    VX_serial_div #(
        .WIDTHN (`XLEN),
        .WIDTHD (`XLEN),
        .WIDTHQ (`XLEN),
        .WIDTHR (`XLEN),
        .LANES  (NUM_LANES)
    ) serial_div (
        .clk       (clk),
        .reset     (reset),

        .strobe    (div_strobe),
        .busy      (div_busy),

        .is_signed (is_signed_op),
        .numer     (div_in1),
        .denom     (div_in2),

        .quotient  (div_quotient),
        .remainder (div_remainder)
    );

    // Tag of the request currently in the serial divider, captured when the
    // request is accepted. The two extra bits are is_rem_op and is_alu_w.
    reg [TAGW+2-1:0] div_tag_r;
    always @(posedge clk) begin
        if (div_valid_in && div_ready_in) begin
            div_tag_r <= {execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, is_rem_op, is_alu_w, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop};
        end
    end

    assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out, div_pid_out, div_sop_out, div_eop_out} = div_tag_r;

    // Pick remainder or quotient; W operations sign-extend the low 32 bits.
    for (genvar i = 0; i < NUM_LANES; ++i) begin
    `ifdef XLEN_64
        assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
                                                   (is_div_w_out ? `XLEN'($signed(div_quotient[i][31:0])) : div_quotient[i]);
    `else
        assign div_result_out[i] = is_rem_op_out ? div_remainder[i] : div_quotient[i];
        `UNUSED_VAR (is_div_w_out)
    `endif
    end

`endif

    ///////////////////////////////////////////////////////////////////////////
    // Request back-pressure and response merge
    ///////////////////////////////////////////////////////////////////////////

    // can accept new request?
    // Only the path selected by the current opcode gates the input.
    assign execute_if.ready = is_mulx_op ? mul_ready_in : div_ready_in;

    // Merge both result streams onto commit_if: input 0 is the multiply
    // path, input 1 is the divide path. The output is registered (OUT_REG = 1).
    VX_stream_arb #(
        .NUM_INPUTS (2),
        .DATAW      (TAGW + (NUM_LANES * `XLEN)),
        .OUT_REG    (1)
    ) rsp_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  ({div_valid_out, mul_valid_out}),
        .ready_in  ({div_ready_out, mul_ready_out}),
        .data_in   ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out},
                     {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}),
        .data_out  ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}),
        .valid_out (commit_if.valid),
        .ready_out (commit_if.ready),
        `UNUSED_PIN (sel_out)
    );

endmodule