Use a serial divider to save area cost

This commit is contained in:
Blaise Tine
2020-08-25 02:29:27 -07:00
parent df25bae456
commit ee81e81818
10 changed files with 239 additions and 135 deletions

View File

@@ -15,6 +15,7 @@ module VX_mul_unit #(
localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE);
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op_type;
wire is_div_op = `IS_DIV_OP(alu_op);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
@@ -81,7 +82,7 @@ module VX_mul_unit #(
wire [MULQ_BITS-1:0] mul_tag;
wire mul_valid_out;
wire mul_fire = mul_req_if.valid && mul_req_if.ready && ~`IS_DIV_OP(alu_op);
wire mul_fire = mul_req_if.valid && mul_req_if.ready && !is_div_op;
VX_shift_register #(
.DATAW(1 + MULQ_BITS + 1),
@@ -96,88 +97,50 @@ module VX_mul_unit #(
///////////////////////////////////////////////////////////////////////////
wire [`NUM_THREADS-1:0][31:0] div_result;
wire is_div = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
wire is_signed_div = (alu_op == `MUL_DIV || alu_op == `MUL_REM);
reg [`NUM_THREADS-1:0] is_div_qual;
wire is_div_out;
wire stall_div;
wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp;
// Per-thread divider lanes: each iteration of this generate loop instantiates
// one VX_divide unit for thread i, together with its divide-by-zero operand
// substitution and its input latch.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
// 32-bit operands after the divide-by-zero substitution below
reg [31:0] div_in1_qual, div_in2_qual;
// 33-bit latched operands; bit 32 is the extension bit built in the latch below
reg [32:0] div_in1, div_in2;
// raw quotient / remainder outputs of this lane's divider
wire [31:0] div_result_tmp, rem_result_tmp;
// handle divide by zero
// Defaults pass the operands through unchanged and copy is_div into this
// lane's is_div_qual bit. When the denominator is zero, the denominator is
// replaced by 1 so the divider never sees a zero divisor:
//   - is_div set:   the numerator is forced to all ones, so the quotient is 0xFFFFFFFF
//   - is_div clear: is_div_qual[i] is forced to 1; per the inline note below the
//                   intent is that the quotient (in1 / 1 == in1) stands in for the remainder
always @(*) begin
is_div_qual[i] = is_div;
div_in1_qual = alu_in1[i];
div_in2_qual = alu_in2[i];
if (0 == alu_in2[i]) begin
div_in2_qual = 1;
if (is_div) begin
div_in1_qual = 32'hFFFFFFFF; // quotient = (0xFFFFFFFF / 1)
end else begin
is_div_qual[i] = 1; // remainder = (in1 / 1)
end
end
end
// latch divider inputs
// Operands are captured only while stall_div is low. Bit 32 is the original
// operand's bit 31 when is_signed_div is set and 0 otherwise, i.e. sign
// extension for signed ops and zero extension for unsigned ops.
// NOTE(review): the extension bit is taken from alu_in1/alu_in2, not from the
// substituted *_qual values -- confirm this is intended for the zero-divisor case.
always @(posedge clk) begin
if (~stall_div) begin
div_in1 <= {is_signed_div & alu_in1[i][31], div_in1_qual};
div_in2 <= {is_signed_div & alu_in2[i][31], div_in2_qual};
end
end
// 33-bit operands in, 32-bit quotient/remainder out. NSIGNED/DSIGNED are both 1,
// presumably selecting signed numerator/denominator handling (VX_divide is not
// visible here -- TODO confirm). The divider's clk_en is gated by the same
// stall_div condition as the input latch above.
VX_divide #(
.WIDTHN(33),
.WIDTHD(33),
.WIDTHQ(32),
.WIDTHR(32),
.NSIGNED(1),
.DSIGNED(1),
.PIPELINE(`LATENCY_IDIV)
) divide (
.clk(clk),
.reset(reset),
.clk_en(~stall_div),
.numer(div_in1),
.denom(div_in2),
.quotient(div_result_tmp),
.remainder(rem_result_tmp)
);
// is_div_out (driven outside this loop) selects quotient vs. remainder for this lane
assign div_result[i] = is_div_out ? div_result_tmp : rem_result_tmp;
end
wire [MULQ_BITS-1:0] div_tag;
wire div_valid_out;
wire div_fire = mul_req_if.valid && mul_req_if.ready && `IS_DIV_OP(alu_op);
VX_shift_register #(
.DATAW(1 + MULQ_BITS + 1),
.DEPTH(`LATENCY_IDIV + 1)
) div_shift_reg (
wire is_div_only = (alu_op == `MUL_DIV) || (alu_op == `MUL_DIVU);
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
wire div_valid_in = mul_req_if.valid && is_div_op;
wire div_ready_in;
wire div_ready_out;
wire div_valid_out;
wire is_div_out;
wire [MULQ_BITS-1:0] div_tag;
VX_serial_div #(
.WIDTHN(32),
.WIDTHD(32),
.WIDTHQ(32),
.WIDTHR(32),
.LANES(`NUM_THREADS),
.TAGW(MULQ_BITS + 1)
) divide (
.clk(clk),
.reset(reset),
.enable(~stall_div),
.in({div_fire, tag_in, (| is_div_qual)}),
.out({div_valid_out, div_tag, is_div_out})
.ready_in(div_ready_in),
.valid_in(div_valid_in),
.signed_mode(is_signed_div),
.tag_in({tag_in, is_div_only}),
.numer(alu_in1),
.denom(alu_in2),
.quotient(div_result_tmp),
.remainder(rem_result_tmp),
.ready_out(div_ready_out),
.valid_out(div_valid_out),
.tag_out({div_tag, is_div_out})
);
wire [`NUM_THREADS-1:0][31:0] div_result = is_div_out ? div_result_tmp : rem_result_tmp;
///////////////////////////////////////////////////////////////////////////
wire arbiter_hazard = mul_valid_out && div_valid_out;
assign stall_out = ~mul_commit_if.ready && mul_commit_if.valid;
assign stall_mul = stall_out || mulq_full;
assign stall_div = stall_out || mulq_full
|| arbiter_hazard; // arbitration prioritizes MUL
wire stall_in = stall_mul || stall_div;
assign stall_mul = (stall_out && !is_div_op) || mulq_full;
assign div_ready_out = ~stall_out && ~arbiter_hazard; // arbitration prioritizes MUL
wire stall_in = stall_mul || ~div_ready_in;
assign valid_out = mul_valid_out || div_valid_out;
assign tag_out = mul_valid_out ? mul_tag : div_tag;
@@ -186,7 +149,7 @@ module VX_mul_unit #(
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
) alu_reg (
) mul_reg (
.clk (clk),
.reset (reset),
.stall (stall_out),