decode op_mod optimization

This commit is contained in:
Blaise Tine
2020-08-24 02:55:14 -07:00
parent f292e5003d
commit 57971f6c76
25 changed files with 221 additions and 209 deletions

View File

@@ -12,8 +12,8 @@ module VX_fp_fpga #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op,
input wire [`FRM_BITS-1:0] frm,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -34,7 +34,7 @@ module VX_fp_fpga #(
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] per_core_result;
wire [NUM_FPC-1:0][TAGW-1:0] per_core_tag_out;
wire [NUM_FPC-1:0] per_core_ready_out;
reg [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
wire fpnew_has_fflags;
@@ -46,7 +46,7 @@ module VX_fp_fpga #(
always @(*) begin
core_select = 0;
fmadd_negate = 0;
case (op)
case (op_type)
`FPU_ADD: core_select = 1;
`FPU_SUB: core_select = 2;
`FPU_MUL: core_select = 3;
@@ -73,8 +73,8 @@ module VX_fp_fpga #(
.valid_in (valid_in && (core_select == 0)),
.ready_in (per_core_ready_in[0]),
.tag_in (tag_in),
.op (op),
.frm (frm),
.op_type (op_type),
.frm (op_mod),
.dataa (dataa),
.datab (datab),
.result (per_core_result[0]),
@@ -271,26 +271,34 @@ module VX_fp_fpga #(
.valid_out (per_core_valid_out[11])
);
wire [FPC_BITS-1:0] fp_index;
wire fp_valid;
VX_priority_encoder #(
.N(NUM_FPC)
) wb_select (
.data_in (per_core_valid_out),
.data_out (fp_index),
.valid_out (fp_valid)
);
reg valid_out_r;
reg has_fflags_r;
reg [`NUM_THREADS-1:0][31:0] result_r;
reg [TAGW-1:0] tag_out_r;
for (genvar i = 0; i < NUM_FPC; i++) begin
assign per_core_ready_out[i] = ready_out && (i == fp_index);
// Write-back selection (combinational).
// Scans the FP cores from index 0 upward and forwards the first one that
// presents a valid result; the `break` makes this a fixed-priority pick
// where the lowest index wins. All other cores see ready_out = 0 this
// cycle and therefore keep holding their results.
always @(*) begin
    per_core_ready_out = 0;
    valid_out_r        = 0;
    has_fflags_r       = 0;
    result_r           = 'x;    // don't care while no core is valid
    tag_out_r          = 'x;    // don't care while no core is valid
    for (integer i = 0; i < NUM_FPC; i++) begin
        if (per_core_valid_out[i]) begin
            // Only pop the selected core when the downstream consumer
            // accepts the result; otherwise the result would be dropped.
            per_core_ready_out[i] = ready_out;
            // 1-bit flag: must be a constant 1, not the loop index
            // (index 0 and every even index would truncate to 0).
            valid_out_r  = 1'b1;
            // Core 0 is the only one whose fflags validity comes from
            // fpnew_has_fflags; every other core reports no fflags.
            has_fflags_r = fpnew_has_fflags && (i == 0);
            result_r     = per_core_result[i];
            tag_out_r    = per_core_tag_out[i];
            break;
        end
    end
end
assign ready_in = (& per_core_ready_in);
assign valid_out = fp_valid;
assign tag_out = per_core_tag_out[fp_index];
assign result = per_core_result[fp_index];
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
assign valid_out = valid_out_r;
assign has_fflags = has_fflags_r;
assign tag_out = tag_out_r;
assign result = result_r;
assign fflags = fpnew_fflags;
endmodule

View File

@@ -12,7 +12,7 @@ module VX_fp_noncomp #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op,
input wire [`FPU_BITS-1:0] op_type,
input wire [`FRM_BITS-1:0] frm,
input wire [LANES-1:0][31:0] dataa,
@@ -38,7 +38,7 @@ module VX_fp_noncomp #(
SIG_NAN = 32'h00000100,
QUT_NAN = 32'h00000200;
reg [`FPU_BITS-1:0] op_r;
reg [`FPU_BITS-1:0] op_type_r;
reg [`FRM_BITS-1:0] frm_r;
reg [LANES-1:0][31:0] dataa_r;
@@ -103,10 +103,10 @@ module VX_fp_noncomp #(
always @(posedge clk) begin
if (~stall) begin
op_r <= op;
frm_r <= frm;
dataa_r <= dataa;
datab_r <= datab;
op_type_r <= op_type;
frm_r <= frm;
dataa_r <= dataa;
datab_r <= datab;
end
end
@@ -144,10 +144,10 @@ module VX_fp_noncomp #(
else if (b_type[i].is_nan)
fminmax_res[i] = dataa_r[i];
else begin
case (op_r) // use LSB to distinguish MIN and MAX
`FPU_MIN: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
`FPU_MAX: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
case (frm_r) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
endcase
end
end
@@ -156,11 +156,11 @@ module VX_fp_noncomp #(
// Sign Injection
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (op_r)
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
`FPU_SGNJN: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
`FPU_SGNJX: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
case (frm_r)
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
endcase
end
end
@@ -210,56 +210,45 @@ module VX_fp_noncomp #(
// outputs
reg tmp_valid;
reg tmp_has_fflags;
fflags_t [LANES-1:0] tmp_fflags;
reg [LANES-1:0][31:0] tmp_result;
always @(*) begin
case (op_r)
`FPU_SGNJ: tmp_has_fflags = 0;
`FPU_SGNJN: tmp_has_fflags = 0;
`FPU_SGNJX: tmp_has_fflags = 0;
`FPU_MVXW: tmp_has_fflags = 0;
`FPU_MVWX: tmp_has_fflags = 0;
`FPU_CLASS: tmp_has_fflags = 0;
default: tmp_has_fflags = 1;
endcase
end
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
tmp_valid = 1'b1;
case (op_r)
tmp_result[i] = 32'hdeadbeaf;
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
case (op_type_r)
`FPU_CLASS: begin
tmp_result[i] = fclass_mask[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
`FPU_MVXW,`FPU_MVWX: begin
tmp_result[i] = dataa[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
`FPU_MIN,`FPU_MAX: begin
tmp_result[i] = fminmax_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
end
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
tmp_result[i] = fsgnj_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
end
end
`FPU_CMP: begin
tmp_result[i] = fcmp_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
end
default: begin
tmp_result[i] = 32'hdeadbeaf;
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
tmp_valid = 1'b0;
end
end
`FPU_MISC: begin
    // Sub-operation is selected by the registered modifier (frm_r) so it
    // stays in step with op_type_r and with fsgnj_res / fminmax_res, which
    // are themselves computed from frm_r. Values not listed below keep the
    // defaults assigned before the outer case statement.
    case (frm_r)
        0,1,2: begin // sign injection variants
            tmp_result[i] = fsgnj_res[i];
            {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
        end
        3,4: begin // min / max: only NV can be raised, taken from bit 0 of the operand types
            tmp_result[i] = fminmax_res[i];
            {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
        end
        5,6: begin // register moves: operand A is passed through unchanged
            // NOTE(review): this reads the unregistered dataa while the rest
            // of this stage uses *_r signals - confirm whether dataa_r is intended.
            tmp_result[i] = dataa[i];
            {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
        end
    endcase
end
endcase
end
end
// fflags are only meaningful for MIN/MAX (FPU_MISC with modifier 3 or 4) and
// for compares. The modifier is taken from frm_r so that it belongs to the
// same registered operation as op_type_r.
wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm_r == 3 || frm_r == 4)) // MIN/MAX
                   || (op_type_r == `FPU_CMP);                                // CMP
VX_generic_register #(
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
) nc_reg (
@@ -267,7 +256,7 @@ module VX_fp_noncomp #(
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({tmp_valid, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
.in ({valid_in, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
.out ({valid_out, tag_out, result, has_fflags, fflags})
);

View File

@@ -17,8 +17,8 @@ module VX_fpnew #(
input wire [TAGW-1:0] tag_in,
input wire [`FPU_BITS-1:0] op,
input wire [`FRM_BITS-1:0] frm,
input wire [`FPU_BITS-1:0] op_type,
input wire [`MOD_BITS-1:0] frm,
input wire [`NUM_THREADS-1:0][31:0] dataa,
input wire [`NUM_THREADS-1:0][31:0] datab,
@@ -91,7 +91,7 @@ module VX_fpnew #(
fpu_operands[0] = dataa;
fpu_operands[1] = datab;
fpu_operands[2] = datac;
case (op)
case (op_type)
`FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
@@ -110,19 +110,22 @@ module VX_fpnew #(
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`FPU_MISC: begin
    // The modifier field picks the actual operation:
    //   3, 4       -> MINMAX, variant encoded in the rounding mode (RNE / RTZ)
    //   all others -> SGNJ without fflags, variant encoded in the rounding mode
    //                 (0: RNE, 1: RTZ, 2: RDN, anything else: RUP)
    if ((frm == 3) || (frm == 4)) begin
        fpu_op  = fpnew_pkg::MINMAX;
        fpu_rnd = (frm == 3) ? `FRM_RNE : `FRM_RTZ;
    end else begin
        fpu_op         = fpnew_pkg::SGNJ;
        fpu_has_fflags = 0;
        case (frm)
            0:       fpu_rnd = `FRM_RNE;
            1:       fpu_rnd = `FRM_RTZ;
            2:       fpu_rnd = `FRM_RDN;
            default: fpu_rnd = `FRM_RUP;
        endcase
    end
end
default:;
endcase
end