decode op_mod optimization
This commit is contained in:
@@ -12,8 +12,8 @@ module VX_fp_fpga #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
@@ -34,7 +34,7 @@ module VX_fp_fpga #(
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][TAGW-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
reg [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
|
||||
wire fpnew_has_fflags;
|
||||
@@ -46,7 +46,7 @@ module VX_fp_fpga #(
|
||||
always @(*) begin
|
||||
core_select = 0;
|
||||
fmadd_negate = 0;
|
||||
case (op)
|
||||
case (op_type)
|
||||
`FPU_ADD: core_select = 1;
|
||||
`FPU_SUB: core_select = 2;
|
||||
`FPU_MUL: core_select = 3;
|
||||
@@ -73,8 +73,8 @@ module VX_fp_fpga #(
|
||||
.valid_in (valid_in && (core_select == 0)),
|
||||
.ready_in (per_core_ready_in[0]),
|
||||
.tag_in (tag_in),
|
||||
.op (op),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.frm (op_mod),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (per_core_result[0]),
|
||||
@@ -271,26 +271,34 @@ module VX_fp_fpga #(
|
||||
.valid_out (per_core_valid_out[11])
|
||||
);
|
||||
|
||||
wire [FPC_BITS-1:0] fp_index;
|
||||
wire fp_valid;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N(NUM_FPC)
|
||||
) wb_select (
|
||||
.data_in (per_core_valid_out),
|
||||
.data_out (fp_index),
|
||||
.valid_out (fp_valid)
|
||||
);
|
||||
reg valid_out_r;
|
||||
reg has_fflags_r;
|
||||
reg [`NUM_THREADS-1:0][31:0] result_r;
|
||||
reg [TAGW-1:0] tag_out_r;
|
||||
|
||||
for (genvar i = 0; i < NUM_FPC; i++) begin
|
||||
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
||||
// Write-back selection: scan the per-core valid flags from index 0 upward
// and forward the first valid core's result and tag to the output registers.
// Lower-indexed cores therefore win when several are valid in the same cycle.
always @(*) begin
    // Defaults used when no core presents a valid result.
    per_core_ready_out = 0;
    valid_out_r        = 0;
    has_fflags_r       = 0;
    result_r           = 'x;    // left as X while valid_out_r is low
    tag_out_r          = 'x;
    for (integer i = 0; i < NUM_FPC; i++) begin
        if (per_core_valid_out[i]) begin
            // NOTE(review): the grant is asserted without consulting
            // ready_out — confirm back-pressure is handled elsewhere.
            per_core_ready_out[i] = 1;
            // valid_out_r is a single bit: assigning the loop index here
            // would keep only its LSB and report "not valid" for every
            // even-indexed core (including core 0), so drive a constant 1.
            valid_out_r  = 1'b1;
            // fflags are only taken from core 0.
            has_fflags_r = fpnew_has_fflags && (i == 0);
            result_r     = per_core_result[i];
            tag_out_r    = per_core_tag_out[i];
            break;  // first valid core wins
        end
    end
end
|
||||
|
||||
assign ready_in = (& per_core_ready_in);
|
||||
assign valid_out = fp_valid;
|
||||
assign tag_out = per_core_tag_out[fp_index];
|
||||
assign result = per_core_result[fp_index];
|
||||
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||
assign valid_out = valid_out_r;
|
||||
assign has_fflags = has_fflags_r;
|
||||
assign tag_out = tag_out_r;
|
||||
assign result = result_r;
|
||||
assign fflags = fpnew_fflags;
|
||||
|
||||
endmodule
|
||||
@@ -12,7 +12,7 @@ module VX_fp_noncomp #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
@@ -38,7 +38,7 @@ module VX_fp_noncomp #(
|
||||
SIG_NAN = 32'h00000100,
|
||||
QUT_NAN = 32'h00000200;
|
||||
|
||||
reg [`FPU_BITS-1:0] op_r;
|
||||
reg [`FPU_BITS-1:0] op_type_r;
|
||||
reg [`FRM_BITS-1:0] frm_r;
|
||||
|
||||
reg [LANES-1:0][31:0] dataa_r;
|
||||
@@ -103,10 +103,10 @@ module VX_fp_noncomp #(
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (~stall) begin
|
||||
op_r <= op;
|
||||
frm_r <= frm;
|
||||
dataa_r <= dataa;
|
||||
datab_r <= datab;
|
||||
op_type_r <= op_type;
|
||||
frm_r <= frm;
|
||||
dataa_r <= dataa;
|
||||
datab_r <= datab;
|
||||
end
|
||||
end
|
||||
|
||||
@@ -144,10 +144,10 @@ module VX_fp_noncomp #(
|
||||
else if (b_type[i].is_nan)
|
||||
fminmax_res[i] = dataa_r[i];
|
||||
else begin
|
||||
case (op_r) // use LSB to distinguish MIN and MAX
|
||||
`FPU_MIN: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
|
||||
`FPU_MAX: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
|
||||
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
|
||||
case (frm_r) // use LSB to distinguish MIN and MAX
|
||||
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
|
||||
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
|
||||
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
|
||||
endcase
|
||||
end
|
||||
end
|
||||
@@ -156,11 +156,11 @@ module VX_fp_noncomp #(
|
||||
// Sign Injection
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (op_r)
|
||||
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
`FPU_SGNJN: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
`FPU_SGNJX: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
||||
case (frm_r)
|
||||
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
||||
endcase
|
||||
end
|
||||
end
|
||||
@@ -210,56 +210,45 @@ module VX_fp_noncomp #(
|
||||
|
||||
// outputs
|
||||
|
||||
reg tmp_valid;
|
||||
reg tmp_has_fflags;
|
||||
fflags_t [LANES-1:0] tmp_fflags;
|
||||
reg [LANES-1:0][31:0] tmp_result;
|
||||
|
||||
always @(*) begin
|
||||
case (op_r)
|
||||
`FPU_SGNJ: tmp_has_fflags = 0;
|
||||
`FPU_SGNJN: tmp_has_fflags = 0;
|
||||
`FPU_SGNJX: tmp_has_fflags = 0;
|
||||
`FPU_MVXW: tmp_has_fflags = 0;
|
||||
`FPU_MVWX: tmp_has_fflags = 0;
|
||||
`FPU_CLASS: tmp_has_fflags = 0;
|
||||
default: tmp_has_fflags = 1;
|
||||
endcase
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
tmp_valid = 1'b1;
|
||||
case (op_r)
|
||||
tmp_result[i] = 32'hdeadbeaf;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
case (op_type_r)
|
||||
`FPU_CLASS: begin
|
||||
tmp_result[i] = fclass_mask[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_MVXW,`FPU_MVWX: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_MIN,`FPU_MAX: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||
end
|
||||
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
|
||||
tmp_result[i] = fsgnj_res[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
end
|
||||
`FPU_CMP: begin
|
||||
tmp_result[i] = fcmp_res[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
|
||||
end
|
||||
default: begin
|
||||
tmp_result[i] = 32'hdeadbeaf;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
tmp_valid = 1'b0;
|
||||
end
|
||||
end
|
||||
`FPU_MISC: begin
|
||||
case (frm)
|
||||
0,1,2: begin
|
||||
tmp_result[i] = fsgnj_res[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
3,4: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||
end
|
||||
5,6: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
wire tmp_has_fflags = ((op_type_r == `FPU_MISC) && (frm == 3 || frm == 4)) // MIN/MAX
|
||||
|| (op_type_r == `FPU_CMP); // CMP
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
||||
) nc_reg (
|
||||
@@ -267,7 +256,7 @@ module VX_fp_noncomp #(
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({tmp_valid, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.in ({valid_in, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
);
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ module VX_fpnew #(
|
||||
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
input wire [`FPU_BITS-1:0] op_type,
|
||||
input wire [`MOD_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
@@ -91,7 +91,7 @@ module VX_fpnew #(
|
||||
fpu_operands[0] = dataa;
|
||||
fpu_operands[1] = datab;
|
||||
fpu_operands[2] = datac;
|
||||
case (op)
|
||||
case (op_type)
|
||||
`FPU_ADD: begin
|
||||
fpu_op = fpnew_pkg::ADD;
|
||||
fpu_operands[1] = dataa;
|
||||
@@ -110,19 +110,22 @@ module VX_fpnew #(
|
||||
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
|
||||
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
|
||||
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
|
||||
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
|
||||
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
|
||||
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
|
||||
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
|
||||
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
|
||||
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
|
||||
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
|
||||
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
|
||||
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
|
||||
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
|
||||
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
|
||||
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
|
||||
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
|
||||
`FPU_MISC: begin
|
||||
case (frm)
|
||||
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fpu_has_fflags = 0; end
|
||||
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fpu_has_fflags = 0; end
|
||||
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fpu_has_fflags = 0; end
|
||||
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
|
||||
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
|
||||
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fpu_has_fflags = 0; end
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user