fixed FPU handshake, optimized writeback's critical path
This commit is contained in:
@@ -5,8 +5,8 @@ module VX_fp_fpga (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire in_ready,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
|
||||
@@ -19,7 +19,7 @@ module VX_fp_fpga (
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
|
||||
@@ -29,31 +29,30 @@ module VX_fp_fpga (
|
||||
localparam NUM_FPC = 12;
|
||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
|
||||
wire [NUM_FPC-1:0] core_in_ready;
|
||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] core_result;
|
||||
wire fpnew_has_fflags;
|
||||
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fpnew_fflags;
|
||||
// Exception flags reported by the fpnew core, one fflags_t entry per thread.
// Sized to match the per-thread `fflags` ports (fflags_t [`NUM_THREADS-1:0])
// so that no thread's flags are dropped when this signal is forwarded.
fflags_t [`NUM_THREADS-1:0] fpnew_fflags;
|
||||
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] core_out_tag;
|
||||
wire [NUM_FPC-1:0] core_out_ready;
|
||||
wire [NUM_FPC-1:0] core_out_valid;
|
||||
|
||||
reg negate_output;
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg fmadd_negate;
|
||||
|
||||
genvar i;
|
||||
|
||||
always @(*) begin
|
||||
core_select = 0;
|
||||
negate_output = 0;
|
||||
core_select = 0;
|
||||
fmadd_negate = 0;
|
||||
case (op)
|
||||
`FPU_ADD: core_select = 1;
|
||||
`FPU_SUB: core_select = 2;
|
||||
`FPU_MUL: core_select = 3;
|
||||
`FPU_MADD: core_select = 4;
|
||||
`FPU_MSUB: core_select = 5;
|
||||
`FPU_NMSUB: begin core_select = 4; negate_output = 1; end
|
||||
`FPU_NMADD: begin core_select = 5; negate_output = 1; end
|
||||
`FPU_NMSUB: begin core_select = 4; fmadd_negate = 1; end
|
||||
`FPU_NMADD: begin core_select = 5; fmadd_negate = 1; end
|
||||
`FPU_DIV: core_select = 6;
|
||||
`FPU_SQRT: core_select = 7;
|
||||
`FPU_CVTWS: core_select = 8;
|
||||
@@ -130,7 +129,7 @@ module VX_fp_fpga (
|
||||
.in_valid (in_valid && (core_select == 4)),
|
||||
.in_ready (core_in_ready[4]),
|
||||
.in_tag (in_tag),
|
||||
.negate (negate_output),
|
||||
.negate (fmadd_negate),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
@@ -146,7 +145,7 @@ module VX_fp_fpga (
|
||||
.in_valid (in_valid && (core_select == 5)),
|
||||
.in_ready (core_in_ready[5]),
|
||||
.in_tag (in_tag),
|
||||
.negate (negate_output),
|
||||
.negate (fmadd_negate),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
@@ -250,10 +249,21 @@ module VX_fp_fpga (
|
||||
assign core_out_ready[i] = out_ready && (i == fp_index);
|
||||
end
|
||||
|
||||
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||
assign fflags = fpnew_fflags;
|
||||
assign out_tag = core_out_tag[fp_index];
|
||||
assign result = core_result[fp_index];
|
||||
assign out_valid = fp_valid;
|
||||
// Output stage: the response of the currently selected core (fp_index) is
// gathered into tmp_* signals and passed through a VX_generic_register
// instance before driving the module outputs.
wire                          tmp_valid      = fp_valid;
wire [`ISTAG_BITS-1:0]        tmp_tag        = core_out_tag[fp_index];
wire [`NUM_THREADS-1:0][31:0] tmp_result     = core_result[fp_index];

// fflags are only reported when core index 0 is the selected core.
wire                          tmp_has_fflags = fpnew_has_fflags && (fp_index == 0);

// Declared without an initializer and driven by a continuous assignment:
// on a variable (non-net) declaration, "= expr" would only set the initial
// value once instead of tracking fpnew_fflags.
fflags_t [`NUM_THREADS-1:0]   tmp_fflags;
assign tmp_fflags = fpnew_fflags;

// Register width = valid + tag + per-thread 32-bit results + has_fflags
// + per-thread fflags (`FFG_BITS for each of the `NUM_THREADS entries).
// NOTE(review): `stall` is not declared in the visible part of this module;
// confirm it is defined elsewhere in VX_fp_fpga.
VX_generic_register #(
    .N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
) nc_reg (
    .clk   (clk),
    .reset (reset),
    .stall (stall),
    .flush (1'b0),
    .in    ({tmp_valid, tmp_tag, tmp_result, tmp_has_fflags, tmp_fflags}),
    .out   ({out_valid, out_tag, result,     has_fflags,     fflags})
);
|
||||
|
||||
endmodule
|
||||
@@ -17,7 +17,7 @@ module VX_fp_noncomp (
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
|
||||
@@ -178,7 +178,7 @@ module VX_fp_noncomp (
|
||||
|
||||
reg tmp_valid;
|
||||
reg tmp_has_fflags;
|
||||
reg [`NUM_THREADS-1:0][`FFG_BITS-1:0] tmp_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] tmp_fflags;
|
||||
reg [`NUM_THREADS-1:0][31:0] tmp_result;
|
||||
|
||||
always @(*) begin
|
||||
@@ -199,27 +199,27 @@ module VX_fp_noncomp (
|
||||
case (op)
|
||||
`FPU_CLASS: begin
|
||||
tmp_result[i] = fclass_mask[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_MVXW,`FPU_MVWX: begin
|
||||
tmp_result[i] = dataa[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_MIN,`FPU_MAX: begin
|
||||
tmp_result[i] = fminmax_res[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0};
|
||||
end
|
||||
`FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin
|
||||
tmp_result[i] = fsgnj_res[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
end
|
||||
`FPU_CMP: begin
|
||||
tmp_result[i] = fcmp_res[i];
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = fcmp_excp[i];
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i];
|
||||
end
|
||||
default: begin
|
||||
tmp_result[i] = 32'hdeadbeaf;
|
||||
{tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0;
|
||||
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
|
||||
tmp_valid = 1'b0;
|
||||
end
|
||||
endcase
|
||||
@@ -230,7 +230,7 @@ module VX_fp_noncomp (
|
||||
assign in_ready = ~stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||
) nc_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -11,8 +11,8 @@ module VX_fpnew #(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire in_ready,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
|
||||
@@ -25,7 +25,7 @@ module VX_fpnew #(
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
|
||||
@@ -75,7 +75,7 @@ module VX_fpnew #(
|
||||
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
|
||||
fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status;
|
||||
|
||||
wire is_class_op_i, is_class_op_o;
|
||||
assign is_class_op_i = (op == `FPU_CLASS);
|
||||
@@ -194,7 +194,8 @@ module VX_fpnew #(
|
||||
`ENABLE_TRACING
|
||||
|
||||
assign fpu_in_valid = in_valid;
|
||||
assign in_ready = fpu_in_ready;
|
||||
assign in_ready = fpu_in_ready
|
||||
|| ~in_valid; // work around fpnew's in_ready depending on in_valid: report ready whenever no request is being presented
|
||||
|
||||
assign fpu_in_tag = in_tag;
|
||||
assign out_tag = fpu_out_tag;
|
||||
@@ -202,14 +203,7 @@ module VX_fpnew #(
|
||||
assign result = fpu_result;
|
||||
|
||||
assign has_fflags = fpu_has_fflags_o;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign fflags[i][`FFG_NX] = fpu_status[i].NX;
|
||||
assign fflags[i][`FFG_UF] = fpu_status[i].UF;
|
||||
assign fflags[i][`FFG_OF] = fpu_status[i].OF;
|
||||
assign fflags[i][`FFG_DZ] = fpu_status[i].DZ;
|
||||
assign fflags[i][`FFG_NV] = fpu_status[i].NV;
|
||||
end
|
||||
assign fflags = fpu_status;
|
||||
|
||||
assign out_valid = fpu_out_valid;
|
||||
assign fpu_out_ready = out_ready;
|
||||
|
||||
Reference in New Issue
Block a user