new fpu implementation

This commit is contained in:
Blaise Tine
2020-07-24 00:00:37 -04:00
parent f83521b7c6
commit 1f63f9da25
30 changed files with 318 additions and 258 deletions

View File

@@ -1,6 +1,4 @@
`include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_alu_unit #(
parameter CORE_ID = 0

View File

@@ -11,7 +11,7 @@ module VX_commit #(
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_fp_if fpu_commit_if,
VX_commit_if gpu_commit_if,
// outputs

View File

@@ -63,6 +63,10 @@
`define EXT_M_ENABLE 1
`endif
`ifndef EXT_F_ENABLE
`define EXT_F_ENABLE 1
`endif
// Configuration Values =======================================================
`define VENDOR_ID 0
@@ -71,6 +75,10 @@
// CSR Addresses ==============================================================
// Floating-point CSRs, placed at the standard RISC-V user-level read/write
// addresses so that the fflags/frm/fcsr encodings emitted by a standard
// toolchain reach them. The 0xFxx range is conventionally read-only and
// is kept for the machine information registers below.
`define CSR_FFLAGS 12'h001
`define CSR_FRM 12'h002
`define CSR_FCSR 12'h003
`define CSR_VEND_ID 12'hF11
`define CSR_ARCH_ID 12'hF12
`define CSR_IMPL_ID 12'hF13
@@ -91,6 +99,16 @@
`define CSR_MISA 12'h301
// Size of MUL Request Queue
`ifndef MULRQ_SIZE
`define MULRQ_SIZE 8
`endif
// Size of FPU Request Queue
`ifndef FPURQ_SIZE
`define FPURQ_SIZE 8
`endif
// Dcache Configurable Knobs ==================================================
// Size of cache in bytes
@@ -407,5 +425,4 @@
`define L3PRFQ_STRIDE 0
`endif
// VX_CONFIG
`endif

View File

@@ -4,6 +4,13 @@ module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
VX_perf_cntrs_if perf_cntrs_if,
VX_fpu_from_csr_if fpu_from_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
input wire[`NW_BITS-1:0] warp_num,
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
output reg[31:0] read_data,
@@ -12,25 +19,67 @@ module VX_csr_data #(
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
`IGNORE_WARNINGS_END
input wire[`CSR_WIDTH-1:0] write_data,
input wire[`NW_BITS-1:0] warp_num,
VX_perf_cntrs_if perf_cntrs_if
input wire[`CSR_WIDTH-1:0] write_data
);
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
reg [`FFG_BITS+`FRM_BITS-1:0] fflags_table [`NUM_WARPS-1:0];
reg [`FRM_BITS-1:0] frm_table [`NUM_WARPS-1:0];
reg [`FFG_BITS+`FRM_BITS-1:0] fcsr_table [`NUM_WARPS-1:0]; // fflags + frm
// cast address to physical CSR range
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
assign rd_addr = $size(rd_addr)'(read_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
wire [`FFG_BITS-1:0] fflags_update;
assign fflags_update[4] = fpu_to_csr_if.fflags_NV;
assign fflags_update[3] = fpu_to_csr_if.fflags_DZ;
assign fflags_update[2] = fpu_to_csr_if.fflags_OF;
assign fflags_update[1] = fpu_to_csr_if.fflags_UF;
assign fflags_update[0] = fpu_to_csr_if.fflags_NX;
integer i;
always @(posedge clk) begin
if (write_enable) begin
csr_table[wr_addr] <= write_data;
if (reset) begin
for (i = 0; i < `NUM_WARPS; i++) begin
fflags_table[i] <= 0;
frm_table[i] <= 0;
fcsr_table[i] <= 0;
end
end else begin
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: begin
fcsr_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
end
`CSR_FRM: begin
fcsr_table[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
frm_table[warp_num] <= write_data[`FRM_BITS-1:0];
end
`CSR_FCSR: begin
fcsr_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
frm_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
end
default: begin
csr_table[wr_addr] <= write_data;
end
endcase
end else if (fpu_to_csr_if.valid) begin
    // fflags holds *accrued* exception flags: a completing FPU operation may
    // only set bits, never clear bits raised by earlier operations. OR the
    // new flags into the stored value; only an explicit CSR write (handled
    // in the branch above) can clear them.
    // NOTE(review): because this is an `else if`, flags reported in the same
    // cycle as a CSR write are dropped - confirm whether that is acceptable.
    fflags_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] <= fflags_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] | fflags_update;
    fcsr_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] <= fcsr_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] | fflags_update;
end
end
end
always @(*) begin
case (read_addr)
`CSR_FFLAGS : read_data = 32'(fflags_table[warp_num]);
`CSR_FRM : read_data = 32'(frm_table[warp_num]);
`CSR_FCSR : read_data = 32'(fcsr_table[warp_num]);
`CSR_LWID : read_data = 32'(warp_num);
`CSR_GTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
@@ -50,4 +99,6 @@ module VX_csr_data #(
endcase
end
assign fpu_from_csr_if.frm = frm_table[fpu_from_csr_if.warp_num];
endmodule

View File

@@ -7,6 +7,8 @@ module VX_csr_unit #(
input wire reset,
VX_perf_cntrs_if perf_cntrs_if,
VX_fpu_from_csr_if fpu_from_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_csr_io_req_if csr_io_req_if,
@@ -48,13 +50,16 @@ module VX_csr_unit #(
.CORE_ID(CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),
.perf_cntrs_if (perf_cntrs_if),
.fpu_to_csr_if (fpu_to_csr_if),
.fpu_from_csr_if(fpu_from_csr_if),
.read_addr (csr_pipe_req_if.csr_addr),
.read_data (csr_read_data_unqual),
.write_enable (is_csr_s2),
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_addr (csr_addr_s2),
.warp_num (csr_pipe_req_if.warp_num),
.perf_cntrs_if (perf_cntrs_if)
.warp_num (csr_pipe_req_if.warp_num)
);
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)

View File

@@ -214,14 +214,14 @@ module VX_decode #(
7'h04: fpu_op = `FPU_SUB;
7'h08: fpu_op = `FPU_MUL;
7'h0C: fpu_op = `FPU_DIV;
7'h2C: fpu_op = `FPU_SQRT;
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
7'h2C: fpu_op = `FPU_SQRT;
7'h50: fpu_op = `FPU_CMP; // wb to intReg
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
7'h78: fpu_op = `FPU_MVWX;
7'h50: fpu_op = `FPU_CMP; // wb to intReg
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
default:;
endcase
end
@@ -284,14 +284,14 @@ module VX_decode #(
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
assign decode_tmp_if.rs1_is_fp = (is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4);
assign decode_tmp_if.rd_is_fp = is_fpu && ~(is_fci && ((func7 == 7'h50) || (func7 == 7'h60) || (func7 == 7'h70)));
assign decode_tmp_if.rs1_is_fp = is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4;
assign decode_tmp_if.rs2_is_fp = is_fs || (is_fci && ((func7 != 7'h60) && (func7 != 7'h68)) || is_fr4);
assign decode_tmp_if.rs3 = rs3;
assign decode_tmp_if.use_rs3 = is_fr4;
assign decode_tmp_if.frm = func3;
assign decode_tmp_if.wb = (is_fpu && (is_fl || (is_fci && ((func7 != 7'h50) || (func7 != 7'h70) || (func7 != 7'h60))) || is_fr4))
|| (~is_fpu && (rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
assign decode_tmp_if.wb = is_fpu || ((rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
assign join_if.warp_num = ifetch_rsp_if.warp_num;

View File

@@ -25,10 +25,14 @@
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off WIDTH */ \
/* verilator lint_off UNOPTFLAT */ \
/* verilator lint_off DECLFILENAME */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on WIDTH */ \
/* verilator lint_on UNOPTFLAT */ \
/* verilator lint_on DECLFILENAME */
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
@@ -76,9 +80,14 @@
`define CSR_WIDTH 12
`define DIV_LATENCY 21
`define LATENCY_IDIV 21
`define MUL_LATENCY 2
`define LATENCY_IMUL 2
`define LATENCY_FMULADD 2
`define LATENCY_FDIVSQRT 2
`define LATENCY_FCONV 2
`define LATENCY_FNONCOMP 1
///////////////////////////////////////////////////////////////////////////////
@@ -93,6 +102,15 @@
`define INST_R 7'b0110011
`define INST_F 7'b0001111
`define INST_SYS 7'b1110011
`define INST_FL 7'b0000111
`define INST_FS 7'b0100111
`define INST_FCI 7'b1010011
`define INST_FMADD 7'b1000011
`define INST_FMSUB 7'b1000111
`define INST_FNMSUB 7'b1001011
`define INST_FNMADD 7'b1001111
`define INST_GPU 7'b1101011
`define BYTEEN_SB 3'h0
@@ -150,18 +168,6 @@
`define BR_OP(x) x[`BR_BITS-1:0]
`define IS_BR_OP(x) x[4]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
`define LSU_LB {1'b0, `BYTEEN_SB}
`define LSU_LH {1'b0, `BYTEEN_SH}
`define LSU_LW {1'b0, `BYTEEN_SW}
@@ -183,6 +189,53 @@
`define CSR_BITS 2
`define CSR_OP(x) x[`CSR_BITS-1:0]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
`define MUL_MULHSU 3'h2
`define MUL_MULHU 3'h3
`define MUL_DIV 3'h4
`define MUL_DIVU 3'h5
`define MUL_REM 3'h6
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
`define FPU_ADD 5'h00
`define FPU_SUB 5'h01
`define FPU_MUL 5'h02
`define FPU_DIV 5'h03
`define FPU_SQRT 5'h04
`define FPU_MADD 5'h05
`define FPU_MSUB 5'h06
`define FPU_NMSUB 5'h07
`define FPU_NMADD 5'h08
`define FPU_SGNJ 5'h09 // FSGNJ
`define FPU_SGNJN 5'h0A // FSGNJN
`define FPU_SGNJX 5'h0B // FSGNJX
`define FPU_MIN 5'h0C // FMIN.S
`define FPU_MAX 5'h0D // FMAX.S
`define FPU_CVTWS 5'h0E // FCVT.W.S
`define FPU_CVTWUS 5'h0F // FCVT.WU.S
`define FPU_CVTSW 5'h10 // FCVT.S.W
`define FPU_CVTSWU 5'h11 // FCVT.S.WU
`define FPU_MVXW 5'h12 // MOV FP from fpReg to integer reg
`define FPU_MVWX 5'h13 // MOV FP from integer reg to fpReg
`define FPU_CLASS 5'h14
`define FPU_CMP 5'h15
`define FPU_OTHER 5'h1f
`define FPU_BITS 5
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define FRM_RNE 3'b000
`define FRM_RTZ 3'b001
`define FRM_RDN 3'b010
`define FRM_RUP 3'b011 // positive inf
`define FRM_RMM 3'b100
`define FRM_DYN 3'b111
`define FRM_BITS 3
`define FFG_BITS 5
`define GPU_TMC 3'h0
`define GPU_WSPAWN 3'h1
`define GPU_SPLIT 3'h2
@@ -194,21 +247,16 @@
`define EX_NOP 3'h0
`define EX_ALU 3'h1
`define EX_MUL 3'h2
`define EX_LSU 3'h3
`define EX_CSR 3'h4
`define EX_GPU 3'h5
`define EX_LSU 3'h2
`define EX_CSR 3'h3
`define EX_MUL 3'h4
`define EX_FPU 3'h5
`define EX_GPU 3'h6
`define EX_BITS 3
`define NUM_EXS 5
`define NUM_EXS 6
`define NE_BITS `LOG2UP(`NUM_EXS)
`define WB_NO 2'h0
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define WB_BITS 2
///////////////////////////////////////////////////////////////////////////////
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
@@ -216,14 +264,14 @@
| (0 << 2) // C - Compressed extension \
| (0 << 3) // D - Double precision floating-point extension \
| (0 << 4) // E - RV32E base ISA \
| (0 << 5) // F - Single precision floating-point extension \
| (`EXT_F_ENABLE << 5) // F - Single precision floating-point extension \
| (0 << 6) // G - Additional standard extensions present \
| (0 << 7) // H - Hypervisor mode implemented \
| (1 << 8) // I - RV32I/64I/128I base ISA \
| (0 << 9) // J - Reserved \
| (0 << 10) // K - Reserved \
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
| (1 << 12) // M - Integer Multiply/Divide extension \
| (`EXT_M_ENABLE << 12) // M - Integer Multiply/Divide extension \
| (0 << 13) // N - User level interrupts supported \
| (0 << 14) // O - Reserved \
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
@@ -241,7 +289,7 @@
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + `WB_BITS + `NR_BITS + `NW_BITS)
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
`else
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
`endif
@@ -492,16 +540,4 @@ task print_instr_op;
end
endtask
task print_wb;
input [`WB_BITS-1:0] wb;
begin
case (wb)
`WB_ALU: $write("ALU");
`WB_MEM: $write("MEM");
`WB_JAL: $write("JAL");
default: $write("NO");
endcase
end
endtask
`endif

View File

@@ -35,7 +35,7 @@ module VX_execute #(
VX_commit_if lsu_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_fp_if fpu_commit_if,
VX_commit_if gpu_commit_if,
output wire ebreak
@@ -72,6 +72,7 @@ module VX_execute #(
.reset (reset),
.perf_cntrs_if (perf_cntrs_if),
.fpu_to_csr_if (fpu_to_csr_if),
.fpu_from_csr_if(fpu_from_csr_if),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_req_if (csr_req_if),

View File

@@ -1,4 +1,6 @@
`include "VX_define.vh"
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_fpu_unit #(
parameter CORE_ID = 0
@@ -12,7 +14,7 @@ module VX_fpu_unit #(
VX_fpu_from_csr_if fpu_from_csr_if,
// outputs
VX_commit_if fpu_commit_if,
VX_commit_fp_if fpu_commit_if,
VX_fpu_to_csr_if fpu_to_csr_if
);
localparam FOP_BITS = fpnew_pkg::OP_BITS;
@@ -30,21 +32,21 @@ module VX_fpu_unit #(
};
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
'{default: `LATENCY_FDIVSQRT}, // DIVSQRT
'{default: `LATENCY_FNONCOMP}, // NONCOMP
'{default: `LATENCY_FCONV}}, // CONV
UnitTypes:'{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}}, // CONV
PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
'{default: `LATENCY_FDIVSQRT}, // DIVSQRT
'{default: `LATENCY_FNONCOMP}, // NONCOMP
'{default: `LATENCY_FCONV}}, // CONV
UnitTypes:'{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
wire fpu_in_ready;
wire fpu_in_valid;
wire fpu_out_ready;
wire fpu_out_valid;
wire fpu_in_ready, fpu_in_valid;
wire fpu_out_ready, fpu_out_valid;
wire [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
@@ -52,15 +54,13 @@ module VX_fpu_unit #(
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
assign fpu_in_valid = (| fpu_req_if.valid);
assign fpu_operands[0] = fpu_req_if.rs1_data;
assign fpu_operands[1] = fpu_req_if.rs2_data;
assign fpu_operands[2] = fpu_req_if.rs3_data;
assign fpu_req_if.ready = fpu_in_ready;
wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t fpu_status;
assign fpu_from_csr_if.warp_num = fpu_req_if.warp_num;
wire is_dyn_rnd = &(fpu_req_if.frm);
wire [`FRM_BITS-1:0] real_frm = is_dyn_rnd ? fpu_from_csr_if.frm : fpu_req_if.frm;
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
@@ -96,10 +96,12 @@ module VX_fpu_unit #(
endcase
end
assign fpu_operands = {fpu_req_if.rs3_data, fpu_req_if.rs2_data, fpu_req_if.rs1_data};
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic)
.TagType (logic [`LOG2UP(`FPURQ_SIZE)-1:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
@@ -111,26 +113,59 @@ module VX_fpu_unit #(
.dst_fmt_i (fpu_dst_fmt),
.int_fmt_i (fpu_int_fmt),
.vectorial_op_i (1'b1),
.tag_i (1'b0),
.tag_i (fpu_in_tag),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status),
`UNUSED_PIN (tag_o),
.tag_o (fpu_out_tag),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
assign fpu_commit_if.valid = fpu_req_if.valid & {`NUM_THREADS{fpu_out_valid}};
assign fpu_commit_if.data = fpu_result;
assign fpu_commit_if.wb = fpu_req_if.wb;
assign fpu_commit_if.rd = fpu_req_if.rd;
assign fpu_out_ready = fpu_commit_if.ready;
wire req_push = fpu_req_if.valid && fpu_req_if.ready;
wire req_pop = fpu_out_valid && fpu_out_ready;
wire req_full;
wire [`NUM_THREADS-1:0] rsp_valid;
wire [`NW_BITS-1:0] rsp_warp_num;
wire [31:0] rsp_curr_PC;
wire rsp_wb;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_rd_is_fp;
VX_index_queue #(
.DATAW (`NUM_THREADS + `NW_BITS + 32 + 1 + `NR_BITS + 1),
.SIZE (`FPURQ_SIZE)
) fpu_req_queue (
.clk (clk),
.reset (reset),
.write_data ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp}),
.write_addr (fpu_in_tag),
.push (req_push),
.full (req_full),
.pop (req_pop),
.read_addr (fpu_out_tag),
.read_data ({rsp_valid, rsp_warp_num, rsp_curr_PC, rsp_wb, rsp_rd, rsp_rd_is_fp}),
`UNUSED_PIN (empty)
);
assign fpu_in_valid = (| fpu_req_if.valid) && ~req_full;
assign fpu_req_if.ready = fpu_in_ready && ~req_full;
assign fpu_commit_if.valid = rsp_valid & {`NUM_THREADS{fpu_out_valid}};
assign fpu_commit_if.warp_num = rsp_warp_num;
assign fpu_commit_if.curr_PC = rsp_curr_PC;
assign fpu_commit_if.data = fpu_result;
assign fpu_commit_if.wb = rsp_wb;
assign fpu_commit_if.rd = rsp_rd;
assign fpu_commit_if.rd_is_fp = rsp_rd_is_fp;
assign fpu_out_ready = fpu_commit_if.ready;
assign fpu_to_csr_if.valid = fpu_out_valid;
assign fpu_to_csr_if.warp_num = fpu_req_if.warp_num;
assign fpu_to_csr_if.warp_num = rsp_warp_num;
assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
assign fpu_to_csr_if.fflags_OF = fpu_status.OF;

View File

@@ -50,7 +50,7 @@ module VX_gpr_fp_ctrl (
if (decode_if.rs1_is_fp) begin
tmp_rs1_data <= rs1_fp_data;
end else begin
tmp_rs1_data <= decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : rs1_int_data;
tmp_rs1_data <= decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : rs1_int_data;
end
end
end

View File

@@ -34,7 +34,7 @@ module VX_gpr_stage #(
// Int GPRs
VX_gpr_ram gpr_int_ram (
.clk (clk),
.we (we[i] & {`NUM_THREADS{~writeback_if.is_fp}}),
.we (we[i] & {`NUM_THREADS{~writeback_if.rd_is_fp}}),
.waddr (writeback_if.rd),
.wdata (writeback_if.data),
.rs1 (raddr1),
@@ -46,7 +46,7 @@ module VX_gpr_stage #(
// FP GPRs
VX_gpr_ram gpr_fp_ram (
.clk (clk),
.we (we[i] & {`NUM_THREADS{writeback_if.is_fp}}),
.we (we[i] & {`NUM_THREADS{writeback_if.rd_is_fp}}),
.waddr (writeback_if.rd),
.wdata (writeback_if.data),
.rs1 (raddr1),

View File

@@ -23,7 +23,7 @@ module VX_icache_stage #(
wire valid_inst = (| ifetch_req_if.valid);
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
wire mrq_full;
wire mrq_push = icache_req_if.valid && icache_req_if.ready;
@@ -32,18 +32,18 @@ module VX_icache_stage #(
assign mrq_read_addr = icache_rsp_if.tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
VX_index_queue #(
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
.DATAW (32 + `NW_BITS),
.SIZE (`ICREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
.write_data ({ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
.read_data ({ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
`UNUSED_PIN (empty)
);
@@ -51,9 +51,6 @@ module VX_icache_stage #(
if (mrq_push) begin
valid_threads[ifetch_req_if.warp_num] <= ifetch_req_if.valid;
end
if (mrq_pop) begin
assert(mrq_read_addr == dbg_mrq_write_addr);
end
end
// Icache Request
@@ -67,7 +64,7 @@ module VX_icache_stage #(
assign ifetch_req_if.ready = !mrq_full && icache_req_if.ready;
`ifdef DBG_CORE_REQ_INFO
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 2'b1, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
`else
assign icache_req_if.tag = mrq_write_addr;
`endif

View File

@@ -64,7 +64,7 @@ module VX_issue #(
VX_fpu_req_if fpu_req_tmp_if();
VX_gpu_req_if gpu_req_tmp_if();
VX_issue_mux issue_mux (
VX_issue_demux issue_demux (
.decode_if (decode_if),
.gpr_data_if (gpr_data_if),
.alu_req_if (alu_req_tmp_if),
@@ -134,14 +134,14 @@ module VX_issue #(
);
VX_generic_register #(
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
) fpu_reg (
.clk (clk),
.reset (reset),
.stall (stall_fpu),
.flush (flush_fpu),
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rd_is_fp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
);
VX_generic_register #(

View File

@@ -1,90 +0,0 @@
`include "VX_define.vh"
// VX_issue_mux
//
// Purely combinational fan-out of one decoded instruction (decode_if) and
// its register operands (gpr_data_if) to the per-unit request interfaces.
// The payload fields of every unit are driven unconditionally; only the
// per-thread valid mask is gated, so that it is non-zero solely on the
// interface whose unit matches decode_if.ex_type. The FPU and GPU request
// interfaces get no rd/wb-independent gating beyond that, and the GPU
// request carries no rd/wb fields at all.
module VX_issue_mux (
    // inputs
    VX_decode_if    decode_if,
    VX_gpr_data_if  gpr_data_if,

    // outputs
    VX_alu_req_if   alu_req_if,
    VX_lsu_req_if   lsu_req_if,
    VX_csr_req_if   csr_req_if,
    VX_mul_req_if   mul_req_if,
    VX_fpu_req_if   fpu_req_if,
    VX_gpu_req_if   gpu_req_if
);
    // One-bit unit selects decoded from the execution type.
    wire sel_alu = (decode_if.ex_type == `EX_ALU);
    wire sel_lsu = (decode_if.ex_type == `EX_LSU);
    wire sel_csr = (decode_if.ex_type == `EX_CSR);
    wire sel_mul = (decode_if.ex_type == `EX_MUL);
    wire sel_fpu = (decode_if.ex_type == `EX_FPU);
    wire sel_gpu = (decode_if.ex_type == `EX_GPU);

    // Per-thread valid masks: the decode thread mask, zeroed for every unit
    // that is not the selected one.
    assign alu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_alu}};
    assign lsu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_lsu}};
    assign csr_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_csr}};
    assign mul_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_mul}};
    assign fpu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_fpu}};
    assign gpu_req_if.valid = decode_if.valid & {`NUM_THREADS{sel_gpu}};

    // ---------------------------------------------------------------- ALU
    assign alu_req_if.warp_num   = decode_if.warp_num;
    assign alu_req_if.curr_PC    = decode_if.curr_PC;
    assign alu_req_if.next_PC    = decode_if.next_PC;
    assign alu_req_if.alu_op     = `ALU_OP(decode_if.instr_op);
    assign alu_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign alu_req_if.rs2_data   = gpr_data_if.rs2_data;
    assign alu_req_if.offset     = decode_if.imm;
    assign alu_req_if.wb         = decode_if.wb;
    assign alu_req_if.rd         = decode_if.rd;

    // ---------------------------------------------------------------- LSU
    assign lsu_req_if.warp_num   = decode_if.warp_num;
    assign lsu_req_if.curr_PC    = decode_if.curr_PC;
    assign lsu_req_if.rw         = `LSU_RW(decode_if.instr_op);
    assign lsu_req_if.byteen     = `LSU_BE(decode_if.instr_op);
    assign lsu_req_if.base_addr  = gpr_data_if.rs1_data;   // address base comes from rs1
    assign lsu_req_if.offset     = decode_if.imm;
    assign lsu_req_if.store_data = gpr_data_if.rs2_data;   // store payload comes from rs2
    assign lsu_req_if.wb         = decode_if.wb;
    assign lsu_req_if.rd         = decode_if.rd;

    // ---------------------------------------------------------------- CSR
    assign csr_req_if.warp_num   = decode_if.warp_num;
    assign csr_req_if.curr_PC    = decode_if.curr_PC;
    assign csr_req_if.csr_op     = `CSR_OP(decode_if.instr_op);
    assign csr_req_if.csr_addr   = decode_if.imm[`CSR_ADDR_SIZE-1:0];
    // Mask operand: the rs1 field itself cast to 32 bits for the immediate
    // forms, otherwise element 0 of the rs1 operand data
    // (presumably thread 0's value - confirm against VX_gpr_data_if).
    assign csr_req_if.csr_mask   = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_data_if.rs1_data[0];
    assign csr_req_if.is_io      = 1'b0;                   // requests issued here are never flagged as I/O
    assign csr_req_if.wb         = decode_if.wb;
    assign csr_req_if.rd         = decode_if.rd;

    // ---------------------------------------------------------------- MUL
    assign mul_req_if.warp_num   = decode_if.warp_num;
    assign mul_req_if.curr_PC    = decode_if.curr_PC;
    assign mul_req_if.mul_op     = `MUL_OP(decode_if.instr_op);
    assign mul_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign mul_req_if.rs2_data   = gpr_data_if.rs2_data;
    assign mul_req_if.wb         = decode_if.wb;
    assign mul_req_if.rd         = decode_if.rd;

    // ---------------------------------------------------------------- FPU
    assign fpu_req_if.warp_num   = decode_if.warp_num;
    assign fpu_req_if.curr_PC    = decode_if.curr_PC;
    assign fpu_req_if.fpu_op     = `FPU_OP(decode_if.instr_op);
    assign fpu_req_if.frm        = decode_if.frm;
    assign fpu_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign fpu_req_if.rs2_data   = gpr_data_if.rs2_data;
    assign fpu_req_if.rs3_data   = gpr_data_if.rs3_data;
    assign fpu_req_if.wb         = decode_if.wb;
    assign fpu_req_if.rd         = decode_if.rd;

    // ---------------------------------------------------------------- GPU
    assign gpu_req_if.warp_num   = decode_if.warp_num;
    assign gpu_req_if.curr_PC    = decode_if.curr_PC;
    assign gpu_req_if.next_PC    = decode_if.next_PC;
    assign gpu_req_if.gpu_op     = `GPU_OP(decode_if.instr_op);
    assign gpu_req_if.rs1_data   = gpr_data_if.rs1_data;
    assign gpu_req_if.rs2_data   = gpr_data_if.rs2_data[0]; // only element 0 of rs2 is forwarded

endmodule

View File

@@ -81,7 +81,7 @@ module VX_lsu_unit #(
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr;
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
@@ -97,18 +97,18 @@ module VX_lsu_unit #(
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
VX_index_queue #(
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
.DATAW (32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
.SIZE (`DCREQ_SIZE)
) mem_req_queue (
.clk (clk),
.reset (reset),
.write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
.write_data ({use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
.write_addr (mrq_write_addr),
.push (mrq_push),
.full (mrq_full),
.pop (mrq_pop),
.read_addr (mrq_read_addr),
.read_data ({dbg_mrq_write_addr, lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
.read_data ({lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
`UNUSED_PIN (empty)
);
@@ -118,7 +118,6 @@ module VX_lsu_unit #(
end
if (mrq_pop_part) begin
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd;
assert(($time < 2) || mrq_read_addr == dbg_mrq_write_addr);
end
end

View File

@@ -119,7 +119,7 @@ module VX_pipeline #(
VX_commit_if lsu_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if();
VX_commit_if fpu_commit_if();
VX_commit_fp_if fpu_commit_if();
VX_commit_if gpu_commit_if();
VX_fetch #(

View File

@@ -20,14 +20,17 @@ module VX_scheduler #(
);
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
reg [`NUM_REGS-1:0][`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0];
reg [`NUM_REGS-1:0] busy_table [`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0][(`NUM_REGS*2)-1:0];
reg busy_table [`NUM_WARPS-1:0][(`NUM_REGS*2)-1:0];
reg [CTVW-1:0] count_valid;
wire rs1_rename = busy_table[decode_if.warp_num][decode_if.rs1];
wire rs2_rename = busy_table[decode_if.warp_num][decode_if.rs2];
wire rs3_rename = busy_table[decode_if.warp_num][decode_if.rs3];
wire rd_rename = busy_table[decode_if.warp_num][decode_if.rd];
// Table index = {is_fp, register number}: integer registers occupy the lower
// half of each per-warp table, floating-point registers the upper half.
// These are declared as nets so the declaration assignment is a continuous
// assignment; a `reg x = expr;` declaration only initializes x once.
wire [`NR_BITS:0] read_rd = {decode_if.rd_is_fp, decode_if.rd};
wire [`NR_BITS:0] write_rd = {writeback_if.rd_is_fp, writeback_if.rd};
// Each source operand is looked up in the bank selected by its own *_is_fp
// flag (rs2 must use rs2_is_fp, not rs1_is_fp); rs3 is always looked up in
// the floating-point half.
wire rs1_rename = busy_table[decode_if.warp_num][{decode_if.rs1_is_fp, decode_if.rs1}];
wire rs2_rename = busy_table[decode_if.warp_num][{decode_if.rs2_is_fp, decode_if.rs2}];
wire rs3_rename = busy_table[decode_if.warp_num][{1'b1, decode_if.rs3}];
wire rd_rename = busy_table[decode_if.warp_num][read_rd];
wire rs1_rename_qual = rs1_rename && decode_if.use_rs1;
wire rs2_rename_qual = rs2_rename && decode_if.use_rs2;
@@ -50,7 +53,7 @@ module VX_scheduler #(
wire release_rd = (| writeback_if.valid);
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][write_rd] & ~writeback_if.valid;
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
@@ -67,13 +70,13 @@ module VX_scheduler #(
count_valid <= 0;
end else begin
if (acquire_rd) begin
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
busy_table[decode_if.warp_num][decode_if.rd] <= 1;
rename_table[decode_if.warp_num][read_rd] <= decode_if.valid;
busy_table[decode_if.warp_num][read_rd] <= 1;
end
if (release_rd) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
busy_table[writeback_if.warp_num][writeback_if.rd] <= (| valid_wb_new_mask);
assert(rename_table[writeback_if.warp_num][write_rd] != 0);
rename_table[writeback_if.warp_num][write_rd] <= valid_wb_new_mask;
busy_table[writeback_if.warp_num][write_rd] <= (| valid_wb_new_mask);
end
count_valid <= count_valid_next;
end

View File

@@ -145,7 +145,7 @@
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
wire [31:0] scope_writeback_curr_PC; \
wire [`WB_BITS-1:0] scope_writeback_wb; \
wire scope_writeback_wb; \
wire [`NR_BITS-1:0] scope_writeback_rd; \
wire [63:0] scope_writeback_data; \
wire scope_bank_valid_st0; \
@@ -224,7 +224,7 @@
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
output wire [31:0] scope_writeback_curr_PC, \
output wire [`WB_BITS-1:0] scope_writeback_wb, \
output wire scope_writeback_wb, \
output wire [`NR_BITS-1:0] scope_writeback_rd, \
output wire [63:0] scope_writeback_data,

View File

@@ -10,7 +10,7 @@ module VX_writeback #(
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_fp_if fpu_commit_if,
VX_commit_if csr_commit_if,
// outputs
@@ -26,30 +26,39 @@ module VX_writeback #(
VX_wb_if writeback_tmp_if();
assign writeback_tmp_if.valid = lsu_valid ? lsu_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
mul_valid ? mul_commit_if.valid :
alu_valid ? alu_commit_if.valid :
csr_valid ? csr_commit_if.valid :
0;
assign writeback_tmp_if.warp_num = lsu_valid ? lsu_commit_if.warp_num :
fpu_valid ? fpu_commit_if.warp_num :
mul_valid ? mul_commit_if.warp_num :
alu_valid ? alu_commit_if.warp_num :
csr_valid ? csr_commit_if.warp_num :
0;
assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data :
mul_valid ? mul_commit_if.data :
alu_valid ? alu_commit_if.data :
csr_valid ? csr_commit_if.data :
0;
assign writeback_tmp_if.rd = lsu_valid ? lsu_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
mul_valid ? mul_commit_if.rd :
alu_valid ? alu_commit_if.rd :
csr_valid ? csr_commit_if.rd :
0;
assign writeback_tmp_if.is_fp = fpu_valid && fpu_commit_if.ready;
assign writeback_tmp_if.rd_is_fp = lsu_valid ? 0 :
fpu_valid ? fpu_commit_if.rd_is_fp :
mul_valid ? 0 :
alu_valid ? 0 :
csr_valid ? 0 :
0;
assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data :
fpu_valid ? fpu_commit_if.data :
mul_valid ? mul_commit_if.data :
alu_valid ? alu_commit_if.data :
csr_valid ? csr_commit_if.data :
0;
wire stall = ~writeback_if.ready && (| writeback_if.valid);
@@ -60,8 +69,8 @@ module VX_writeback #(
.reset (reset),
.stall (stall),
.flush (0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.is_fp}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data, writeback_if.is_fp})
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
);
assign lsu_commit_if.ready = !stall;

View File

@@ -106,7 +106,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_use_pc_st0;
wire[`WB_BITS-1:0] debug_wb_st0;
wire debug_wb_st0;
wire[`NR_BITS-1:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
wire debug_rw_st0;
@@ -115,7 +115,7 @@ module VX_bank #(
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
wire[31:0] debug_use_pc_st1e;
wire[`WB_BITS-1:0] debug_wb_st1e;
wire debug_wb_st1e;
wire[`NR_BITS-1:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
wire debug_rw_st1e;
@@ -124,7 +124,7 @@ module VX_bank #(
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
wire[31:0] debug_use_pc_st2;
wire[`WB_BITS-1:0] debug_wb_st2;
wire debug_wb_st2;
wire[`NR_BITS-1:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
wire debug_rw_st2;

View File

@@ -130,7 +130,7 @@ module VX_cache #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_core_req_use_pc;
wire[`WB_BITS-1:0] debug_core_req_wb;
wire debug_core_req_wb;
wire[`NR_BITS-1:0] debug_core_req_rd;
wire[`NW_BITS-1:0] debug_core_req_warp_num;
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;

View File

@@ -65,13 +65,13 @@ module VX_snp_forwarder #(
) snp_fwd_queue (
.clk (clk),
.reset (reset),
.write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}),
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.write_addr (sfq_write_addr),
.push (sfq_push),
.pop (sfq_pop),
.full (sfq_full),
.read_addr (sfq_read_addr),
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
`UNUSED_PIN (empty)
);
@@ -81,7 +81,6 @@ module VX_snp_forwarder #(
end
if (fwdin_fire) begin
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
assert(sfq_read_addr == dbg_sfq_write_addr);
end
end

View File

@@ -27,6 +27,7 @@ interface VX_decode_if ();
// FP states
wire [`NR_BITS-1:0] rs3;
wire use_rs3;
wire rd_is_fp;
wire rs1_is_fp;
wire rs2_is_fp;
wire [`FRM_BITS-1:0] frm;

View File

@@ -5,11 +5,8 @@
interface VX_fpu_from_csr_if ();
`IGNORE_WARNINGS_BEGIN
wire [`NUM_WARPS-1:0][`FRM_BITS-1:0] frm;
`IGNORE_WARNINGS_END
wire [`NW_BITS-1:0] warp_num;
wire [`FRM_BITS-1:0] frm;
endinterface

View File

@@ -14,6 +14,7 @@ interface VX_fpu_req_if ();
wire wb;
wire [`NR_BITS-1:0] rd;
wire rd_is_fp;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;

View File

@@ -5,7 +5,6 @@
interface VX_fpu_to_csr_if ();
`IGNORE_WARNINGS_BEGIN
wire valid;
wire [`NW_BITS-1:0] warp_num;
@@ -16,8 +15,6 @@ interface VX_fpu_to_csr_if ();
wire fflags_UF;
wire fflags_NX;
`IGNORE_WARNINGS_END
endinterface
`endif

View File

@@ -8,8 +8,8 @@ interface VX_wb_if ();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
wire [`NR_BITS-1:0] rd;
wire rd_is_fp;
wire [`NUM_THREADS-1:0][31:0] data;
wire is_fp;
wire ready;
endinterface

View File

@@ -17,7 +17,9 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate
INCLUDE += -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
SRCS = simulator.cpp testbench.cpp
@@ -29,6 +31,8 @@ VF += --language 1800-2009 --assert -Wall -Wpedantic
VF += -Wno-DECLFILENAME
VF += --x-initial unique --x-assign unique
VF += -exe $(SRCS) $(INCLUDE)
VF += -cc Vortex.v -top-module Vortex
VF += verilator.vlt
DBG += -DVCD_OUTPUT $(DBG_FLAGS)
DBG += -DDBG_CORE_REQ_INFO
@@ -36,22 +40,22 @@ DBG += -DDBG_CORE_REQ_INFO
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
gen-s:
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
gen-sd:
verilator $(VF) -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
verilator $(VF) $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
gen-st:
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
gen-m:
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md:
verilator $(VF) -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(MULTICORE)' --trace $(DBG)
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(MULTICORE)' --trace $(DBG)
gen-mt:
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
build-s: gen-s
(cd obj_dir && make -j -f VVortex.mk)