new fpu implementation
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
`include "VX_define.vh"
|
||||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
|
||||
@@ -11,7 +11,7 @@ module VX_commit #(
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_fp_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
|
||||
@@ -63,6 +63,10 @@
|
||||
`define EXT_M_ENABLE 1
|
||||
`endif
|
||||
|
||||
`ifndef EXT_F_ENABLE
|
||||
`define EXT_F_ENABLE 1
|
||||
`endif
|
||||
|
||||
// Configuration Values =======================================================
|
||||
|
||||
`define VENDOR_ID 0
|
||||
@@ -71,6 +75,10 @@
|
||||
|
||||
// CSR Addresses ==============================================================
|
||||
|
||||
`define CSR_FFLAGS 12'hF01
|
||||
`define CSR_FRM 12'hF02
|
||||
`define CSR_FCSR 12'hF03
|
||||
|
||||
`define CSR_VEND_ID 12'hF11
|
||||
`define CSR_ARCH_ID 12'hF12
|
||||
`define CSR_IMPL_ID 12'hF13
|
||||
@@ -91,6 +99,16 @@
|
||||
|
||||
`define CSR_MISA 12'h301
|
||||
|
||||
// Size of MUL Request Queue Size
|
||||
`ifndef MULRQ_SIZE
|
||||
`define MULRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Size of FPU Request Queue Size
|
||||
`ifndef FPURQ_SIZE
|
||||
`define FPURQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs ==================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
@@ -407,5 +425,4 @@
|
||||
`define L3PRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// VX_CONFIG
|
||||
`endif
|
||||
|
||||
@@ -4,6 +4,13 @@ module VX_csr_data #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
VX_fpu_from_csr_if fpu_from_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
input wire[`NW_BITS-1:0] warp_num,
|
||||
|
||||
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
|
||||
output reg[31:0] read_data,
|
||||
@@ -12,25 +19,67 @@ module VX_csr_data #(
|
||||
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
|
||||
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
|
||||
`IGNORE_WARNINGS_END
|
||||
input wire[`CSR_WIDTH-1:0] write_data,
|
||||
input wire[`NW_BITS-1:0] warp_num,
|
||||
VX_perf_cntrs_if perf_cntrs_if
|
||||
input wire[`CSR_WIDTH-1:0] write_data
|
||||
);
|
||||
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
||||
|
||||
reg [`FFG_BITS+`FRM_BITS-1:0] fflags_table [`NUM_WARPS-1:0];
|
||||
reg [`FRM_BITS-1:0] frm_table [`NUM_WARPS-1:0];
|
||||
reg [`FFG_BITS+`FRM_BITS-1:0] fcsr_table [`NUM_WARPS-1:0]; // fflags + frm
|
||||
|
||||
// cast address to physical CSR range
|
||||
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
|
||||
assign rd_addr = $size(rd_addr)'(read_addr);
|
||||
assign wr_addr = $size(wr_addr)'(write_addr);
|
||||
|
||||
wire [`FFG_BITS-1:0] fflags_update;
|
||||
assign fflags_update[4] = fpu_to_csr_if.fflags_NV;
|
||||
assign fflags_update[3] = fpu_to_csr_if.fflags_DZ;
|
||||
assign fflags_update[2] = fpu_to_csr_if.fflags_OF;
|
||||
assign fflags_update[1] = fpu_to_csr_if.fflags_UF;
|
||||
assign fflags_update[0] = fpu_to_csr_if.fflags_NX;
|
||||
|
||||
integer i;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
fflags_table[i] <= 0;
|
||||
frm_table[i] <= 0;
|
||||
fcsr_table[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`CSR_FFLAGS: begin
|
||||
fcsr_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
end
|
||||
`CSR_FRM: begin
|
||||
fcsr_table[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||
frm_table[warp_num] <= write_data[`FRM_BITS-1:0];
|
||||
end
|
||||
`CSR_FCSR: begin
|
||||
fcsr_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||
frm_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
||||
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||
end
|
||||
default: begin
|
||||
csr_table[wr_addr] <= write_data;
|
||||
end
|
||||
endcase
|
||||
end else if (fpu_to_csr_if.valid) begin
|
||||
fflags_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] <= fflags_update;
|
||||
fcsr_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] <= fflags_update;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
case (read_addr)
|
||||
`CSR_FFLAGS : read_data = 32'(fflags_table[warp_num]);
|
||||
`CSR_FRM : read_data = 32'(frm_table[warp_num]);
|
||||
`CSR_FCSR : read_data = 32'(fcsr_table[warp_num]);
|
||||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
`CSR_GTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
@@ -50,4 +99,6 @@ module VX_csr_data #(
|
||||
endcase
|
||||
end
|
||||
|
||||
assign fpu_from_csr_if.frm = frm_table[fpu_from_csr_if.warp_num];
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -7,6 +7,8 @@ module VX_csr_unit #(
|
||||
input wire reset,
|
||||
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
|
||||
VX_fpu_from_csr_if fpu_from_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
@@ -48,13 +50,16 @@ module VX_csr_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.perf_cntrs_if (perf_cntrs_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_from_csr_if(fpu_from_csr_if),
|
||||
.read_addr (csr_pipe_req_if.csr_addr),
|
||||
.read_data (csr_read_data_unqual),
|
||||
.write_enable (is_csr_s2),
|
||||
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.write_addr (csr_addr_s2),
|
||||
.warp_num (csr_pipe_req_if.warp_num),
|
||||
.perf_cntrs_if (perf_cntrs_if)
|
||||
.warp_num (csr_pipe_req_if.warp_num)
|
||||
);
|
||||
|
||||
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
|
||||
|
||||
@@ -214,14 +214,14 @@ module VX_decode #(
|
||||
7'h04: fpu_op = `FPU_SUB;
|
||||
7'h08: fpu_op = `FPU_MUL;
|
||||
7'h0C: fpu_op = `FPU_DIV;
|
||||
7'h2C: fpu_op = `FPU_SQRT;
|
||||
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
|
||||
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
|
||||
7'h2C: fpu_op = `FPU_SQRT;
|
||||
7'h50: fpu_op = `FPU_CMP; // wb to intReg
|
||||
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
|
||||
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
|
||||
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
|
||||
7'h78: fpu_op = `FPU_MVWX;
|
||||
7'h50: fpu_op = `FPU_CMP; // wb to intReg
|
||||
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
@@ -284,14 +284,14 @@ module VX_decode #(
|
||||
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
|
||||
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
|
||||
|
||||
assign decode_tmp_if.rs1_is_fp = (is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4);
|
||||
assign decode_tmp_if.rd_is_fp = is_fpu && ~(is_fci && ((func7 == 7'h50) || (func7 == 7'h60) || (func7 == 7'h70)));
|
||||
assign decode_tmp_if.rs1_is_fp = is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4;
|
||||
assign decode_tmp_if.rs2_is_fp = is_fs || (is_fci && ((func7 != 7'h60) && (func7 != 7'h68)) || is_fr4);
|
||||
assign decode_tmp_if.rs3 = rs3;
|
||||
assign decode_tmp_if.use_rs3 = is_fr4;
|
||||
assign decode_tmp_if.frm = func3;
|
||||
|
||||
assign decode_tmp_if.wb = (is_fpu && (is_fl || (is_fci && ((func7 != 7'h50) || (func7 != 7'h70) || (func7 != 7'h60))) || is_fr4))
|
||||
|| (~is_fpu && (rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
|
||||
assign decode_tmp_if.wb = is_fpu || ((rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
|
||||
|
||||
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
@@ -25,10 +25,14 @@
|
||||
|
||||
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
|
||||
/* verilator lint_off PINCONNECTEMPTY */ \
|
||||
/* verilator lint_off WIDTH */ \
|
||||
/* verilator lint_off UNOPTFLAT */ \
|
||||
/* verilator lint_off DECLFILENAME */
|
||||
|
||||
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
|
||||
/* verilator lint_on PINCONNECTEMPTY */ \
|
||||
/* verilator lint_on WIDTH */ \
|
||||
/* verilator lint_on UNOPTFLAT */ \
|
||||
/* verilator lint_on DECLFILENAME */
|
||||
|
||||
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
|
||||
@@ -76,9 +80,14 @@
|
||||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
`define DIV_LATENCY 21
|
||||
`define LATENCY_IDIV 21
|
||||
|
||||
`define MUL_LATENCY 2
|
||||
`define LATENCY_IMUL 2
|
||||
|
||||
`define LATENCY_FMULADD 2
|
||||
`define LATENCY_FDIVSQRT 2
|
||||
`define LATENCY_FCONV 2
|
||||
`define LATENCY_FNONCOMP 1
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -93,6 +102,15 @@
|
||||
`define INST_R 7'b0110011
|
||||
`define INST_F 7'b0001111
|
||||
`define INST_SYS 7'b1110011
|
||||
|
||||
`define INST_FL 7'b0000111
|
||||
`define INST_FS 7'b0100111
|
||||
`define INST_FCI 7'b1010011
|
||||
`define INST_FMADD 7'b1000011
|
||||
`define INST_FMSUB 7'b1000111
|
||||
`define INST_FNMSUB 7'b1001011
|
||||
`define INST_FNMADD 7'b1001111
|
||||
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
`define BYTEEN_SB 3'h0
|
||||
@@ -150,18 +168,6 @@
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define IS_BR_OP(x) x[4]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||
`define LSU_LW {1'b0, `BYTEEN_SW}
|
||||
@@ -183,6 +189,53 @@
|
||||
`define CSR_BITS 2
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
`define MUL_MUL 3'h0
|
||||
`define MUL_MULH 3'h1
|
||||
`define MUL_MULHSU 3'h2
|
||||
`define MUL_MULHU 3'h3
|
||||
`define MUL_DIV 3'h4
|
||||
`define MUL_DIVU 3'h5
|
||||
`define MUL_REM 3'h6
|
||||
`define MUL_REMU 3'h7
|
||||
`define MUL_BITS 3
|
||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
|
||||
`define FPU_ADD 5'h00
|
||||
`define FPU_SUB 5'h01
|
||||
`define FPU_MUL 5'h02
|
||||
`define FPU_DIV 5'h03
|
||||
`define FPU_SQRT 5'h04
|
||||
`define FPU_MADD 5'h05
|
||||
`define FPU_MSUB 5'h06
|
||||
`define FPU_NMSUB 5'h07
|
||||
`define FPU_NMADD 5'h08
|
||||
`define FPU_SGNJ 5'h09 // FSGNJ
|
||||
`define FPU_SGNJN 5'h0A // FSGNJN
|
||||
`define FPU_SGNJX 5'h0B // FSGNJX
|
||||
`define FPU_MIN 5'h0C // FMIN.S
|
||||
`define FPU_MAX 5'h0D // FMAX.S
|
||||
`define FPU_CVTWS 5'h0E // FCVT.W.S
|
||||
`define FPU_CVTWUS 5'h0F // FCVT.WU.S
|
||||
`define FPU_CVTSW 5'h10 // FCVT.S.W
|
||||
`define FPU_CVTSWU 5'h11 // FCVT.S.WU
|
||||
`define FPU_MVXW 5'h12 // MOV FP from fpReg to integer reg
|
||||
`define FPU_MVWX 5'h13 // MOV FP from integer reg to fpReg
|
||||
`define FPU_CLASS 5'h14
|
||||
`define FPU_CMP 5'h15
|
||||
`define FPU_OTHER 5'h1f
|
||||
`define FPU_BITS 5
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
|
||||
`define FRM_RNE 3'b000
|
||||
`define FRM_RTZ 3'b001
|
||||
`define FRM_RDN 3'b010
|
||||
`define FRM_RUP 3'b011 // positive inf
|
||||
`define FRM_RMM 3'b100
|
||||
`define FRM_DYN 3'b111
|
||||
`define FRM_BITS 3
|
||||
`define FFG_BITS 5
|
||||
|
||||
`define GPU_TMC 3'h0
|
||||
`define GPU_WSPAWN 3'h1
|
||||
`define GPU_SPLIT 3'h2
|
||||
@@ -194,21 +247,16 @@
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_MUL 3'h2
|
||||
`define EX_LSU 3'h3
|
||||
`define EX_CSR 3'h4
|
||||
`define EX_GPU 3'h5
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 5
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
`define WB_NO 2'h0
|
||||
`define WB_ALU 2'h1
|
||||
`define WB_MEM 2'h2
|
||||
`define WB_JAL 2'h3
|
||||
`define WB_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define ISA_CODE (0 << 0) // A - Atomic Instructions extension \
|
||||
@@ -216,14 +264,14 @@
|
||||
| (0 << 2) // C - Compressed extension \
|
||||
| (0 << 3) // D - Double precsision floating-point extension \
|
||||
| (0 << 4) // E - RV32E base ISA \
|
||||
| (0 << 5) // F - Single precsision floating-point extension \
|
||||
| (`EXT_F_ENABLE << 5) // F - Single precsision floating-point extension \
|
||||
| (0 << 6) // G - Additional standard extensions present \
|
||||
| (0 << 7) // H - Hypervisor mode implemented \
|
||||
| (1 << 8) // I - RV32I/64I/128I base ISA \
|
||||
| (0 << 9) // J - Reserved \
|
||||
| (0 << 10) // K - Reserved \
|
||||
| (0 << 11) // L - Tentatively reserved for Bit operations extension \
|
||||
| (1 << 12) // M - Integer Multiply/Divide extension \
|
||||
| (`EXT_M_ENABLE << 12) // M - Integer Multiply/Divide extension \
|
||||
| (0 << 13) // N - User level interrupts supported \
|
||||
| (0 << 14) // O - Reserved \
|
||||
| (0 << 15) // P - Tentatively reserved for Packed-SIMD extension \
|
||||
@@ -241,7 +289,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + `WB_BITS + `NR_BITS + `NW_BITS)
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||
`endif
|
||||
@@ -492,16 +540,4 @@ task print_instr_op;
|
||||
end
|
||||
endtask
|
||||
|
||||
task print_wb;
|
||||
input [`WB_BITS-1:0] wb;
|
||||
begin
|
||||
case (wb)
|
||||
`WB_ALU: $write("ALU");
|
||||
`WB_MEM: $write("MEM");
|
||||
`WB_JAL: $write("JAL");
|
||||
default: $write("NO");
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
@@ -35,7 +35,7 @@ module VX_execute #(
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_fp_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
output wire ebreak
|
||||
@@ -72,6 +72,7 @@ module VX_execute #(
|
||||
.reset (reset),
|
||||
.perf_cntrs_if (perf_cntrs_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_from_csr_if(fpu_from_csr_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
`include "VX_define.vh"
|
||||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
module VX_fpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
@@ -12,7 +14,7 @@ module VX_fpu_unit #(
|
||||
VX_fpu_from_csr_if fpu_from_csr_if,
|
||||
|
||||
// outputs
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_fp_if fpu_commit_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if
|
||||
);
|
||||
localparam FOP_BITS = fpnew_pkg::OP_BITS;
|
||||
@@ -41,10 +43,10 @@ module VX_fpu_unit #(
|
||||
PipeConfig: fpnew_pkg::DISTRIBUTED
|
||||
};
|
||||
|
||||
wire fpu_in_ready;
|
||||
wire fpu_in_valid;
|
||||
wire fpu_out_ready;
|
||||
wire fpu_out_valid;
|
||||
wire fpu_in_ready, fpu_in_valid;
|
||||
wire fpu_out_ready, fpu_out_valid;
|
||||
|
||||
wire [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
|
||||
|
||||
wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
|
||||
|
||||
@@ -52,15 +54,13 @@ module VX_fpu_unit #(
|
||||
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
|
||||
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
|
||||
|
||||
assign fpu_in_valid = (| fpu_req_if.valid);
|
||||
assign fpu_operands[0] = fpu_req_if.rs1_data;
|
||||
assign fpu_operands[1] = fpu_req_if.rs2_data;
|
||||
assign fpu_operands[2] = fpu_req_if.rs3_data;
|
||||
assign fpu_req_if.ready = fpu_in_ready;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||
fpnew_pkg::status_t fpu_status;
|
||||
|
||||
assign fpu_from_csr_if.warp_num = fpu_req_if.warp_num;
|
||||
wire is_dyn_rnd = &(fpu_req_if.frm);
|
||||
wire [`FRM_BITS-1:0] real_frm = is_dyn_rnd ? fpu_from_csr_if.frm : fpu_req_if.frm;
|
||||
|
||||
reg [FOP_BITS-1:0] fpu_op;
|
||||
reg [`FRM_BITS-1:0] fpu_rnd;
|
||||
reg fpu_op_mod;
|
||||
@@ -96,10 +96,12 @@ module VX_fpu_unit #(
|
||||
endcase
|
||||
end
|
||||
|
||||
assign fpu_operands = {fpu_req_if.rs3_data, fpu_req_if.rs2_data, fpu_req_if.rs1_data};
|
||||
|
||||
fpnew_top #(
|
||||
.Features (FPU_FEATURES),
|
||||
.Implementation (FPU_IMPLEMENTATION),
|
||||
.TagType (logic)
|
||||
.TagType (logic [`LOG2UP(`FPURQ_SIZE)-1:0])
|
||||
) fpnew_core (
|
||||
.clk_i (clk),
|
||||
.rst_ni (1'b1),
|
||||
@@ -111,26 +113,59 @@ module VX_fpu_unit #(
|
||||
.dst_fmt_i (fpu_dst_fmt),
|
||||
.int_fmt_i (fpu_int_fmt),
|
||||
.vectorial_op_i (1'b1),
|
||||
.tag_i (1'b0),
|
||||
.tag_i (fpu_in_tag),
|
||||
.in_valid_i (fpu_in_valid),
|
||||
.in_ready_o (fpu_in_ready),
|
||||
.flush_i (reset),
|
||||
.result_o (fpu_result),
|
||||
.status_o (fpu_status),
|
||||
`UNUSED_PIN (tag_o),
|
||||
.tag_o (fpu_out_tag),
|
||||
.out_valid_o (fpu_out_valid),
|
||||
.out_ready_i (fpu_out_ready),
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
|
||||
assign fpu_commit_if.valid = fpu_req_if.valid & {`NUM_THREADS{fpu_out_valid}};
|
||||
wire req_push = fpu_req_if.valid && fpu_req_if.ready;
|
||||
wire req_pop = fpu_out_valid && fpu_out_ready;
|
||||
wire req_full;
|
||||
|
||||
wire [`NUM_THREADS-1:0] rsp_valid;
|
||||
wire [`NW_BITS-1:0] rsp_warp_num;
|
||||
wire [31:0] rsp_curr_PC;
|
||||
wire rsp_wb;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_rd_is_fp;
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`NUM_THREADS + `NW_BITS + 32 + 1 + `NR_BITS + 1),
|
||||
.SIZE (`FPURQ_SIZE)
|
||||
) fpu_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp}),
|
||||
.write_addr (fpu_in_tag),
|
||||
.push (req_push),
|
||||
.full (req_full),
|
||||
.pop (req_pop),
|
||||
.read_addr (fpu_out_tag),
|
||||
.read_data ({rsp_valid, rsp_warp_num, rsp_curr_PC, rsp_wb, rsp_rd, rsp_rd_is_fp}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
assign fpu_in_valid = (| fpu_req_if.valid) && ~req_full;
|
||||
assign fpu_req_if.ready = fpu_in_ready && ~req_full;
|
||||
|
||||
assign fpu_commit_if.valid = rsp_valid & {`NUM_THREADS{fpu_out_valid}};
|
||||
assign fpu_commit_if.warp_num = rsp_warp_num;
|
||||
assign fpu_commit_if.curr_PC = rsp_curr_PC;
|
||||
assign fpu_commit_if.data = fpu_result;
|
||||
assign fpu_commit_if.wb = fpu_req_if.wb;
|
||||
assign fpu_commit_if.rd = fpu_req_if.rd;
|
||||
assign fpu_commit_if.wb = rsp_wb;
|
||||
assign fpu_commit_if.rd = rsp_rd;
|
||||
assign fpu_commit_if.rd_is_fp = rsp_rd_is_fp;
|
||||
assign fpu_out_ready = fpu_commit_if.ready;
|
||||
|
||||
assign fpu_to_csr_if.valid = fpu_out_valid;
|
||||
assign fpu_to_csr_if.warp_num = fpu_req_if.warp_num;
|
||||
assign fpu_to_csr_if.warp_num = rsp_warp_num;
|
||||
assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
|
||||
assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
|
||||
assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
|
||||
|
||||
@@ -34,7 +34,7 @@ module VX_gpr_stage #(
|
||||
// Int GPRs
|
||||
VX_gpr_ram gpr_int_ram (
|
||||
.clk (clk),
|
||||
.we (we[i] & {`NUM_THREADS{~writeback_if.is_fp}}),
|
||||
.we (we[i] & {`NUM_THREADS{~writeback_if.rd_is_fp}}),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
@@ -46,7 +46,7 @@ module VX_gpr_stage #(
|
||||
// FP GPRs
|
||||
VX_gpr_ram gpr_fp_ram (
|
||||
.clk (clk),
|
||||
.we (we[i] & {`NUM_THREADS{writeback_if.is_fp}}),
|
||||
.we (we[i] & {`NUM_THREADS{writeback_if.rd_is_fp}}),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
|
||||
@@ -23,7 +23,7 @@ module VX_icache_stage #(
|
||||
|
||||
wire valid_inst = (| ifetch_req_if.valid);
|
||||
|
||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr, dbg_mrq_write_addr;
|
||||
wire [`LOG2UP(`ICREQ_SIZE)-1:0] mrq_write_addr, mrq_read_addr;
|
||||
wire mrq_full;
|
||||
|
||||
wire mrq_push = icache_req_if.valid && icache_req_if.ready;
|
||||
@@ -32,18 +32,18 @@ module VX_icache_stage #(
|
||||
assign mrq_read_addr = icache_rsp_if.tag[0][`LOG2UP(`ICREQ_SIZE)-1:0];
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`ICREQ_SIZE) + 32 + `NW_BITS),
|
||||
.DATAW (32 + `NW_BITS),
|
||||
.SIZE (`ICREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
|
||||
.write_data ({ifetch_req_if.curr_PC, ifetch_req_if.warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
|
||||
.read_data ({ifetch_rsp_if.curr_PC, ifetch_rsp_if.warp_num}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
@@ -51,9 +51,6 @@ module VX_icache_stage #(
|
||||
if (mrq_push) begin
|
||||
valid_threads[ifetch_req_if.warp_num] <= ifetch_req_if.valid;
|
||||
end
|
||||
if (mrq_pop) begin
|
||||
assert(mrq_read_addr == dbg_mrq_write_addr);
|
||||
end
|
||||
end
|
||||
|
||||
// Icache Request
|
||||
@@ -67,7 +64,7 @@ module VX_icache_stage #(
|
||||
assign ifetch_req_if.ready = !mrq_full && icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 2'b1, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, mrq_write_addr};
|
||||
`else
|
||||
assign icache_req_if.tag = mrq_write_addr;
|
||||
`endif
|
||||
|
||||
@@ -64,7 +64,7 @@ module VX_issue #(
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
|
||||
VX_issue_mux issue_mux (
|
||||
VX_issue_demux issue_demux (
|
||||
.decode_if (decode_if),
|
||||
.gpr_data_if (gpr_data_if),
|
||||
.alu_req_if (alu_req_tmp_if),
|
||||
@@ -134,14 +134,14 @@ module VX_issue #(
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
|
||||
) fpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_fpu),
|
||||
.flush (flush_fpu),
|
||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
|
||||
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
|
||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rd_is_fp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
|
||||
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rd_is_fp, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
|
||||
@@ -1,90 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_issue_mux (
|
||||
// inputs
|
||||
VX_decode_if decode_if,
|
||||
VX_gpr_data_if gpr_data_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
|
||||
wire[`NUM_THREADS-1:0] is_alu = {`NUM_THREADS{decode_if.ex_type == `EX_ALU}};
|
||||
wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{decode_if.ex_type == `EX_LSU}};
|
||||
wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{decode_if.ex_type == `EX_CSR}};
|
||||
wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{decode_if.ex_type == `EX_MUL}};
|
||||
wire[`NUM_THREADS-1:0] is_fpu = {`NUM_THREADS{decode_if.ex_type == `EX_FPU}};
|
||||
wire[`NUM_THREADS-1:0] is_gpu = {`NUM_THREADS{decode_if.ex_type == `EX_GPU}};
|
||||
|
||||
// ALU unit
|
||||
assign alu_req_if.valid = decode_if.valid & is_alu;
|
||||
assign alu_req_if.warp_num = decode_if.warp_num;
|
||||
assign alu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign alu_req_if.alu_op = `ALU_OP(decode_if.instr_op);
|
||||
assign alu_req_if.rd = decode_if.rd;
|
||||
assign alu_req_if.wb = decode_if.wb;
|
||||
assign alu_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign alu_req_if.rs2_data = gpr_data_if.rs2_data;
|
||||
assign alu_req_if.offset = decode_if.imm;
|
||||
assign alu_req_if.next_PC = decode_if.next_PC;
|
||||
|
||||
// LSU unit
|
||||
assign lsu_req_if.valid = decode_if.valid & is_lsu;
|
||||
assign lsu_req_if.warp_num = decode_if.warp_num;
|
||||
assign lsu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign lsu_req_if.base_addr = gpr_data_if.rs1_data;
|
||||
assign lsu_req_if.store_data = gpr_data_if.rs2_data;
|
||||
assign lsu_req_if.offset = decode_if.imm;
|
||||
assign lsu_req_if.rw = `LSU_RW(decode_if.instr_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.instr_op);
|
||||
assign lsu_req_if.rd = decode_if.rd;
|
||||
assign lsu_req_if.wb = decode_if.wb;
|
||||
|
||||
// CSR unit
|
||||
assign csr_req_if.valid = decode_if.valid & is_csr;
|
||||
assign csr_req_if.warp_num = decode_if.warp_num;
|
||||
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign csr_req_if.csr_op = `CSR_OP(decode_if.instr_op);
|
||||
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_SIZE-1:0];
|
||||
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_data_if.rs1_data[0];
|
||||
assign csr_req_if.rd = decode_if.rd;
|
||||
assign csr_req_if.wb = decode_if.wb;
|
||||
assign csr_req_if.is_io = 1'b0;
|
||||
|
||||
// MUL unit
|
||||
assign mul_req_if.valid = decode_if.valid & is_mul;
|
||||
assign mul_req_if.warp_num = decode_if.warp_num;
|
||||
assign mul_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign mul_req_if.mul_op = `MUL_OP(decode_if.instr_op);
|
||||
assign mul_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = gpr_data_if.rs2_data;
|
||||
assign mul_req_if.rd = decode_if.rd;
|
||||
assign mul_req_if.wb = decode_if.wb;
|
||||
|
||||
// FPU unit
|
||||
assign fpu_req_if.valid = decode_if.valid & is_fpu;
|
||||
assign fpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign fpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.instr_op);
|
||||
assign fpu_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = gpr_data_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = gpr_data_if.rs3_data;
|
||||
assign fpu_req_if.frm = decode_if.frm;
|
||||
assign fpu_req_if.rd = decode_if.rd;
|
||||
assign fpu_req_if.wb = decode_if.wb;
|
||||
|
||||
// GPU unit
|
||||
assign gpu_req_if.valid = decode_if.valid & is_gpu;
|
||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign gpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign gpu_req_if.gpu_op = `GPU_OP(decode_if.instr_op);
|
||||
assign gpu_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign gpu_req_if.rs2_data = gpr_data_if.rs2_data[0];
|
||||
assign gpu_req_if.next_PC = decode_if.next_PC;
|
||||
|
||||
endmodule
|
||||
@@ -81,7 +81,7 @@ module VX_lsu_unit #(
|
||||
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask[`DCREQ_SIZE-1:0];
|
||||
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
|
||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr;
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
||||
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
|
||||
|
||||
@@ -97,18 +97,18 @@ module VX_lsu_unit #(
|
||||
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
||||
.DATAW (32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
||||
.SIZE (`DCREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({mrq_write_addr, use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
|
||||
.write_data ({use_pc, use_wb, use_req_offset, mem_byteen, use_rd, use_warp_num}),
|
||||
.write_addr (mrq_write_addr),
|
||||
.push (mrq_push),
|
||||
.full (mrq_full),
|
||||
.pop (mrq_pop),
|
||||
.read_addr (mrq_read_addr),
|
||||
.read_data ({dbg_mrq_write_addr, lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
|
||||
.read_data ({lsu_commit_if.curr_PC, lsu_commit_if.wb, mem_rsp_offset, core_rsp_mem_read, lsu_commit_if.rd, lsu_commit_if.warp_num}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
@@ -118,7 +118,6 @@ module VX_lsu_unit #(
|
||||
end
|
||||
if (mrq_pop_part) begin
|
||||
mem_rsp_mask[mrq_read_addr] <= mem_rsp_mask_upd;
|
||||
assert(($time < 2) || mrq_read_addr == dbg_mrq_write_addr);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -119,7 +119,7 @@ module VX_pipeline #(
|
||||
VX_commit_if lsu_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
VX_commit_if mul_commit_if();
|
||||
VX_commit_if fpu_commit_if();
|
||||
VX_commit_fp_if fpu_commit_if();
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
VX_fetch #(
|
||||
|
||||
@@ -20,14 +20,17 @@ module VX_scheduler #(
|
||||
);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
|
||||
reg [`NUM_REGS-1:0][`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0];
|
||||
reg [`NUM_REGS-1:0] busy_table [`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0][(`NUM_REGS*2)-1:0];
|
||||
reg busy_table [`NUM_WARPS-1:0][(`NUM_REGS*2)-1:0];
|
||||
reg [CTVW-1:0] count_valid;
|
||||
|
||||
wire rs1_rename = busy_table[decode_if.warp_num][decode_if.rs1];
|
||||
wire rs2_rename = busy_table[decode_if.warp_num][decode_if.rs2];
|
||||
wire rs3_rename = busy_table[decode_if.warp_num][decode_if.rs3];
|
||||
wire rd_rename = busy_table[decode_if.warp_num][decode_if.rd];
|
||||
reg [`NR_BITS:0] read_rd = {decode_if.rd_is_fp, decode_if.rd};
|
||||
reg [`NR_BITS:0] write_rd = {writeback_if.rd_is_fp, writeback_if.rd};
|
||||
|
||||
wire rs1_rename = busy_table[decode_if.warp_num][{decode_if.rs1_is_fp, decode_if.rs1}];
|
||||
wire rs2_rename = busy_table[decode_if.warp_num][{decode_if.rs1_is_fp, decode_if.rs2}];
|
||||
wire rs3_rename = busy_table[decode_if.warp_num][{1'b1, decode_if.rs3}];
|
||||
wire rd_rename = busy_table[decode_if.warp_num][read_rd];
|
||||
|
||||
wire rs1_rename_qual = rs1_rename && decode_if.use_rs1;
|
||||
wire rs2_rename_qual = rs2_rename && decode_if.use_rs2;
|
||||
@@ -50,7 +53,7 @@ module VX_scheduler #(
|
||||
|
||||
wire release_rd = (| writeback_if.valid);
|
||||
|
||||
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
|
||||
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][write_rd] & ~writeback_if.valid;
|
||||
|
||||
reg [CTVW-1:0] count_valid_next = (acquire_rd && !(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
|
||||
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
|
||||
@@ -67,13 +70,13 @@ module VX_scheduler #(
|
||||
count_valid <= 0;
|
||||
end else begin
|
||||
if (acquire_rd) begin
|
||||
rename_table[decode_if.warp_num][decode_if.rd] <= decode_if.valid;
|
||||
busy_table[decode_if.warp_num][decode_if.rd] <= 1;
|
||||
rename_table[decode_if.warp_num][read_rd] <= decode_if.valid;
|
||||
busy_table[decode_if.warp_num][read_rd] <= 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||
busy_table[writeback_if.warp_num][writeback_if.rd] <= (| valid_wb_new_mask);
|
||||
assert(rename_table[writeback_if.warp_num][write_rd] != 0);
|
||||
rename_table[writeback_if.warp_num][write_rd] <= valid_wb_new_mask;
|
||||
busy_table[writeback_if.warp_num][write_rd] <= (| valid_wb_new_mask);
|
||||
end
|
||||
count_valid <= count_valid_next;
|
||||
end
|
||||
|
||||
@@ -145,7 +145,7 @@
|
||||
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
||||
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
|
||||
wire [31:0] scope_writeback_curr_PC; \
|
||||
wire [`WB_BITS-1:0] scope_writeback_wb; \
|
||||
wire scope_writeback_wb; \
|
||||
wire [`NR_BITS-1:0] scope_writeback_rd; \
|
||||
wire [63:0] scope_writeback_data; \
|
||||
wire scope_bank_valid_st0; \
|
||||
@@ -224,7 +224,7 @@
|
||||
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
|
||||
output wire [31:0] scope_writeback_curr_PC, \
|
||||
output wire [`WB_BITS-1:0] scope_writeback_wb, \
|
||||
output wire scope_writeback_wb, \
|
||||
output wire [`NR_BITS-1:0] scope_writeback_rd, \
|
||||
output wire [63:0] scope_writeback_data,
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ module VX_writeback #(
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_fp_if fpu_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
|
||||
// outputs
|
||||
@@ -26,30 +26,39 @@ module VX_writeback #(
|
||||
VX_wb_if writeback_tmp_if();
|
||||
|
||||
assign writeback_tmp_if.valid = lsu_valid ? lsu_commit_if.valid :
|
||||
fpu_valid ? fpu_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
alu_valid ? alu_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.warp_num = lsu_valid ? lsu_commit_if.warp_num :
|
||||
fpu_valid ? fpu_commit_if.warp_num :
|
||||
mul_valid ? mul_commit_if.warp_num :
|
||||
alu_valid ? alu_commit_if.warp_num :
|
||||
csr_valid ? csr_commit_if.warp_num :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
alu_valid ? alu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.rd = lsu_valid ? lsu_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
alu_valid ? alu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.is_fp = fpu_valid && fpu_commit_if.ready;
|
||||
assign writeback_tmp_if.rd_is_fp = lsu_valid ? 0 :
|
||||
fpu_valid ? fpu_commit_if.rd_is_fp :
|
||||
mul_valid ? 0 :
|
||||
alu_valid ? 0 :
|
||||
csr_valid ? 0 :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.data = lsu_valid ? lsu_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
alu_valid ? alu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
0;
|
||||
|
||||
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
||||
|
||||
@@ -60,8 +69,8 @@ module VX_writeback #(
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.is_fp}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data, writeback_if.is_fp})
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
|
||||
);
|
||||
|
||||
assign lsu_commit_if.ready = !stall;
|
||||
|
||||
6
hw/rtl/cache/VX_bank.v
vendored
6
hw/rtl/cache/VX_bank.v
vendored
@@ -106,7 +106,7 @@ module VX_bank #(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_use_pc_st0;
|
||||
wire[`WB_BITS-1:0] debug_wb_st0;
|
||||
wire debug_wb_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
||||
wire debug_rw_st0;
|
||||
@@ -115,7 +115,7 @@ module VX_bank #(
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
||||
|
||||
wire[31:0] debug_use_pc_st1e;
|
||||
wire[`WB_BITS-1:0] debug_wb_st1e;
|
||||
wire debug_wb_st1e;
|
||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
||||
wire debug_rw_st1e;
|
||||
@@ -124,7 +124,7 @@ module VX_bank #(
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
|
||||
|
||||
wire[31:0] debug_use_pc_st2;
|
||||
wire[`WB_BITS-1:0] debug_wb_st2;
|
||||
wire debug_wb_st2;
|
||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
||||
wire debug_rw_st2;
|
||||
|
||||
2
hw/rtl/cache/VX_cache.v
vendored
2
hw/rtl/cache/VX_cache.v
vendored
@@ -130,7 +130,7 @@ module VX_cache #(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_core_req_use_pc;
|
||||
wire[`WB_BITS-1:0] debug_core_req_wb;
|
||||
wire debug_core_req_wb;
|
||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
||||
wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx;
|
||||
|
||||
5
hw/rtl/cache/VX_snp_forwarder.v
vendored
5
hw/rtl/cache/VX_snp_forwarder.v
vendored
@@ -65,13 +65,13 @@ module VX_snp_forwarder #(
|
||||
) snp_fwd_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({sfq_write_addr, snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.write_addr (sfq_write_addr),
|
||||
.push (sfq_push),
|
||||
.pop (sfq_pop),
|
||||
.full (sfq_full),
|
||||
.read_addr (sfq_read_addr),
|
||||
.read_data ({dbg_sfq_write_addr, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
||||
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
|
||||
@@ -81,7 +81,6 @@ module VX_snp_forwarder #(
|
||||
end
|
||||
if (fwdin_fire) begin
|
||||
pending_cntrs[sfq_read_addr] <= pending_cntrs[sfq_read_addr] - 1;
|
||||
assert(sfq_read_addr == dbg_sfq_write_addr);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ interface VX_decode_if ();
|
||||
// FP states
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire use_rs3;
|
||||
wire rd_is_fp;
|
||||
wire rs1_is_fp;
|
||||
wire rs2_is_fp;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
@@ -5,11 +5,8 @@
|
||||
|
||||
interface VX_fpu_from_csr_if ();
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
||||
wire [`NUM_WARPS-1:0][`FRM_BITS-1:0] frm;
|
||||
|
||||
`IGNORE_WARNINGS_END
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ interface VX_fpu_req_if ();
|
||||
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire rd_is_fp;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
|
||||
interface VX_fpu_to_csr_if ();
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
@@ -16,8 +15,6 @@ interface VX_fpu_to_csr_if ();
|
||||
wire fflags_UF;
|
||||
wire fflags_NX;
|
||||
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
@@ -8,8 +8,8 @@ interface VX_wb_if ();
|
||||
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire rd_is_fp;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire is_fp;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -17,7 +17,9 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate
|
||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate
|
||||
|
||||
INCLUDE += -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
||||
|
||||
SRCS = simulator.cpp testbench.cpp
|
||||
|
||||
@@ -29,6 +31,8 @@ VF += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VF += -Wno-DECLFILENAME
|
||||
VF += --x-initial unique --x-assign unique
|
||||
VF += -exe $(SRCS) $(INCLUDE)
|
||||
VF += -cc Vortex.v -top-module Vortex
|
||||
VF += verilator.vlt
|
||||
|
||||
DBG += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
DBG += -DDBG_CORE_REQ_INFO
|
||||
@@ -36,22 +40,22 @@ DBG += -DDBG_CORE_REQ_INFO
|
||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
gen-s:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
|
||||
gen-sd:
|
||||
verilator $(VF) -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
|
||||
verilator $(VF) $(SINGLECORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(SINGLECORE)' --trace $(DBG)
|
||||
|
||||
gen-st:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
|
||||
gen-m:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-md:
|
||||
verilator $(VF) -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(MULTICORE)' --trace $(DBG)
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -g -O0 $(DBG) $(MULTICORE)' --trace $(DBG)
|
||||
|
||||
gen-mt:
|
||||
verilator $(VF) -DNDEBUG -cc Vortex.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
||||
build-s: gen-s
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
Reference in New Issue
Block a user