// Viewer metadata (not part of the source): 416 lines, 13 KiB, SystemVerilog.
// Include guard: the matching `endif is the last directive of this file.
`ifndef VX_DEFINE
`define VX_DEFINE

// Platform helpers and user configuration; the macros below reference names
// that are not defined in this file and are expected to come from these.
`include "VX_platform.vh"
`include "VX_config.vh"

///////////////////////////////////////////////////////////////////////////////

// Optional build-target switches, left commented out (disabled) here.
// `define SYNTHESIS 1
// `define ASIC 1

///////////////////////////////////////////////////////////////////////////////

// Index-field widths derived from the configured element counts.
// `LOG2UP is not defined in this file (presumably VX_platform.vh -- confirm).
`define NW_BITS         `LOG2UP(`NUM_WARPS)
`define NT_BITS         `LOG2UP(`NUM_THREADS)
`define NC_BITS         `LOG2UP(`NUM_CORES)
`define NB_BITS         `LOG2UP(`NUM_BARRIERS)

// NOTE(review): NUM_REQUESTS carries no backtick, so it is not a macro
// reference; it must resolve to an identifier (e.g. a module parameter) that
// is in scope wherever `REQS_BITS is expanded. Confirm this is intentional.
`define REQS_BITS       `LOG2UP(NUM_REQUESTS)

// Register count doubles when the F extension is enabled.
`ifdef EXT_F_ENABLE
`define NUM_REGS        64
`else
`define NUM_REGS        32
`endif

`define NR_BITS         `LOG2UP(`NUM_REGS)

`define CSR_ADDR_BITS   12

`define CSR_WIDTH       12

`define ISTAG_BITS      `LOG2UP(`ISSUEQ_SIZE)

///////////////////////////////////////////////////////////////////////////////

// Per-operation latency constants (presumably in clock cycles -- confirm
// against the units that consume them).

// Integer multiply/divide
`define LATENCY_IDIV        33
`define LATENCY_IMUL        3

// Floating-point, per individual operation
`define LATENCY_FDIV        16
`define LATENCY_FSQRT       10
`define LATENCY_FTOI        5
`define LATENCY_FTOU        4
`define LATENCY_ITOF        8
`define LATENCY_UTOF        7

// Floating-point, per operation group
`define LATENCY_FMULADD     2
`define LATENCY_FDIVSQRT    2
`define LATENCY_FCONV       2
`define LATENCY_FNONCOMP    1

///////////////////////////////////////////////////////////////////////////////

// 7-bit instruction opcode values.

// Integer / control / system
`define INST_LUI        7'b0110111
`define INST_AUIPC      7'b0010111
`define INST_JAL        7'b1101111
`define INST_JALR       7'b1100111
`define INST_B          7'b1100011 // branch instructions
`define INST_L          7'b0000011 // load instructions
`define INST_S          7'b0100011 // store instructions
`define INST_I          7'b0010011 // immediate instructions
`define INST_R          7'b0110011 // register instructions
`define INST_F          7'b0001111 // fence instructions
`define INST_SYS        7'b1110011 // system instructions

// Floating point
`define INST_FL         7'b0000111 // float load instruction
`define INST_FS         7'b0100111 // float store instruction
`define INST_FMADD      7'b1000011
`define INST_FMSUB      7'b1000111
`define INST_FNMSUB     7'b1001011
`define INST_FNMADD     7'b1001111
`define INST_FCI        7'b1010011 // float common instructions

// GPU instructions
`define INST_GPU        7'b1101011

///////////////////////////////////////////////////////////////////////////////

// Memory access size/sign selectors (3 bits). The low two bits select the
// access type (B=0, H=1, W=2); bit 2 is set only for the UB/UH variants.
`define BYTEEN_SB       3'h0
`define BYTEEN_SH       3'h1
`define BYTEEN_SW       3'h2
`define BYTEEN_UB       3'h4
`define BYTEEN_UH       3'h5
`define BYTEEN_BITS     3
// Extracts the low two (access type) bits of a BYTEEN_* value.
`define BYTEEN_TYPE(x)  x[1:0]

// Floating-point rounding mode encodings (3 bits).
`define FRM_RNE         3'b000  // round to nearest even
`define FRM_RTZ         3'b001  // round to zero
`define FRM_RDN         3'b010  // round to -inf
`define FRM_RUP         3'b011  // round to +inf
`define FRM_RMM         3'b100  // round to nearest max magnitude
`define FRM_DYN         3'b111  // dynamic mode
`define FRM_BITS        3

///////////////////////////////////////////////////////////////////////////////

// Execution-unit selector encodings (3 bits).
`define EX_NOP          3'h0
`define EX_ALU          3'h1
`define EX_BRU          3'h2
`define EX_LSU          3'h3
`define EX_CSR          3'h4
`define EX_MUL          3'h5
`define EX_FPU          3'h6
`define EX_GPU          3'h7
`define EX_BITS         3

// NOTE(review): eight encodings are listed above but NUM_EXS is 7;
// presumably EX_NOP is not counted as a unit -- confirm.
`define NUM_EXS         7
`define NE_BITS         `LOG2UP(`NUM_EXS)

///////////////////////////////////////////////////////////////////////////////

// Width of the generic operation-type field; it is at least as wide as every
// per-unit *_BITS value defined in this file (the widest, FPU_BITS, is 5).
`define OP_BITS         5

// ALU operation encodings (4 bits). Bits [3:2] form the class returned by
// `ALU_OP_CLASS: 00 = ADD/SUB/LUI/AUIPC, 01 = SLT/SLTU (and OTHER),
// 10 = SRL/SRA, 11 = AND/OR/XOR/SLL.
`define ALU_ADD         4'b0000
`define ALU_SUB         4'b0001
`define ALU_LUI         4'b0010
`define ALU_AUIPC       4'b0011
`define ALU_SLT         4'b0100
`define ALU_SLTU        4'b0101
`define ALU_SRL         4'b1000
`define ALU_SRA         4'b1001
`define ALU_AND         4'b1100
`define ALU_OR          4'b1101
`define ALU_XOR         4'b1110
`define ALU_SLL         4'b1111
`define ALU_OTHER       4'b0111
`define ALU_BITS        4
// Slices the ALU opcode out of a (possibly wider) operation field.
`define ALU_OP(x)       x[`ALU_BITS-1:0]
// Returns the two class bits described above.
`define ALU_OP_CLASS(x) x[3:2]

// Branch-unit operation encodings (4 bits). For the conditional branches
// (bit 3 clear) the low bits are flags, exposed by the accessors below:
// bit 0 is set for NE/GEU/GE, bit 1 for the LT*/GE* compares, bit 2 for the
// signed LT/GE pair. Bit 3 is set for JAL..OTHER.
`define BRU_EQ          4'b0000
`define BRU_NE          4'b0001
`define BRU_LTU         4'b0010
`define BRU_GEU         4'b0011
`define BRU_LT          4'b0110
`define BRU_GE          4'b0111
`define BRU_JAL         4'b1000
`define BRU_JALR        4'b1001
`define BRU_ECALL       4'b1010
`define BRU_EBREAK      4'b1011
`define BRU_MRET        4'b1100
`define BRU_SRET        4'b1101
`define BRU_DRET        4'b1110
`define BRU_OTHER       4'b1111
`define BRU_BITS        4
// Slices the BRU opcode out of a (possibly wider) operation field.
`define BRU_OP(x)       x[`BRU_BITS-1:0]
// Single-bit accessors for the flag bits described above.
`define BRU_NEG(x)      x[0]
`define BRU_LESS(x)     x[1]
`define BRU_SIGNED(x)   x[2]
`define BRU_STATIC(x)   x[3]

// Load/store-unit operation encodings (4 bits): {rw, byteen}, where the top
// bit is 0 for the L* (load) ops and 1 for the S* (store) ops, and the low
// three bits are one of the BYTEEN_* selectors.
`define LSU_LB          {1'b0, `BYTEEN_SB}
`define LSU_LH          {1'b0, `BYTEEN_SH}
`define LSU_LW          {1'b0, `BYTEEN_SW}
`define LSU_LBU         {1'b0, `BYTEEN_UB}
`define LSU_LHU         {1'b0, `BYTEEN_UH}
`define LSU_SB          {1'b1, `BYTEEN_SB}
`define LSU_SH          {1'b1, `BYTEEN_SH}
`define LSU_SW          {1'b1, `BYTEEN_SW}
`define LSU_SBU         {1'b1, `BYTEEN_UB}
`define LSU_SHU         {1'b1, `BYTEEN_UH}
`define LSU_BITS        4
// Top bit of the encoding (0 = load, 1 = store).
`define LSU_RW(x)       x[3]
// Low three bits of the encoding (the BYTEEN_* selector).
`define LSU_BE(x)       x[2:0]

// CSR-unit operation encodings (2 bits).
`define CSR_RW          2'h0
`define CSR_RS          2'h1
`define CSR_RC          2'h2
`define CSR_OTHER       2'h3
`define CSR_BITS        2
// Slices the CSR opcode out of a (possibly wider) operation field.
`define CSR_OP(x)       x[`CSR_BITS-1:0]

// Multiply/divide-unit operation encodings (3 bits). Bit 2 is clear for the
// four MUL* ops and set for DIV/DIVU/REM/REMU, which is what `IS_DIV_OP tests.
`define MUL_MUL         3'h0
`define MUL_MULH        3'h1
`define MUL_MULHSU      3'h2
`define MUL_MULHU       3'h3
`define MUL_DIV         3'h4
`define MUL_DIVU        3'h5
`define MUL_REM         3'h6
`define MUL_REMU        3'h7
`define MUL_BITS        3
// Slices the MUL opcode out of a (possibly wider) operation field.
`define MUL_OP(x)       x[`MUL_BITS-1:0]
`define IS_DIV_OP(x)    x[2]

// Floating-point-unit operation encodings (5 bits).
`define FPU_ADD         5'h00
`define FPU_SUB         5'h01
`define FPU_MUL         5'h02
`define FPU_DIV         5'h03
`define FPU_SQRT        5'h04
`define FPU_MADD        5'h05
`define FPU_MSUB        5'h06
`define FPU_NMSUB       5'h07
`define FPU_NMADD       5'h08
`define FPU_SGNJ        5'h09  // FSGNJ
`define FPU_SGNJN       5'h0A  // FSGNJN
`define FPU_SGNJX       5'h0B  // FSGNJX
`define FPU_MIN         5'h0C  // FMIN.S
`define FPU_MAX         5'h0D  // FMAX.S
`define FPU_CVTWS       5'h0E  // FCVT.W.S
`define FPU_CVTWUS      5'h0F  // FCVT.WU.S
`define FPU_CVTSW       5'h10  // FCVT.S.W
`define FPU_CVTSWU      5'h11  // FCVT.S.WU
`define FPU_MVXW        5'h12  // MOV FP from fpReg to integer reg
`define FPU_MVWX        5'h13  // MOV FP from integer reg to fpReg
`define FPU_CLASS       5'h14
`define FPU_CMP         5'h15
`define FPU_OTHER       5'h1f
`define FPU_BITS        5
// Slices the FPU opcode out of a (possibly wider) operation field.
`define FPU_OP(x)       x[`FPU_BITS-1:0]

// GPU-unit operation encodings (3 bits).
`define GPU_TMC         3'h0
`define GPU_WSPAWN      3'h1
`define GPU_SPLIT       3'h2
`define GPU_JOIN        3'h3
`define GPU_BAR         3'h4
`define GPU_OTHER       3'h7
`define GPU_BITS        3
// Slices the GPU opcode out of a (possibly wider) operation field.
`define GPU_OP(x)       x[`GPU_BITS-1:0]

///////////////////////////////////////////////////////////////////////////////

// Optional-extension bits contributed to `ISA_CODE: bit 12 (M) and bit 5 (F)
// are set only when the corresponding EXT_*_ENABLE macro is defined.
`ifdef EXT_M_ENABLE
`define ISA_EXT_M       (1 << 12)
`else
`define ISA_EXT_M       0
`endif

`ifdef EXT_F_ENABLE
`define ISA_EXT_F       (1 << 5)
`else
`define ISA_EXT_F       0
`endif

// Extension bitmap, one bit per letter A..Z (bit 0 = A ... bit 25 = Z).
// The whole expansion is parenthesized so that it stays a single operand when
// used inside a larger expression (e.g. `ISA_CODE & mask). Block comments are
// used on the continued lines because the handling of "// ... \" inside a
// multi-line macro is not uniform across tools.
`define ISA_CODE ( (0 <<  0) /* A - Atomic Instructions extension */ \
                 | (0 <<  1) /* B - Tentatively reserved for Bit operations extension */ \
                 | (0 <<  2) /* C - Compressed extension */ \
                 | (0 <<  3) /* D - Double-precision floating-point extension */ \
                 | (0 <<  4) /* E - RV32E base ISA */ \
                 | `ISA_EXT_F /* F - Single-precision floating-point extension */ \
                 | (0 <<  6) /* G - Additional standard extensions present */ \
                 | (0 <<  7) /* H - Hypervisor mode implemented */ \
                 | (1 <<  8) /* I - RV32I/64I/128I base ISA */ \
                 | (0 <<  9) /* J - Reserved */ \
                 | (0 << 10) /* K - Reserved */ \
                 | (0 << 11) /* L - Tentatively reserved for Decimal Floating-Point extension */ \
                 | `ISA_EXT_M /* M - Integer Multiply/Divide extension */ \
                 | (0 << 13) /* N - User level interrupts supported */ \
                 | (0 << 14) /* O - Reserved */ \
                 | (0 << 15) /* P - Tentatively reserved for Packed-SIMD extension */ \
                 | (0 << 16) /* Q - Quad-precision floating-point extension */ \
                 | (0 << 17) /* R - Reserved */ \
                 | (0 << 18) /* S - Supervisor mode implemented */ \
                 | (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \
                 | (1 << 20) /* U - User mode implemented */ \
                 | (0 << 21) /* V - Tentatively reserved for Vector extension */ \
                 | (0 << 22) /* W - Reserved */ \
                 | (1 << 23) /* X - Non-standard extensions present */ \
                 | (0 << 24) /* Y - Reserved */ \
                 | (0 << 25) /* Z - Reserved */ )

///////////////////////////////////////////////////////////////////////////////

// Width of the optional debug metadata carried with core requests.
// When DBG_CORE_REQ_INFO is defined the fields are: pc (32), wb (1),
// rd (`NR_BITS) and wid (`NW_BITS); otherwise no bits are reserved.
`ifdef DBG_CORE_REQ_INFO
`define DEBUG_CORE_REQ_MDATA_WIDTH  (32 + 1 + `NR_BITS + `NW_BITS)
`else
`define DEBUG_CORE_REQ_MDATA_WIDTH  0
`endif

////////////////////////// Dcache Configurable Knobs //////////////////////////

// Cache ID
// NOTE(review): CORE_ID carries no backtick, so it must be an identifier
// (e.g. a module parameter) in scope where this macro is expanded -- confirm.
`define DCACHE_ID           (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)

// Core request tag ID bits
`define DCORE_TAG_ID_BITS   `ISTAG_BITS

// Core request tag bits (debug metadata + tag ID)
`define DCORE_TAG_WIDTH     (`DEBUG_CORE_REQ_MDATA_WIDTH + `DCORE_TAG_ID_BITS)

// DRAM request data bits
`define DDRAM_LINE_WIDTH    (`DBANK_LINE_SIZE * 8)

// DRAM request address bits
`define DDRAM_ADDR_WIDTH    (32 - `CLOG2(`DBANK_LINE_SIZE))

// DRAM byte enable bits
`define DDRAM_BYTEEN_WIDTH  `DBANK_LINE_SIZE

// DRAM request tag bits
`define DDRAM_TAG_WIDTH     `DDRAM_ADDR_WIDTH

// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define DNUM_REQUESTS       `NUM_THREADS

// Snoop request tag bits.
// `L2SNP_TAG_WIDTH is defined further down in this file; that is fine because
// macro bodies are only expanded at the point of use.
`define DSNP_TAG_WIDTH      ((`NUM_CORES > 1) ? `LOG2UP(`L2SNRQ_SIZE) : `L2SNP_TAG_WIDTH)

////////////////////////// Icache Configurable Knobs //////////////////////////

// Cache ID
// NOTE(review): CORE_ID carries no backtick, so it must be an identifier
// (e.g. a module parameter) in scope where this macro is expanded -- confirm.
`define ICACHE_ID           (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 1)

// Core request address bits
`define ICORE_ADDR_WIDTH    (32-`CLOG2(`IWORD_SIZE))

// Core request byte enable bits.
// Uses the icache word size, matching ICORE_ADDR_WIDTH above (this previously
// referenced `DWORD_SIZE, the dcache word size).
// NOTE(review): confirm no consumer relied on the `DWORD_SIZE value.
`define ICORE_BYTEEN_WIDTH  `IWORD_SIZE

// Core request tag ID bits
`define ICORE_TAG_ID_BITS   `NW_BITS

// Core request tag bits (debug metadata + tag ID)
`define ICORE_TAG_WIDTH     (`DEBUG_CORE_REQ_MDATA_WIDTH + `ICORE_TAG_ID_BITS)

// DRAM request data bits
`define IDRAM_LINE_WIDTH    (`IBANK_LINE_SIZE * 8)

// DRAM request address bits
`define IDRAM_ADDR_WIDTH    (32 - `CLOG2(`IBANK_LINE_SIZE))

// DRAM byte enable bits
`define IDRAM_BYTEEN_WIDTH  `IBANK_LINE_SIZE

// DRAM request tag bits
`define IDRAM_TAG_WIDTH     `IDRAM_ADDR_WIDTH

// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define INUM_REQUESTS       1

////////////////////////// SM Configurable Knobs //////////////////////////////

// Cache ID
// NOTE(review): CORE_ID carries no backtick, so it must be an identifier
// (e.g. a module parameter) in scope where this macro is expanded -- confirm.
`define SCACHE_ID           (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 2)

// Number of Word requests per cycle {1, 2, 4, 8, ...}
// (defined once; an identical second definition of this macro was removed)
`define SNUM_REQUESTS       `NUM_THREADS

// DRAM request address bits
`define SDRAM_ADDR_WIDTH    (32 - `CLOG2(`SBANK_LINE_SIZE))

// DRAM request tag bits
`define SDRAM_TAG_WIDTH     `SDRAM_ADDR_WIDTH

////////////////////////// L2cache Configurable Knobs /////////////////////////

// Cache ID
`define L2CACHE_ID          (`L3_ENABLE ? 1 : 0)

// Core request tag bits (dcache tag widened by CLOG2 of the core count)
`define L2CORE_TAG_WIDTH    (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))

// The L2DRAM_* values below fall back to the dcache-side values when
// `L2_ENABLE is 0.

// DRAM request data bits
`define L2DRAM_LINE_WIDTH   (`L2_ENABLE ? (`L2BANK_LINE_SIZE * 8) : `DDRAM_LINE_WIDTH)

// DRAM request address bits
`define L2DRAM_ADDR_WIDTH   (`L2_ENABLE ? (32 - `CLOG2(`L2BANK_LINE_SIZE)) : `DDRAM_ADDR_WIDTH)

// DRAM byte enable bits
`define L2DRAM_BYTEEN_WIDTH (`L2_ENABLE ? `L2BANK_LINE_SIZE : `DDRAM_BYTEEN_WIDTH)

// DRAM request tag bits; with L2 disabled the tag is widened by
// CLOG2(NUM_CORES*2) bits.
`define L2DRAM_TAG_WIDTH    (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))

// Snoop request tag bits.
// `L3SNP_TAG_WIDTH is defined further down in this file; that is fine because
// macro bodies are only expanded at the point of use.
`define L2SNP_TAG_WIDTH     (`L3_ENABLE ? `LOG2UP(`L3SNRQ_SIZE) : `L3SNP_TAG_WIDTH)

// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L2NUM_REQUESTS      (2 * `NUM_CORES)

////////////////////////// L3cache Configurable Knobs /////////////////////////

// Cache ID
`define L3CACHE_ID          0

// Core request tag bits (L2 tag widened by CLOG2 of the cluster count)
`define L3CORE_TAG_WIDTH    (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))

// The L3DRAM_* values below fall back to the L2-side values when
// `L3_ENABLE is 0.

// DRAM request data bits
`define L3DRAM_LINE_WIDTH   (`L3_ENABLE ? (`L3BANK_LINE_SIZE * 8) : `L2DRAM_LINE_WIDTH)

// DRAM request address bits
`define L3DRAM_ADDR_WIDTH   (`L3_ENABLE ? (32 - `CLOG2(`L3BANK_LINE_SIZE)) : `L2DRAM_ADDR_WIDTH)

// DRAM byte enable bits
`define L3DRAM_BYTEEN_WIDTH (`L3_ENABLE ? `L3BANK_LINE_SIZE : `L2DRAM_BYTEEN_WIDTH)

// DRAM request tag bits
`define L3DRAM_TAG_WIDTH    (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)

// Snoop request tag bits
`define L3SNP_TAG_WIDTH     16

// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L3NUM_REQUESTS      `NUM_CLUSTERS

///////////////////////////////////////////////////////////////////////////////

// Top-level aliases: each VX_* width simply forwards to the corresponding
// L3-level value defined above.
`define VX_DRAM_BYTEEN_WIDTH    `L3DRAM_BYTEEN_WIDTH
`define VX_DRAM_ADDR_WIDTH      `L3DRAM_ADDR_WIDTH
`define VX_DRAM_LINE_WIDTH      `L3DRAM_LINE_WIDTH
`define VX_DRAM_TAG_WIDTH       `L3DRAM_TAG_WIDTH
`define VX_SNP_TAG_WIDTH        `L3SNP_TAG_WIDTH
`define VX_CORE_TAG_WIDTH       `L3CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH         `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)

// Pads x on the right with (32 - $bits(x)) zero bits, i.e. places x in the
// upper bits of a 32-bit value. x must be something $bits() can size.
`define DRAM_TO_BYTE_ADDR(x)    {x, (32-$bits(x))'(0)}

// Included last, after every macro in this file has been defined.
`include "VX_types.vh"

`endif