diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index d81d5ba2..933a3ac7 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -9,11 +9,11 @@ extern int vx_dev_caps(int caps_id) { case VX_CAPS_VERSION: return 0; case VX_CAPS_MAX_CORES: - return NUMBER_CORES; + return NUM_CORES; case VX_CAPS_MAX_WARPS: - return NW; + return NUM_WARPS; case VX_CAPS_MAX_THREADS: - return NT; + return NUM_THREADS; case VX_CAPS_CACHE_LINESIZE: return GLOBAL_BLOCK_SIZE_BYTES; case VX_CAPS_LOCAL_MEM_SIZE: diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index ef5a4b6d..6290c432 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -142,7 +142,7 @@ public: private: void run() { - Harp::ArchDef arch("rv32i", NW, NT); + Harp::ArchDef arch("rv32i", NUM_WARPS, NUM_THREADS); Harp::WordDecoder dec(arch); Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true); Harp::Core core(arch, dec, mu); diff --git a/hw/Makefile b/hw/Makefile index a109b6d3..5e20894d 100644 --- a/hw/Makefile +++ b/hw/Makefile @@ -33,7 +33,7 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu .PHONY: build_config build_config: - ./gen_config.py --rtl_locations + ./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h # -LDFLAGS '-lsystemc' VERILATOR: build_config diff --git a/hw/modelsim/Makefile b/hw/modelsim/Makefile index 26d209dd..f6984419 100644 --- a/hw/modelsim/Makefile +++ b/hw/modelsim/Makefile @@ -6,8 +6,9 @@ ALL:sim SRC = \ vortex_dpi.cpp \ vortex_tb.v \ -../rtl/VX_define.v \ -../rtl/VX_define_synth.v \ +../rtl/VX_user_config.vh \ +../rtl/VX_config.vh \ +../rtl/VX_define.vh \ ../rtl/interfaces/VX_branch_response_inter.v \ ../rtl/interfaces/VX_csr_req_inter.v \ ../rtl/interfaces/VX_csr_wb_inter.v \ diff --git a/hw/modelsim/modelsim.mpf b/hw/modelsim/modelsim.mpf index e37489d6..83857a21 100644 --- a/hw/modelsim/modelsim.mpf +++ b/hw/modelsim/modelsim.mpf @@ -2182,7 +2182,7 @@ Project_File_33 = ../rtl/shared_memory/VX_set_bit.v Project_File_P_33 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 cover_branch 0 folder {Top Level} last_compile 0 cover_fsm 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 1 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 53 cover_expr 0 dont_compile 0 cover_stmt 0 Project_File_34 = ../rtl/interfaces/VX_dcache_response_inter.v Project_File_P_34 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 27 dont_compile 0 cover_expr 0 cover_stmt 0 -Project_File_35 = ../rtl/VX_define.v +Project_File_35 = ../rtl/VX_define.vh Project_File_P_35 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 folder {Top Level} cover_branch 0 cover_fsm 0 last_compile 1572058635 vlog_noload 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 0 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 7 cover_expr 0 dont_compile 0 cover_stmt 0 Project_File_36 = ../rtl/interfaces/VX_csr_req_inter.v Project_File_P_36 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 24 dont_compile 0 cover_expr 0 cover_stmt 0 diff --git a/hw/modelsim/vortex_tb.v b/hw/modelsim/vortex_tb.v index 6534cb74..93bcdcc0 100644 --- a/hw/modelsim/vortex_tb.v +++ b/hw/modelsim/vortex_tb.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" //`define NUMBER_BANKS 8 //`define NUM_WORDS_PER_BLOCK 4 diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 6f147b73..69b8f037 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -11,9 +11,10 @@ vortex_afu.json +incdir+../rtl/pipe_regs +incdir+../rtl/compat -../rtl/VX_define_synth.v -../rtl/VX_define.v -../rtl/generic_cache/VX_cache_config.v +../rtl/VX_user_config.vh +../rtl/VX_config.vh +../rtl/VX_define.vh +../rtl/generic_cache/VX_cache_config.vh ../rtl/Vortex_Socket.v ../rtl/Vortex_Cluster.v ../rtl/Vortex.v diff --git a/hw/rtl/.gitignore b/hw/rtl/.gitignore index f46f5734..a98a6b43 100644 --- a/hw/rtl/.gitignore +++ b/hw/rtl/.gitignore @@ -1,4 +1 @@ -/simulate/VX_define.h -/simulate/VX_define_synth.h -/VX_define_synth.v -/results.txt \ No newline at end of file +/VX_user_config.vh \ No newline at end of file diff --git a/hw/rtl/VX_alu.v b/hw/rtl/VX_alu.v index e4346f75..9e9c94b8 100644 --- a/hw/rtl/VX_alu.v +++ b/hw/rtl/VX_alu.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_alu( input wire clk, diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index 933e9a1d..a29b378c 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_back_end #( diff --git a/hw/rtl/VX_define.v b/hw/rtl/VX_config.vh similarity index 51% rename from hw/rtl/VX_define.v rename to hw/rtl/VX_config.vh index df54f8b0..e6dc8366 100644 --- a/hw/rtl/VX_define.v +++ b/hw/rtl/VX_config.vh @@ -1,736 +1,516 @@ -`ifndef VX_DEFINE -`define VX_DEFINE - -`include "./VX_define_synth.v" - -`ifndef NT -`define NT 4 -`endif - -`ifndef NW -`define NW 8 -`endif - -`ifndef NUMBER_CORES_PER_CLUSTER -`define NUMBER_CORES_PER_CLUSTER 1 -`endif - -`ifndef NUMBER_CLUSTERS -`define NUMBER_CLUSTERS 1 -`endif - -// `define QUEUE_FORCE_MLAB 1 - -`define NT_M1 (`NT-1) - -// NW_M1 is actually log2(NW) -`define NW_M1 (`CLOG2(`NW)) - -// Uncomment the below line if NW=1 -// `define ONLY - -// `define SYN 1 -// `define ASIC 1 -// `define SYN_FUNC 1 - -`ifndef NUM_BARRIERS -`define NUM_BARRIERS 4 -`endif - -`define R_INST 7'd51 -`define L_INST 7'd3 -`define ALU_INST 7'd19 -`define S_INST 7'd35 -`define B_INST 7'd99 -`define LUI_INST 7'd55 -`define AUIPC_INST 7'd23 -`define JAL_INST 7'd111 -`define JALR_INST 7'd103 -`define SYS_INST 7'd115 -`define GPGPU_INST 7'h6b - - -`define WB_ALU 2'h1 -`define WB_MEM 2'h2 -`define WB_JAL 2'h3 -`define NO_WB 2'h0 - - -`define RS2_IMMED 1 -`define RS2_REG 0 - - -`define NO_MEM_READ 3'h7 -`define LB_MEM_READ 3'h0 -`define LH_MEM_READ 3'h1 -`define LW_MEM_READ 3'h2 -`define LBU_MEM_READ 3'h4 -`define LHU_MEM_READ 3'h5 - - -`define NO_MEM_WRITE 3'h7 -`define SB_MEM_WRITE 3'h0 -`define SH_MEM_WRITE 3'h1 -`define SW_MEM_WRITE 3'h2 - - -`define NO_BRANCH 3'h0 -`define BEQ 3'h1 -`define BNE 3'h2 -`define BLT 3'h3 -`define BGT 3'h4 -`define BLTU 3'h5 -`define BGTU 3'h6 - - -`define NO_ALU 5'd15 -`define ADD 5'd0 -`define SUB 5'd1 -`define SLLA 5'd2 -`define SLT 5'd3 -`define SLTU 5'd4 -`define XOR 5'd5 -`define SRL 5'd6 -`define SRA 5'd7 -`define OR 5'd8 -`define AND 5'd9 -`define SUBU 5'd10 -`define LUI_ALU 5'd11 -`define AUIPC_ALU 5'd12 -`define CSR_ALU_RW 5'd13 -`define CSR_ALU_RS 5'd14 -`define CSR_ALU_RC 5'd15 -`define MUL 5'd16 -`define MULH 5'd17 -`define MULHSU 5'd18 -`define MULHU 5'd19 -`define DIV 5'd20 -`define DIVU 5'd21 -`define REM 5'd22 -`define REMU 5'd23 - -// WRITEBACK -`define WB_ALU 2'h1 -`define WB_MEM 2'h2 -`define WB_JAL 2'h3 -`define NO_WB 2'h0 - - -// JAL -`define JUMP 1'h1 -`define NO_JUMP 1'h0 - -// STALLS -`define STALL 1'h1 -`define NO_STALL 1'h0 - - -`define TAKEN 1'h1 -`define NOT_TAKEN 1'h0 - - -`define ZERO_REG 5'h0 - -`define CLOG2(x) \ - (x <= 2) ? 1 : \ - (x <= 4) ? 2 : \ - (x <= 8) ? 3 : \ - (x <= 16) ? 4 : \ - (x <= 32) ? 5 : \ - (x <= 64) ? 6 : \ - (x <= 128) ? 7 : \ - (x <= 256) ? 8 : \ - (x <= 512) ? 9 : \ - (x <= 1024) ? 10 : \ - -199 - - -`ifndef NUMBER_CORES -`define NUMBER_CORES (`NUMBER_CORES_PER_CLUSTER*`NUMBER_CLUSTERS) -`endif - -// `define SINGLE_CORE_BENCH - -`ifndef GLOBAL_BLOCK_SIZE_BYTES -`define GLOBAL_BLOCK_SIZE_BYTES 16 -`endif - -// ========================================= Dcache Configurable Knobs ========================================= - -// General Cache Knobs - -// Size of cache in bytes -`ifndef DCACHE_SIZE_BYTES -`define DCACHE_SIZE_BYTES 2048 -`endif - -// Size of line inside a bank in bytes -`ifndef DBANK_LINE_SIZE_BYTES -`define DBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES -`endif - -// Number of banks {1, 2, 4, 8,...} -`ifndef DNUMBER_BANKS -`define DNUMBER_BANKS 8 -`endif - -// Size of a word in bytes -`ifndef DWORD_SIZE_BYTES -`define DWORD_SIZE_BYTES 4 -`endif - -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef DNUMBER_REQUESTS -`define DNUMBER_REQUESTS `NT -`endif - -// Number of cycles to complete stage 1 (read from memory) -`ifndef DSTAGE_1_CYCLES -`define DSTAGE_1_CYCLES 1 -`endif - -// Function ID -`ifndef DFUNC_ID -`define DFUNC_ID 0 -`endif - -// Bank Number of words in a line -`ifndef DBANK_LINE_SIZE_WORDS -`define DBANK_LINE_SIZE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES) -`endif - -// Bank Number of words range -`ifndef DBANK_LINE_SIZE_RNG -`define DBANK_LINE_SIZE_RNG `DBANK_LINE_SIZE_WORDS-1:0 -`endif - -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - -// Core Request Queue Size -`ifndef DREQQ_SIZE -`define DREQQ_SIZE `NW -`endif - -// Miss Reserv Queue Knob -`ifndef DMRVQ_SIZE -`define DMRVQ_SIZE (`NW*`NT) -`endif - -// Dram Fill Rsp Queue Size -`ifndef DDFPQ_SIZE -`define DDFPQ_SIZE 32 -`endif - -// Snoop Req Queue -`ifndef DSNRQ_SIZE -`define DSNRQ_SIZE 32 -`endif - -// Queues for writebacks Knobs {1, 2, 4, 8, ...} - -// Core Writeback Queue Size -`ifndef DCWBQ_SIZE -`define DCWBQ_SIZE `DREQQ_SIZE -`endif - -// Dram Writeback Queue Size -`ifndef DDWBQ_SIZE -`define DDWBQ_SIZE 4 -`endif - -// Dram Fill Req Queue Size -`ifndef DDFQQ_SIZE -`define DDFQQ_SIZE `DREQQ_SIZE -`endif - -// Lower Level Cache Hit Queue Size -`ifndef DLLVQ_SIZE -`define DLLVQ_SIZE 0 -`endif - -// Fill Forward SNP Queue -`ifndef DFFSQ_SIZE -`define DFFSQ_SIZE 32 -`endif - -// Prefetcher -`ifndef DPRFQ_SIZE -`define DPRFQ_SIZE 32 -`endif - -`ifndef DPRFQ_STRIDE -`define DPRFQ_STRIDE 0 -`endif - -// Fill Invalidator Size {Fill invalidator must be active} -`ifndef DFILL_INVALIDAOR_SIZE -`define DFILL_INVALIDAOR_SIZE 32 -`endif - -// Dram knobs -`ifndef DSIMULATED_DRAM_LATENCY_CYCLES -`define DSIMULATED_DRAM_LATENCY_CYCLES 2 -`endif - -// ========================================= Icache Configurable Knobs ========================================= - -// General Cache Knobs - -// Size of cache in bytes -`ifndef ICACHE_SIZE_BYTES -`define ICACHE_SIZE_BYTES 4096 -`endif - -// Size of line inside a bank in bytes -`ifndef IBANK_LINE_SIZE_BYTES -`define IBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES -`endif - -// Number of banks {1, 2, 4, 8,...} -`ifndef INUMBER_BANKS -`define INUMBER_BANKS 8 -`endif - -// Size of a word in bytes -`ifndef IWORD_SIZE_BYTES -`define IWORD_SIZE_BYTES 4 -`endif - -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef INUMBER_REQUESTS -`define INUMBER_REQUESTS 1 -`endif - -// Number of cycles to complete stage 1 (read from memory) -`ifndef ISTAGE_1_CYCLES -`define ISTAGE_1_CYCLES 1 -`endif - -// Function ID -`ifndef IFUNC_ID -`define IFUNC_ID 1 -`endif - -// Bank Number of words in a line -`ifndef IBANK_LINE_SIZE_WORDS -`define IBANK_LINE_SIZE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES) -`endif - -// Bank Number of words range -`ifndef IBANK_LINE_SIZE_RNG -`define IBANK_LINE_SIZE_RNG `IBANK_LINE_SIZE_WORDS-1:0 -`endif - -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - -// Core Request Queue Size -`ifndef IREQQ_SIZE -`define IREQQ_SIZE `NW -`endif - -// Miss Reserv Queue Knob -`ifndef IMRVQ_SIZE -`define IMRVQ_SIZE `IREQQ_SIZE -`endif - -// Dram Fill Rsp Queue Size -`ifndef IDFPQ_SIZE -`define IDFPQ_SIZE 32 -`endif - -// Snoop Req Queue -`ifndef ISNRQ_SIZE -`define ISNRQ_SIZE 32 -`endif - -// Queues for writebacks Knobs {1, 2, 4, 8, ...} - -// Core Writeback Queue Size -`ifndef ICWBQ_SIZE -`define ICWBQ_SIZE `IREQQ_SIZE -`endif - -// Dram Writeback Queue Size -`ifndef IDWBQ_SIZE -`define IDWBQ_SIZE 16 -`endif - -// Dram Fill Req Queue Size -`ifndef IDFQQ_SIZE -`define IDFQQ_SIZE `IREQQ_SIZE -`endif - -// Lower Level Cache Hit Queue Size -`ifndef ILLVQ_SIZE -`define ILLVQ_SIZE 16 -`endif - -// Fill Forward SNP Queue -`ifndef IFFSQ_SIZE -`define IFFSQ_SIZE 8 -`endif - -// Prefetcher -`ifndef IPRFQ_SIZE -`define IPRFQ_SIZE 32 -`endif - -`ifndef IPRFQ_STRIDE -`define IPRFQ_STRIDE 0 -`endif - -// Fill Invalidator Size {Fill invalidator must be active} -`ifndef IFILL_INVALIDAOR_SIZE -`define IFILL_INVALIDAOR_SIZE 32 -`endif - -// Dram knobs -`ifndef ISIMULATED_DRAM_LATENCY_CYCLES -`define ISIMULATED_DRAM_LATENCY_CYCLES 2 -`endif - -// ========================================= SM Configurable Knobs ========================================= - -// General Cache Knobs -// Size of cache in bytes -`ifndef SCACHE_SIZE_BYTES -`define SCACHE_SIZE_BYTES 1024 -`endif - -// Size of line inside a bank in bytes -`ifndef SBANK_LINE_SIZE_BYTES -`define SBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES -`endif - -// Number of banks {1, 2, 4, 8,...} -`ifndef SNUMBER_BANKS -`define SNUMBER_BANKS 8 -`endif - -// Size of a word in bytes -`ifndef SWORD_SIZE_BYTES -`define SWORD_SIZE_BYTES 4 -`endif - -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef SNUMBER_REQUESTS -`define SNUMBER_REQUESTS `NT -`endif - -// Number of cycles to complete stage 1 (read from memory) -`ifndef SSTAGE_1_CYCLES -`define SSTAGE_1_CYCLES 1 -`endif - -// Function ID -`ifndef SFUNC_ID -`define SFUNC_ID 2 -`endif - -// Bank Number of words in a line -`ifndef SBANK_LINE_SIZE_WORDS -`define SBANK_LINE_SIZE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES) -`endif - -`ifndef SBANK_LINE_SIZE_RNG -`define SBANK_LINE_SIZE_RNG `SBANK_LINE_SIZE_WORDS-1:0 -`endif - -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - -// Core Request Queue Size -`ifndef SREQQ_SIZE -`define SREQQ_SIZE `NW -`endif - -// Miss Reserv Queue Knob -`ifndef SMRVQ_SIZE -`define SMRVQ_SIZE `SREQQ_SIZE -`endif - -// Dram Fill Rsp Queue Size -`ifndef SDFPQ_SIZE -`define SDFPQ_SIZE 0 -`endif - -// Snoop Req Queue -`ifndef SSNRQ_SIZE -`define SSNRQ_SIZE 16 -`endif - -// Queues for writebacks Knobs {1, 2, 4, 8, ...} - -// Core Writeback Queue Size -`ifndef SCWBQ_SIZE -`define SCWBQ_SIZE `SREQQ_SIZE -`endif - -// Dram Writeback Queue Size -`ifndef SDWBQ_SIZE -`define SDWBQ_SIZE 16 -`endif - -// Dram Fill Req Queue Size -`ifndef SDFQQ_SIZE -`define SDFQQ_SIZE 16 -`endif - -// Lower Level Cache Hit Queue Size -`ifndef SLLVQ_SIZE -`define SLLVQ_SIZE 16 -`endif - -// Fill Forward SNP Queue -`ifndef SFFSQ_SIZE -`define SFFSQ_SIZE 16 -`endif - -// Prefetcher -`ifndef SPRFQ_SIZE -`define SPRFQ_SIZE 4 -`endif - -`ifndef SPRFQ_STRIDE -`define SPRFQ_STRIDE 0 -`endif - -// Fill Invalidator Size {Fill invalidator must be active} -`ifndef SFILL_INVALIDAOR_SIZE -`define SFILL_INVALIDAOR_SIZE 32 -`endif - -// Dram knobs -`ifndef SSIMULATED_DRAM_LATENCY_CYCLES -`define SSIMULATED_DRAM_LATENCY_CYCLES 2 -`endif - -// ========================================= L2cache Configurable Knobs ========================================= - -// General Cache Knobs - -// Size of cache in bytes -`ifndef LLCACHE_SIZE_BYTES -`define LLCACHE_SIZE_BYTES 4096 -`endif - -// Size of line inside a bank in bytes -`ifndef LLBANK_LINE_SIZE_BYTES -`define LLBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES -`endif - -// Number of banks {1, 2, 4, 8,...} -`ifndef LLNUMBER_BANKS -`define LLNUMBER_BANKS 8 -`endif - -// Size of a word in bytes -`ifndef LLWORD_SIZE_BYTES -`define LLWORD_SIZE_BYTES (`LLBANK_LINE_SIZE_BYTES) -`endif - -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef LLNUMBER_REQUESTS -`define LLNUMBER_REQUESTS (2*`NUMBER_CORES_PER_CLUSTER) -`endif - -// Number of cycles to complete stage 1 (read from memory) -`ifndef LLSTAGE_1_CYCLES -`define LLSTAGE_1_CYCLES 1 -`endif - -// Function ID -`define LLFUNC_ID 3 - -// Bank Number of words in a line -`ifndef LLBANK_LINE_SIZE_WORDS -`define LLBANK_LINE_SIZE_WORDS (`LLBANK_LINE_SIZE_BYTES / `LLWORD_SIZE_BYTES) -`endif - -`ifndef LLBANK_LINE_SIZE_RNG -`define LLBANK_LINE_SIZE_RNG `LLBANK_LINE_SIZE_WORDS-1:0 -`endif - -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - -// Core Request Queue Size -`ifndef LLREQQ_SIZE -`define LLREQQ_SIZE 32 -`endif - -// Miss Reserv Queue Knob -`ifndef LLMRVQ_SIZE -`define LLMRVQ_SIZE 32 -`endif - -// Dram Fill Rsp Queue Size -`ifndef LLDFPQ_SIZE -`define LLDFPQ_SIZE 32 -`endif - -// Snoop Req Queue -`ifndef LLSNRQ_SIZE -`define LLSNRQ_SIZE 32 -`endif - -// Queues for writebacks Knobs {1, 2, 4, 8, ...} - -// Core Writeback Queue Size -`ifndef LLCWBQ_SIZE -`define LLCWBQ_SIZE `LLREQQ_SIZE -`endif - -// Dram Writeback Queue Size -`ifndef LLDWBQ_SIZE -`define LLDWBQ_SIZE 16 -`endif - -// Dram Fill Req Queue Size -`ifndef LLDFQQ_SIZE -`define LLDFQQ_SIZE `LLREQQ_SIZE -`endif - -// Lower Level Cache Hit Queue Size -`ifndef LLLLVQ_SIZE -`define LLLLVQ_SIZE 32 -`endif - -// Fill Forward SNP Queue -`ifndef LLFFSQ_SIZE -`define LLFFSQ_SIZE 32 -`endif - -// Prefetcher -`ifndef LLPRFQ_SIZE -`define LLPRFQ_SIZE 32 -`endif - -`ifndef LLPRFQ_STRIDE -`define LLPRFQ_STRIDE 0 -`endif - -// Fill Invalidator Size {Fill invalidator must be active} -`ifndef LLFILL_INVALIDAOR_SIZE -`define LLFILL_INVALIDAOR_SIZE 32 -`endif - -// Dram knobs -`ifndef LLSIMULATED_DRAM_LATENCY_CYCLES -`define LLSIMULATED_DRAM_LATENCY_CYCLES 2 -`endif - -// ========================================= L3cache Configurable Knobs ========================================= - -// General Cache Knobs - -// Size of cache in bytes -`ifndef L3CACHE_SIZE_BYTES -`define L3CACHE_SIZE_BYTES 8192 -`endif - -// Size of line inside a bank in bytes -`ifndef L3BANK_LINE_SIZE_BYTES -`define L3BANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES -`endif - -// Number of banks {1, 2, 4, 8,...} -`ifndef L3NUMBER_BANKS -`define L3NUMBER_BANKS 8 -`endif - -// Size of a word in bytes -`ifndef L3WORD_SIZE_BYTES -`define L3WORD_SIZE_BYTES (`L3BANK_LINE_SIZE_BYTES) -`endif - -// Number of Word requests per cycle {1, 2, 4, 8, ...} -`ifndef L3NUMBER_REQUESTS -`define L3NUMBER_REQUESTS (`NUMBER_CLUSTERS) -`endif - -// Number of cycles to complete stage 1 (read from memory) -`ifndef L3STAGE_1_CYCLES -`define L3STAGE_1_CYCLES 1 -`endif - -// Function ID -`define L3FUNC_ID 3 - -// Bank Number of words in a line -`ifndef L3BANK_LINE_SIZE_WORDS -`define L3BANK_LINE_SIZE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES) -`endif - -`ifndef L3BANK_LINE_SIZE_RNG -`define L3BANK_LINE_SIZE_RNG `L3BANK_LINE_SIZE_WORDS-1:0 -`endif - -// Queues feeding into banks Knobs {1, 2, 4, 8, ...} - -// Core Request Queue Size -`ifndef L3REQQ_SIZE -`define L3REQQ_SIZE 32 -`endif - -// Miss Reserv Queue Knob -`ifndef L3MRVQ_SIZE -`define L3MRVQ_SIZE `L3REQQ_SIZE -`endif - -// Dram Fill Rsp Queue Size -`ifndef L3DFPQ_SIZE -`define L3DFPQ_SIZE 32 -`endif - -// Snoop Req Queue -`ifndef L3SNRQ_SIZE -`define L3SNRQ_SIZE 32 -`endif - -// Queues for writebacks Knobs {1, 2, 4, 8, ...} - -// Core Writeback Queue Size -`ifndef L3CWBQ_SIZE -`define L3CWBQ_SIZE `L3REQQ_SIZE -`endif - -// Dram Writeback Queue Size -`ifndef L3DWBQ_SIZE -`define L3DWBQ_SIZE 16 -`endif - -// Dram Fill Req Queue Size -`ifndef L3DFQQ_SIZE -`define L3DFQQ_SIZE `L3REQQ_SIZE -`endif - -// Lower Level Cache Hit Queue Size -`ifndef L3LLVQ_SIZE -`define L3LLVQ_SIZE 0 -`endif - -// Fill Forward SNP Queue -`ifndef L3FFSQ_SIZE -`define L3FFSQ_SIZE 8 -`endif - -// Prefetcher -`ifndef L3PRFQ_SIZE -`define L3PRFQ_SIZE 32 -`endif - -`ifndef L3PRFQ_STRIDE -`define L3PRFQ_STRIDE 0 -`endif - -// Fill Invalidator Size {Fill invalidator must be active} -`ifndef L3FILL_INVALIDAOR_SIZE -`define L3FILL_INVALIDAOR_SIZE 32 -`endif - -// Dram knobs -`ifndef L3SIMULATED_DRAM_LATENCY_CYCLES -`define L3SIMULATED_DRAM_LATENCY_CYCLES 2 -`endif - - // VX_DEFINE -`endif +`ifndef VX_CONFIG +`define VX_CONFIG + +`include "VX_user_config.vh" + +`ifndef NUM_CORES +`define NUM_CORES 1 +`endif + +`ifndef NUM_CLUSTERS +`define NUM_CLUSTERS 1 +`endif + +`ifndef NUM_WARPS +`define NUM_WARPS 8 +`endif + +`ifndef NUM_THREADS +`define NUM_THREADS 4 +`endif + +`ifndef NUM_BARRIERS +`define NUM_BARRIERS 4 +`endif + +// `define SINGLE_CORE_BENCH + +`ifndef GLOBAL_BLOCK_SIZE_BYTES +`define GLOBAL_BLOCK_SIZE_BYTES 16 +`endif + +// ========================= Dcache Configurable Knobs ======================== + +// Size of cache in bytes +`ifndef DCACHE_SIZE_BYTES +`define DCACHE_SIZE_BYTES 2048 +`endif + +// Size of line inside a bank in bytes +`ifndef DBANK_LINE_SIZE_BYTES +`define DBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES +`endif + +// Size of line inside a bank in bits +`define DBANK_LINE_SIZE (`DBANK_LINE_SIZE_BYTES * 8) + +// Number of banks {1, 2, 4, 8,...} +`ifndef DNUMBER_BANKS +`define DNUMBER_BANKS 8 +`endif + +// Size of a word in bytes +`ifndef DWORD_SIZE_BYTES +`define DWORD_SIZE_BYTES 4 +`endif + +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`ifndef DNUMBER_REQUESTS +`define DNUMBER_REQUESTS `NUM_THREADS +`endif + +// Number of cycles to complete stage 1 (read from memory) +`ifndef DSTAGE_1_CYCLES +`define DSTAGE_1_CYCLES 1 +`endif + +// Core Request Queue Size +`ifndef DREQQ_SIZE +`define DREQQ_SIZE `NUM_WARPS +`endif + +// Miss Reserv Queue Knob +`ifndef DMRVQ_SIZE +`define DMRVQ_SIZE (`NUM_WARPS*`NUM_THREADS) +`endif + +// Dram Fill Rsp Queue Size +`ifndef DDFPQ_SIZE +`define DDFPQ_SIZE 32 +`endif + +// Snoop Req Queue +`ifndef DSNRQ_SIZE +`define DSNRQ_SIZE 32 +`endif + +// Core Writeback Queue Size +`ifndef DCWBQ_SIZE +`define DCWBQ_SIZE `DREQQ_SIZE +`endif + +// Dram Writeback Queue Size +`ifndef DDWBQ_SIZE +`define DDWBQ_SIZE 4 +`endif + +// Dram Fill Req Queue Size +`ifndef DDFQQ_SIZE +`define DDFQQ_SIZE `DREQQ_SIZE +`endif + +// Lower Level Cache Hit Queue Size +`ifndef DLLVQ_SIZE +`define DLLVQ_SIZE 0 +`endif + +// Fill Forward SNP Queue +`ifndef DFFSQ_SIZE +`define DFFSQ_SIZE 32 +`endif + +// Prefetcher +`ifndef DPRFQ_SIZE +`define DPRFQ_SIZE 32 +`endif + +`ifndef DPRFQ_STRIDE +`define DPRFQ_STRIDE 0 +`endif + +// Fill Invalidator Size {Fill invalidator must be active} +`ifndef DFILL_INVALIDAOR_SIZE +`define DFILL_INVALIDAOR_SIZE 32 +`endif + +// Dram knobs +`ifndef DSIMULATED_DRAM_LATENCY_CYCLES +`define DSIMULATED_DRAM_LATENCY_CYCLES 2 +`endif + +// ========================== Icache Configurable Knobs ======================= + +// Size of cache in bytes +`ifndef ICACHE_SIZE_BYTES +`define ICACHE_SIZE_BYTES 4096 +`endif + +// Size of line inside a bank in bytes +`ifndef IBANK_LINE_SIZE_BYTES +`define IBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES +`endif + +// Number of banks {1, 2, 4, 8,...} +`ifndef INUMBER_BANKS +`define INUMBER_BANKS 8 +`endif + +// Size of a word in bytes +`ifndef IWORD_SIZE_BYTES +`define IWORD_SIZE_BYTES 4 +`endif + +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`ifndef INUMBER_REQUESTS +`define INUMBER_REQUESTS 1 +`endif + +// Number of cycles to complete stage 1 (read from memory) +`ifndef ISTAGE_1_CYCLES +`define ISTAGE_1_CYCLES 1 +`endif + +// Core Request Queue Size +`ifndef IREQQ_SIZE +`define IREQQ_SIZE `NUM_WARPS +`endif + +// Miss Reserv Queue Knob +`ifndef IMRVQ_SIZE +`define IMRVQ_SIZE `IREQQ_SIZE +`endif + +// Dram Fill Rsp Queue Size +`ifndef IDFPQ_SIZE +`define IDFPQ_SIZE 32 +`endif + +// Snoop Req Queue +`ifndef ISNRQ_SIZE +`define ISNRQ_SIZE 32 +`endif + +// Core Writeback Queue Size +`ifndef ICWBQ_SIZE +`define ICWBQ_SIZE `IREQQ_SIZE +`endif + +// Dram Writeback Queue Size +`ifndef IDWBQ_SIZE +`define IDWBQ_SIZE 16 +`endif + +// Dram Fill Req Queue Size +`ifndef IDFQQ_SIZE +`define IDFQQ_SIZE `IREQQ_SIZE +`endif + +// Lower Level Cache Hit Queue Size +`ifndef ILLVQ_SIZE +`define ILLVQ_SIZE 16 +`endif + +// Fill Forward SNP Queue +`ifndef IFFSQ_SIZE +`define IFFSQ_SIZE 8 +`endif + +// Prefetcher +`ifndef IPRFQ_SIZE +`define IPRFQ_SIZE 32 +`endif + +`ifndef IPRFQ_STRIDE +`define IPRFQ_STRIDE 0 +`endif + +// Fill Invalidator Size {Fill invalidator must be active} +`ifndef IFILL_INVALIDAOR_SIZE +`define IFILL_INVALIDAOR_SIZE 32 +`endif + +// Dram knobs +`ifndef ISIMULATED_DRAM_LATENCY_CYCLES +`define ISIMULATED_DRAM_LATENCY_CYCLES 2 +`endif + +// =========================== SM Configurable Knobs ========================== + +// Size of cache in bytes +`ifndef SCACHE_SIZE_BYTES +`define SCACHE_SIZE_BYTES 1024 +`endif + +// Size of line inside a bank in bytes +`ifndef SBANK_LINE_SIZE_BYTES +`define SBANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES +`endif + +// Number of banks {1, 2, 4, 8,...} +`ifndef SNUMBER_BANKS +`define SNUMBER_BANKS 8 +`endif + +// Size of a word in bytes +`ifndef SWORD_SIZE_BYTES +`define SWORD_SIZE_BYTES 4 +`endif + +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`ifndef SNUMBER_REQUESTS +`define SNUMBER_REQUESTS `NUM_THREADS +`endif + +// Number of cycles to complete stage 1 (read from memory) +`ifndef SSTAGE_1_CYCLES +`define SSTAGE_1_CYCLES 1 +`endif + +// Core Request Queue Size +`ifndef SREQQ_SIZE +`define SREQQ_SIZE `NUM_WARPS +`endif + +// Miss Reserv Queue Knob +`ifndef SMRVQ_SIZE +`define SMRVQ_SIZE `SREQQ_SIZE +`endif + +// Dram Fill Rsp Queue Size +`ifndef SDFPQ_SIZE +`define SDFPQ_SIZE 0 +`endif + +// Snoop Req Queue +`ifndef SSNRQ_SIZE +`define SSNRQ_SIZE 16 +`endif + +// Core Writeback Queue Size +`ifndef SCWBQ_SIZE +`define SCWBQ_SIZE `SREQQ_SIZE +`endif + +// Dram Writeback Queue Size +`ifndef SDWBQ_SIZE +`define SDWBQ_SIZE 16 +`endif + +// Dram Fill Req Queue Size +`ifndef SDFQQ_SIZE +`define SDFQQ_SIZE 16 +`endif + +// Lower Level Cache Hit Queue Size +`ifndef SLLVQ_SIZE +`define SLLVQ_SIZE 16 +`endif + +// Fill Forward SNP Queue +`ifndef SFFSQ_SIZE +`define SFFSQ_SIZE 16 +`endif + +// Prefetcher +`ifndef SPRFQ_SIZE +`define SPRFQ_SIZE 4 +`endif + +`ifndef SPRFQ_STRIDE +`define SPRFQ_STRIDE 0 +`endif + +// Fill Invalidator Size {Fill invalidator must be active} +`ifndef SFILL_INVALIDAOR_SIZE +`define SFILL_INVALIDAOR_SIZE 32 +`endif + +// Dram knobs +`ifndef SSIMULATED_DRAM_LATENCY_CYCLES +`define SSIMULATED_DRAM_LATENCY_CYCLES 2 +`endif + +// ======================== L2cache Configurable Knobs ======================== + +// Size of cache in bytes +`ifndef L2CACHE_SIZE_BYTES +`define L2CACHE_SIZE_BYTES 4096 +`endif + +// Size of line inside a bank in bytes +`ifndef L2BANK_LINE_SIZE_BYTES +`define L2BANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES +`endif + +// Number of banks {1, 2, 4, 8,...} +`ifndef L2NUMBER_BANKS +`define L2NUMBER_BANKS 8 +`endif + +// Size of a word in bytes +`ifndef L2WORD_SIZE_BYTES +`define L2WORD_SIZE_BYTES (`L2BANK_LINE_SIZE_BYTES) +`endif + +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`ifndef L2NUMBER_REQUESTS +`define L2NUMBER_REQUESTS (2*`NUM_CORES_PER_CLUSTER) +`endif + +// Number of cycles to complete stage 1 (read from memory) +`ifndef L2STAGE_1_CYCLES +`define L2STAGE_1_CYCLES 1 +`endif + +// Core Request Queue Size +`ifndef L2REQQ_SIZE +`define L2REQQ_SIZE 32 +`endif + +// Miss Reserv Queue Knob +`ifndef L2MRVQ_SIZE +`define L2MRVQ_SIZE 32 +`endif + +// Dram Fill Rsp Queue Size +`ifndef L2DFPQ_SIZE +`define L2DFPQ_SIZE 32 +`endif + +// Snoop Req Queue +`ifndef L2SNRQ_SIZE +`define L2SNRQ_SIZE 32 +`endif + +// Core Writeback Queue Size +`ifndef L2CWBQ_SIZE +`define L2CWBQ_SIZE `L2REQQ_SIZE +`endif + +// Dram Writeback Queue Size +`ifndef L2DWBQ_SIZE +`define L2DWBQ_SIZE 16 +`endif + +// Dram Fill Req Queue Size +`ifndef L2DFQQ_SIZE +`define L2DFQQ_SIZE `L2REQQ_SIZE +`endif + +// Lower Level Cache Hit Queue Size +`ifndef L2LLVQ_SIZE +`define L2LLVQ_SIZE 32 +`endif + +// Fill Forward SNP Queue +`ifndef L2FFSQ_SIZE +`define L2FFSQ_SIZE 32 +`endif + +// Prefetcher +`ifndef L2PRFQ_SIZE +`define L2PRFQ_SIZE 32 +`endif + +`ifndef L2PRFQ_STRIDE +`define L2PRFQ_STRIDE 0 +`endif + +// Fill Invalidator Size {Fill invalidator must be active} +`ifndef L2FILL_INVALIDAOR_SIZE +`define L2FILL_INVALIDAOR_SIZE 32 +`endif + +// Dram knobs +`ifndef L2SIMULATED_DRAM_LATENCY_CYCLES +`define L2SIMULATED_DRAM_LATENCY_CYCLES 2 +`endif + +// ======================== L3cache Configurable Knobs ======================== + +// Size of cache in bytes +`ifndef L3CACHE_SIZE_BYTES +`define L3CACHE_SIZE_BYTES 8192 +`endif + +// Size of line inside a bank in bytes +`ifndef L3BANK_LINE_SIZE_BYTES +`define L3BANK_LINE_SIZE_BYTES `GLOBAL_BLOCK_SIZE_BYTES +`endif + +// Number of banks {1, 2, 4, 8,...} +`ifndef L3NUMBER_BANKS +`define L3NUMBER_BANKS 8 +`endif + +// Size of a word in bytes +`ifndef L3WORD_SIZE_BYTES +`define L3WORD_SIZE_BYTES (`L3BANK_LINE_SIZE_BYTES) +`endif + +// Number of Word requests per cycle {1, 2, 4, 8, ...} +`ifndef L3NUMBER_REQUESTS +`define L3NUMBER_REQUESTS (`NUM_CLUSTERS) +`endif + +// Number of cycles to complete stage 1 (read from memory) +`ifndef L3STAGE_1_CYCLES +`define L3STAGE_1_CYCLES 1 +`endif + +// Core Request Queue Size +`ifndef L3REQQ_SIZE +`define L3REQQ_SIZE 32 +`endif + +// Miss Reserv Queue Knob +`ifndef L3MRVQ_SIZE +`define L3MRVQ_SIZE `L3REQQ_SIZE +`endif + +// Dram Fill Rsp Queue Size +`ifndef L3DFPQ_SIZE +`define L3DFPQ_SIZE 32 +`endif + +// Snoop Req Queue +`ifndef L3SNRQ_SIZE +`define L3SNRQ_SIZE 32 +`endif + +// Core Writeback Queue Size +`ifndef L3CWBQ_SIZE +`define L3CWBQ_SIZE `L3REQQ_SIZE +`endif + +// Dram Writeback Queue Size +`ifndef L3DWBQ_SIZE +`define L3DWBQ_SIZE 16 +`endif + +// Dram Fill Req Queue Size +`ifndef L3DFQQ_SIZE +`define L3DFQQ_SIZE `L3REQQ_SIZE +`endif + +// Lower Level Cache Hit Queue Size +`ifndef L3LLVQ_SIZE +`define L3LLVQ_SIZE 0 +`endif + +// Fill Forward SNP Queue +`ifndef L3FFSQ_SIZE +`define L3FFSQ_SIZE 8 +`endif + +// Prefetcher +`ifndef L3PRFQ_SIZE +`define L3PRFQ_SIZE 32 +`endif + +`ifndef L3PRFQ_STRIDE +`define L3PRFQ_STRIDE 0 +`endif + +// Fill Invalidator Size {Fill invalidator must be active} +`ifndef L3FILL_INVALIDAOR_SIZE +`define L3FILL_INVALIDAOR_SIZE 32 +`endif + +// Dram knobs +`ifndef L3SIMULATED_DRAM_LATENCY_CYCLES +`define L3SIMULATED_DRAM_LATENCY_CYCLES 2 +`endif + + // VX_CONFIG +`endif diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index 5fce2eb1..7d8af6a6 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" module VX_csr_data ( input wire clk, // Clock @@ -19,17 +19,17 @@ module VX_csr_data ( /* verilator lint_off WIDTH */ - // wire[`NT_M1:0][31:0] thread_ids; - // wire[`NT_M1:0][31:0] warp_ids; + // wire[`NUM_THREADS-1:0][31:0] thread_ids; + // wire[`NUM_THREADS-1:0][31:0] warp_ids; // genvar cur_t; - // for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin + // for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin // assign thread_ids[cur_t] = cur_t; // end // genvar cur_tw; - // for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin - // assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num}; + // for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin + // assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num}; // end reg[11:0] csr[1023:0]; diff --git a/hw/rtl/VX_csr_pipe.v b/hw/rtl/VX_csr_pipe.v index a04f51de..f2a84da9 100644 --- a/hw/rtl/VX_csr_pipe.v +++ b/hw/rtl/VX_csr_pipe.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_csr_pipe #( @@ -14,8 +14,8 @@ module VX_csr_pipe output wire stall_gpr_csr ); - wire[`NT_M1:0] valid_s2; - wire[`NW_M1:0] warp_num_s2; + wire[`NUM_THREADS-1:0] valid_s2; + wire[`NW_BITS-1:0] warp_num_s2; wire[4:0] rd_s2; wire[1:0] wb_s2; wire[4:0] alu_op_s2; @@ -60,7 +60,7 @@ module VX_csr_pipe wire zero = 0; - VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_M1+1) + `NT)) csr_reg_s2 ( + VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)) csr_reg_s2 ( .clk (clk), .reset(reset), .stall(no_slot_csr), @@ -70,28 +70,26 @@ module VX_csr_pipe ); - wire[`NT_M1:0][31:0] final_csr_data; + wire[`NUM_THREADS-1:0][31:0] final_csr_data; - wire[`NT_M1:0][31:0] thread_ids; - wire[`NT_M1:0][31:0] warp_ids; - wire[`NT_M1:0][31:0] warp_idz; - wire[`NT_M1:0][31:0] csr_vec_read_data_s2; + wire[`NUM_THREADS-1:0][31:0] thread_ids; + wire[`NUM_THREADS-1:0][31:0] warp_ids; + wire[`NUM_THREADS-1:0][31:0] warp_idz; + wire[`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2; genvar cur_t; - for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin + for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin assign thread_ids[cur_t] = cur_t; end genvar cur_tw; - for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin - assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2}; - assign warp_idz[cur_tw] = (warp_num_s2 + (CORE_ID*`NW)); + for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin + assign warp_ids[cur_tw] = warp_num_s2; + assign warp_idz[cur_tw] = 32'(warp_num_s2 + (CORE_ID * `NUM_WARPS)); end - - genvar cur_v; - for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin + for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2; end @@ -104,7 +102,6 @@ module VX_csr_pipe warp_id_select ? warp_idz : csr_vec_read_data_s2; - assign VX_csr_wb.valid = valid_s2; assign VX_csr_wb.warp_num = warp_num_s2; assign VX_csr_wb.rd = rd_s2; diff --git a/hw/rtl/VX_csr_wrapper.v b/hw/rtl/VX_csr_wrapper.v index 94c5b5eb..4097a779 100644 --- a/hw/rtl/VX_csr_wrapper.v +++ b/hw/rtl/VX_csr_wrapper.v @@ -1,5 +1,5 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_csr_wrapper ( VX_csr_req_inter VX_csr_req, @@ -8,17 +8,17 @@ module VX_csr_wrapper ( ); - wire[`NT_M1:0][31:0] thread_ids; - wire[`NT_M1:0][31:0] warp_ids; + wire[`NUM_THREADS-1:0][31:0] thread_ids; + wire[`NUM_THREADS-1:0][31:0] warp_ids; genvar cur_t, cur_tw; generate - for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin : thread_ids_init + for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init assign thread_ids[cur_t] = cur_t; end - for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin : warp_ids_init - assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num}; + for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init + assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, VX_csr_req.warp_num}; end endgenerate diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 4f33bbd1..1ea0ef97 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -1,5 +1,5 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_decode( // Fetch Inputs @@ -16,11 +16,11 @@ module VX_decode( wire[31:0] in_instruction = fd_inst_meta_de.instruction; wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; - wire[`NW_M1:0] in_warp_num = fd_inst_meta_de.warp_num; + wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num; assign VX_frE_to_bckE_req.curr_PC = in_curr_PC; - wire[`NT_M1:0] in_valid = fd_inst_meta_de.valid; + wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid; wire[6:0] curr_opcode; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh new file mode 100644 index 00000000..9985edc1 --- /dev/null +++ b/hw/rtl/VX_define.vh @@ -0,0 +1,163 @@ +`ifndef VX_DEFINE +`define VX_DEFINE + +`include "./VX_config.vh" + +// `define QUEUE_FORCE_MLAB 1 + +// `define SYN 1 +// `define ASIC 1 +// `define SYN_FUNC 1 + +`define LOG2UP(x) ((x > 1) ? $clog2(x) : 1) + +`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS) + +`define NW_BITS `LOG2UP(`NUM_WARPS) + +`define NT_BITS `LOG2UP(`NUM_THREADS) + +`define NC_BITS `LOG2UP(`NUM_CORES) + +`define R_INST 7'd51 +`define L_INST 7'd3 +`define ALU_INST 7'd19 +`define S_INST 7'd35 +`define B_INST 7'd99 +`define LUI_INST 7'd55 +`define AUIPC_INST 7'd23 +`define JAL_INST 7'd111 +`define JALR_INST 7'd103 +`define SYS_INST 7'd115 +`define GPGPU_INST 7'h6b + +`define WB_ALU 2'h1 +`define WB_MEM 2'h2 +`define WB_JAL 2'h3 +`define NO_WB 2'h0 + +`define RS2_IMMED 1 +`define RS2_REG 0 + +`define NO_MEM_READ 3'h7 +`define LB_MEM_READ 3'h0 +`define LH_MEM_READ 3'h1 +`define LW_MEM_READ 3'h2 +`define LBU_MEM_READ 3'h4 +`define LHU_MEM_READ 3'h5 + +`define NO_MEM_WRITE 3'h7 +`define SB_MEM_WRITE 3'h0 +`define SH_MEM_WRITE 3'h1 +`define SW_MEM_WRITE 3'h2 + +`define NO_BRANCH 3'h0 +`define BEQ 3'h1 +`define BNE 3'h2 +`define BLT 3'h3 +`define BGT 3'h4 +`define BLTU 3'h5 +`define BGTU 3'h6 + +`define NO_ALU 5'd15 +`define ADD 5'd0 +`define SUB 5'd1 +`define SLLA 5'd2 +`define SLT 5'd3 +`define SLTU 5'd4 +`define XOR 5'd5 +`define SRL 5'd6 +`define SRA 5'd7 +`define OR 5'd8 +`define AND 5'd9 +`define SUBU 5'd10 +`define LUI_ALU 5'd11 +`define AUIPC_ALU 5'd12 +`define CSR_ALU_RW 5'd13 +`define CSR_ALU_RS 5'd14 +`define CSR_ALU_RC 5'd15 +`define MUL 5'd16 +`define MULH 5'd17 +`define MULHSU 5'd18 +`define MULHU 5'd19 +`define DIV 5'd20 +`define DIVU 5'd21 +`define REM 5'd22 +`define REMU 5'd23 + +// WRITEBACK +`define WB_ALU 2'h1 +`define WB_MEM 2'h2 +`define WB_JAL 2'h3 +`define NO_WB 2'h0 + +// JAL +`define JUMP 1'h1 +`define NO_JUMP 1'h0 + +// STALLS +`define STALL 1'h1 +`define NO_STALL 1'h0 + +`define TAKEN 1'h1 +`define NOT_TAKEN 1'h0 + +`define ZERO_REG 5'h0 + +// ======================= Dcache Configurable Knobs ========================== + +// Function ID +`define DFUNC_ID 0 + +// Size of line inside a bank in bits +`define DBANK_LINE_SIZE (`DBANK_LINE_SIZE_BYTES * 8) + +// Bank Number of words in a line +`define DBANK_LINE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES) + +// ======================= Icache Configurable Knobs ========================== + +// Function ID +`define IFUNC_ID 1 + +// Size of line inside a bank in bits +`define IBANK_LINE_SIZE (`IBANK_LINE_SIZE_BYTES * 8) + +// Bank Number of words in a line +`define IBANK_LINE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES) + +// ======================= SM Configurable Knobs ============================== + +// Function ID +`define SFUNC_ID 2 + +// Size of line inside a bank in bits +`define SBANK_LINE_SIZE (`SBANK_LINE_SIZE_BYTES * 8) + +// Bank Number of words in a line +`define SBANK_LINE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES) + +// ======================= L2cache Configurable Knobs ========================= + +// Function ID +`define L2FUNC_ID 3 + +// Size of line inside a bank in bits +`define L2BANK_LINE_SIZE (`L2BANK_LINE_SIZE_BYTES * 8) + +// Bank Number of words in a line +`define L2BANK_LINE_WORDS (`L2BANK_LINE_SIZE_BYTES / `L2WORD_SIZE_BYTES) + +// ======================= L3cache Configurable Knobs ========================= + +// Function ID +`define L3FUNC_ID 3 + +// Size of line inside a bank in bits +`define L3BANK_LINE_SIZE (`L3BANK_LINE_SIZE_BYTES * 8) + +// Bank Number of words in a line +`define L3BANK_LINE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES) + + // VX_DEFINE +`endif diff --git a/hw/rtl/VX_dmem_controller.v b/hw/rtl/VX_dmem_controller.v index 596da8cd..25304e6b 100644 --- a/hw/rtl/VX_dmem_controller.v +++ b/hw/rtl/VX_dmem_controller.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_dmem_controller ( input wire clk, @@ -36,7 +36,7 @@ module VX_dmem_controller ( wire dcache_wants_wb = (|VX_dcache_rsp_dcache.core_wb_valid); // Dcache Request - assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~to_shm}}; + assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}}; assign VX_dcache_req_dcache.core_req_addr = VX_dcache_req.core_req_addr; assign VX_dcache_req_dcache.core_req_writedata = VX_dcache_req.core_req_writedata; assign VX_dcache_req_dcache.core_req_mem_read = VX_dcache_req.core_req_mem_read; @@ -49,7 +49,7 @@ module VX_dmem_controller ( // Shred Memory Request - assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}}; + assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}}; assign VX_dcache_req_smem.core_req_addr = VX_dcache_req.core_req_addr; assign VX_dcache_req_smem.core_req_writedata = VX_dcache_req.core_req_writedata; assign VX_dcache_req_smem.core_req_mem_read = VX_dcache_req.core_req_mem_read; @@ -73,8 +73,8 @@ module VX_dmem_controller ( - VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_req(); - VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_res(); + VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_req(); + VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_res(); diff --git a/hw/rtl/VX_execute_unit.v b/hw/rtl/VX_execute_unit.v index 60c7441a..65442639 100644 --- a/hw/rtl/VX_execute_unit.v +++ b/hw/rtl/VX_execute_unit.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_execute_unit ( input wire clk, @@ -18,8 +18,8 @@ module VX_execute_unit ( output wire out_delay ); - wire[`NT_M1:0][31:0] in_a_reg_data; - wire[`NT_M1:0][31:0] in_b_reg_data; + wire[`NUM_THREADS-1:0][31:0] in_a_reg_data; + wire[`NUM_THREADS-1:0][31:0] in_b_reg_data; wire[4:0] in_alu_op; wire in_rs2_src; wire[31:0] in_itype_immed; @@ -41,11 +41,11 @@ module VX_execute_unit ( assign in_curr_PC = VX_exec_unit_req.curr_PC; - wire[`NT_M1:0][31:0] alu_result; - wire[`NT_M1:0] alu_stall; + wire[`NUM_THREADS-1:0][31:0] alu_result; + wire[`NUM_THREADS-1:0] alu_stall; genvar index_out_reg; generate - for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs + for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs VX_alu vx_alu( .clk(clk), .reset(reset), @@ -69,9 +69,9 @@ module VX_execute_unit ( assign out_delay = no_slot_exec || internal_stall; - wire [$clog2(`NT)-1:0] jal_branch_use_index; + wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index; wire jal_branch_found_valid; - VX_generic_priority_encoder #(.N(`NT)) choose_alu_result( + VX_generic_priority_encoder #(.N(`NUM_THREADS)) choose_alu_result( .valids(VX_exec_unit_req.valid), .index (jal_branch_use_index), .found (jal_branch_found_valid) @@ -95,10 +95,10 @@ module VX_execute_unit ( end - wire[`NT_M1:0][31:0] duplicate_PC_data; + wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data; genvar i; generate - for (i = 0; i < `NT; i=i+1) begin : pc_data_setup + for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next; end endgenerate @@ -113,7 +113,7 @@ module VX_execute_unit ( // Actual Writeback assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd; assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb; - assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NT{!internal_stall}}; + assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}}; assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num; assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result; @@ -141,7 +141,7 @@ module VX_execute_unit ( // .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc }) // ); - VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg( + VX_generic_register #(.N(33 + `NW_BITS-1 + 1)) jal_reg( .clk (clk), .reset(reset), .stall(zero), @@ -150,7 +150,7 @@ module VX_execute_unit ( .out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num}) ); - VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg( + VX_generic_register #(.N(34 + `NW_BITS-1 + 1)) branch_reg( .clk (clk), .reset(reset), .stall(zero), diff --git a/hw/rtl/VX_fetch.v b/hw/rtl/VX_fetch.v index cb08181b..d3134351 100644 --- a/hw/rtl/VX_fetch.v +++ b/hw/rtl/VX_fetch.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_fetch ( input wire clk, @@ -7,8 +7,8 @@ module VX_fetch ( VX_join_inter VX_join, input wire schedule_delay, input wire icache_stage_delay, - input wire[`NW_M1:0] icache_stage_wid, - input wire[`NT-1:0] icache_stage_valids, + input wire[`NW_BITS-1:0] icache_stage_wid, + input wire[`NUM_THREADS-1:0] icache_stage_valids, output wire out_ebreak, VX_jal_response_inter VX_jal_rsp, @@ -17,8 +17,8 @@ module VX_fetch ( VX_warp_ctl_inter VX_warp_ctl ); - wire[`NT_M1:0] thread_mask; - wire[`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] thread_mask; + wire[`NW_BITS-1:0] warp_num; wire[31:0] warp_pc; wire scheduled_warp; diff --git a/hw/rtl/VX_front_end.v b/hw/rtl/VX_front_end.v index fd204b20..61a40ef4 100644 --- a/hw/rtl/VX_front_end.v +++ b/hw/rtl/VX_front_end.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_front_end ( input wire clk, @@ -37,8 +37,8 @@ wire icache_stage_delay; wire vortex_ebreak; wire terminate_sim; -wire[`NW_M1:0] icache_stage_wid; -wire[`NT-1:0] icache_stage_valids; +wire[`NW_BITS-1:0] icache_stage_wid; +wire[`NUM_THREADS-1:0] icache_stage_valids; reg old_ebreak; // This should be eventually removed always @(posedge clk) begin diff --git a/hw/rtl/VX_generic_priority_encoder.v b/hw/rtl/VX_generic_priority_encoder.v index 80df3d43..fb852564 100644 --- a/hw/rtl/VX_generic_priority_encoder.v +++ b/hw/rtl/VX_generic_priority_encoder.v @@ -1,7 +1,7 @@ `ifndef VX_GENERIC_PRIORITY_ENCODER `define VX_GENERIC_PRIORITY_ENCODER -`include "VX_define.v" +`include "VX_define.vh" module VX_generic_priority_encoder #( @@ -10,8 +10,8 @@ module VX_generic_priority_encoder ( input wire[N-1:0] valids, //output reg[$clog2(N)-1:0] index, - output reg[(`CLOG2(N))-1:0] index, - //output reg[`CLOG2(N):0] index, // eh + output reg[(`LOG2UP(N))-1:0] index, + //output reg[`LOG2UP(N):0] index, // eh output reg found ); @@ -22,7 +22,7 @@ module VX_generic_priority_encoder for (i = N-1; i >= 0; i = i - 1) begin if (valids[i]) begin //index = i[$clog2(N)-1:0]; - index = i[(`CLOG2(N))-1:0]; + index = i[(`LOG2UP(N))-1:0]; found = 1; end end diff --git a/hw/rtl/VX_generic_queue_ll.v b/hw/rtl/VX_generic_queue_ll.v index 5349f649..e0c8a1b1 100644 --- a/hw/rtl/VX_generic_queue_ll.v +++ b/hw/rtl/VX_generic_queue_ll.v @@ -1,5 +1,3 @@ -`include "VX_define_synth.v" - module VX_generic_queue_ll #( parameter DATAW = 4, diff --git a/hw/rtl/VX_gpgpu_inst.v b/hw/rtl/VX_gpgpu_inst.v index 6219eeb2..721932bf 100644 --- a/hw/rtl/VX_gpgpu_inst.v +++ b/hw/rtl/VX_gpgpu_inst.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_gpgpu_inst ( // Input @@ -9,14 +9,14 @@ module VX_gpgpu_inst ( ); - wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid; + wire[`NUM_THREADS-1:0] curr_valids = VX_gpu_inst_req.valid; wire is_split = (VX_gpu_inst_req.is_split); - wire[`NT_M1:0] tmc_new_mask; - wire all_threads = `NT < VX_gpu_inst_req.a_reg_data[0]; + wire[`NUM_THREADS-1:0] tmc_new_mask; + wire all_threads = `NUM_THREADS < VX_gpu_inst_req.a_reg_data[0]; genvar curr_t; generate - for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init + for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0]; end endgenerate @@ -33,11 +33,11 @@ module VX_gpgpu_inst ( wire wspawn = VX_gpu_inst_req.is_wspawn; wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2; - wire all_active = `NW < VX_gpu_inst_req.a_reg_data[0]; - wire[`NW-1:0] wspawn_new_active; + wire all_active = `NUM_WARPS < VX_gpu_inst_req.a_reg_data[0]; + wire[`NUM_WARPS-1:0] wspawn_new_active; genvar curr_w; generate - for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init + for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0]; end endgenerate @@ -47,19 +47,19 @@ module VX_gpgpu_inst ( assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0]; wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1; - assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NW):0]; + assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0]; assign VX_warp_ctl.wspawn = wspawn; assign VX_warp_ctl.wspawn_pc = wspawn_pc; assign VX_warp_ctl.wspawn_new_active = wspawn_new_active; - wire[`NT_M1:0] split_new_use_mask; - wire[`NT_M1:0] split_new_later_mask; + wire[`NUM_THREADS-1:0] split_new_use_mask; + wire[`NUM_THREADS-1:0] split_new_later_mask; // VX_gpu_inst_req.pc genvar curr_s_t; generate - for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin : masks_init + for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1); assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool); @@ -67,18 +67,18 @@ module VX_gpgpu_inst ( end endgenerate - wire[$clog2(`NT):0] num_valids; + wire[$clog2(`NUM_THREADS):0] num_valids; - VX_countones #(.N(`NT)) valids_counter ( + VX_countones #(.N(`NUM_THREADS)) valids_counter ( .valids(curr_valids), .count (num_valids) ); - // wire[`NW_M1:0] num_valids = $countones(curr_valids); + // wire[`NW_BITS-1:0] num_valids = $countones(curr_valids); assign VX_warp_ctl.is_split = is_split && (num_valids > 1); - assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}})); + assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}})); assign VX_warp_ctl.split_new_mask = split_new_use_mask; assign VX_warp_ctl.split_later_mask = split_new_later_mask; assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next; diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index 6f239c51..23e56d35 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -1,5 +1,5 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_gpr ( input wire clk, @@ -8,8 +8,8 @@ module VX_gpr ( VX_gpr_read_inter VX_gpr_read, VX_wb_inter VX_writeback_inter, - output reg[`NT_M1:0][31:0] out_a_reg_data, - output reg[`NT_M1:0][31:0] out_b_reg_data + output reg[`NUM_THREADS-1:0][31:0] out_a_reg_data, + output reg[`NUM_THREADS-1:0][31:0] out_b_reg_data ); @@ -41,10 +41,10 @@ module VX_gpr ( wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid); - wire[`NT_M1:0][31:0] write_bit_mask; + wire[`NUM_THREADS-1:0][31:0] write_bit_mask; genvar curr_t; - for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin + for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t]; assign write_bit_mask[curr_t] = {32{~local_write}}; end @@ -59,14 +59,14 @@ module VX_gpr ( wire cena_1 = 0; wire cena_2 = 0; - wire[`NT_M1:0][31:0] temp_a; - wire[`NT_M1:0][31:0] temp_b; + wire[`NUM_THREADS-1:0][31:0] temp_a; + wire[`NUM_THREADS-1:0][31:0] temp_b; `ifndef SYN genvar thread; genvar curr_bit; - for (thread = 0; thread < `NT; thread = thread + 1) + for (thread = 0; thread < `NUM_THREADS; thread = thread + 1) begin for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1) begin @@ -83,7 +83,7 @@ module VX_gpr ( `endif - wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0; + wire[`NUM_THREADS-1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0; genvar curr_base_thread; for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4) diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index e9ffc48d..71a4a3bc 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_gpr_stage ( input wire clk, @@ -114,15 +114,15 @@ module VX_gpr_stage ( ); - wire[`NT_M1:0][31:0] temp_store_data; - wire[`NT_M1:0][31:0] temp_base_address; // A reg data + wire[`NUM_THREADS-1:0][31:0] temp_store_data; + wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data - wire[`NT_M1:0][31:0] real_store_data; - wire[`NT_M1:0][31:0] real_base_address; // A reg data + wire[`NUM_THREADS-1:0][31:0] real_store_data; + wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu; - VX_generic_register #(.N(`NT*32*2)) lsu_data( + VX_generic_register #(.N(`NUM_THREADS*32*2)) lsu_data( .clk (clk), .reset(reset), .stall(!store_curr_real), @@ -139,7 +139,7 @@ module VX_gpr_stage ( assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address; - VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg( + VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))) lsu_reg( .clk (clk), .reset(reset), .stall(stall_lsu), @@ -148,7 +148,7 @@ module VX_gpr_stage ( .out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) ); - VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg( + VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))) exec_unit_reg( .clk (clk), .reset(reset), .stall(stall_exec), @@ -160,7 +160,7 @@ module VX_gpr_stage ( assign VX_exec_unit_req.a_reg_data = real_base_address; assign VX_exec_unit_req.b_reg_data = real_store_data; - VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg( + VX_generic_register #(.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))) gpu_inst_reg( .clk (clk), .reset(reset), .stall(stall_rest), @@ -172,7 +172,7 @@ module VX_gpr_stage ( assign VX_gpu_inst_req.a_reg_data = real_base_address; assign VX_gpu_inst_req.rd2 = real_store_data; - VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( + VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg( .clk (clk), .reset(reset), .stall(stall_gpr_csr), @@ -187,7 +187,7 @@ module VX_gpr_stage ( `else // 341 - VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg( + VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) lsu_reg( .clk (clk), .reset(reset), .stall(stall_lsu), @@ -196,7 +196,7 @@ module VX_gpr_stage ( .out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) ); - VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg( + VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) exec_unit_reg( .clk (clk), .reset(reset), .stall(stall_exec), @@ -205,7 +205,7 @@ module VX_gpr_stage ( .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) ); - VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg( + VX_generic_register #(.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))) gpu_inst_reg( .clk (clk), .reset(reset), .stall(stall_rest), @@ -214,7 +214,7 @@ module VX_gpr_stage ( .out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 }) ); - VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( + VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg( .clk (clk), .reset(reset), .stall(stall_gpr_csr), diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index a58bc9ef..a2d2a7b6 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_gpr_wrapper ( input wire clk, @@ -7,23 +7,22 @@ module VX_gpr_wrapper ( VX_wb_inter VX_writeback_inter, VX_gpr_jal_inter VX_gpr_jal, - output wire[`NT_M1:0][31:0] out_a_reg_data, - output wire[`NT_M1:0][31:0] out_b_reg_data + output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data, + output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data ); - wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data; - wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data; + wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data; + wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data; - wire[`NT_M1:0][31:0] jal_data; + wire[`NUM_THREADS-1:0][31:0] jal_data; genvar index; generate - for (index = 0; index <= `NT_M1; index = index + 1) begin : jal_data_assign + for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign assign jal_data[index] = VX_gpr_jal.curr_PC; end endgenerate - `ifndef ASIC assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[VX_gpr_read.warp_num])); assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]); @@ -31,8 +30,8 @@ module VX_gpr_wrapper ( wire zer = 0; - wire[`NW_M1:0] old_warp_num; - VX_generic_register #(`NW_M1+1) store_wn( + wire[`NW_BITS-1:0] old_warp_num; + VX_generic_register #(`NW_BITS-1+1) store_wn( .clk (clk), .reset(reset), .stall(zer), @@ -49,7 +48,7 @@ module VX_gpr_wrapper ( genvar warp_index; generate - for (warp_index = 0; warp_index < `NW; warp_index = warp_index + 1) begin : warp_gprs + for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num; VX_gpr vx_gpr( diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 1c8975d0..555a29e7 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -1,12 +1,12 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_icache_stage ( input wire clk, input wire reset, input wire total_freeze, output wire icache_stage_delay, - output wire[`NW_M1:0] icache_stage_wid, - output wire[`NT-1:0] icache_stage_valids, + output wire[`NW_BITS-1:0] icache_stage_wid, + output wire[`NUM_THREADS-1:0] icache_stage_valids, VX_inst_meta_inter fe_inst_meta_fi, VX_inst_meta_inter fe_inst_meta_id, @@ -14,7 +14,7 @@ module VX_icache_stage ( VX_gpu_dcache_req_inter VX_icache_req ); - reg[`NT-1:0] threads_active[`NW-1:0]; + reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0]; wire valid_inst = (|fe_inst_meta_fi.valid); @@ -39,7 +39,7 @@ module VX_icache_stage ( /* verilator lint_off WIDTH */ assign icache_stage_wid = fe_inst_meta_id.warp_num; - assign icache_stage_valids = fe_inst_meta_id.valid & {`NT{!icache_stage_delay}}; + assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}}; // Cache can't accept request assign icache_stage_delay = VX_icache_rsp.delay_req; @@ -50,7 +50,7 @@ module VX_icache_stage ( integer curr_w; always @(posedge clk) begin if (reset) begin - for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) threads_active[curr_w] <= 0; + for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) threads_active[curr_w] <= 0; end else begin if (valid_inst && !icache_stage_delay) begin /* verilator lint_off WIDTH */ diff --git a/hw/rtl/VX_inst_multiplex.v b/hw/rtl/VX_inst_multiplex.v index bf57afb9..f9bc7730 100644 --- a/hw/rtl/VX_inst_multiplex.v +++ b/hw/rtl/VX_inst_multiplex.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_inst_multiplex ( // Inputs @@ -12,9 +12,9 @@ module VX_inst_multiplex ( VX_csr_req_inter VX_csr_req ); - wire[`NT_M1:0] is_mem_mask; - wire[`NT_M1:0] is_gpu_mask; - wire[`NT_M1:0] is_csr_mask; + wire[`NUM_THREADS-1:0] is_mem_mask; + wire[`NUM_THREADS-1:0] is_gpu_mask; + wire[`NUM_THREADS-1:0] is_csr_mask; wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ); wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split); @@ -23,7 +23,7 @@ module VX_inst_multiplex ( genvar currT; generate - for (currT = 0; currT < `NT; currT = currT + 1) begin : mask_init + for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init assign is_mem_mask[currT] = is_mem; assign is_gpu_mask[currT] = is_gpu; assign is_csr_mask[currT] = is_csr; diff --git a/hw/rtl/VX_lsu.v b/hw/rtl/VX_lsu.v index a0abf1ff..7c8ec808 100644 --- a/hw/rtl/VX_lsu.v +++ b/hw/rtl/VX_lsu.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_lsu ( input wire clk, @@ -15,7 +15,7 @@ module VX_lsu ( ); // Generate Addresses - wire[`NT_M1:0][31:0] address; + wire[`NUM_THREADS-1:0][31:0] address; VX_lsu_addr_gen VX_lsu_addr_gen ( .base_address(VX_lsu_req.base_address), @@ -23,19 +23,19 @@ module VX_lsu ( .address (address) ); - wire[`NT_M1:0][31:0] use_address; - wire[`NT_M1:0][31:0] use_store_data; - wire[`NT_M1:0] use_valid; + wire[`NUM_THREADS-1:0][31:0] use_address; + wire[`NUM_THREADS-1:0][31:0] use_store_data; + wire[`NUM_THREADS-1:0] use_valid; wire[2:0] use_mem_read; wire[2:0] use_mem_write; wire[4:0] use_rd; - wire[`NW_M1:0] use_warp_num; + wire[`NW_BITS-1:0] use_warp_num; wire[1:0] use_wb; wire[31:0] use_pc; wire zero = 0; - VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer( + VX_generic_register #(.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)) lsu_buffer( .clk (clk), .reset(reset), .stall(out_delay), @@ -49,10 +49,10 @@ module VX_lsu ( assign VX_dcache_req.core_req_valid = use_valid; assign VX_dcache_req.core_req_addr = use_address; assign VX_dcache_req.core_req_writedata = use_store_data; - assign VX_dcache_req.core_req_mem_read = {`NT{use_mem_read}}; - assign VX_dcache_req.core_req_mem_write = {`NT{use_mem_write}}; + assign VX_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}}; + assign VX_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}}; assign VX_dcache_req.core_req_rd = use_rd; - assign VX_dcache_req.core_req_wb = {`NT{use_wb}}; + assign VX_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}}; assign VX_dcache_req.core_req_warp_num = use_warp_num; assign VX_dcache_req.core_req_pc = use_pc; @@ -70,9 +70,9 @@ module VX_lsu ( assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num; assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata; - wire[(`CLOG2(`NT))-1:0] use_pc_index; + wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index; wire found; - VX_generic_priority_encoder #(.N(`NT)) pick_first_pc( + VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc( .valids(VX_dcache_rsp.core_wb_valid), .index (use_pc_index), .found (found) diff --git a/hw/rtl/VX_lsu_addr_gen.v b/hw/rtl/VX_lsu_addr_gen.v index 6e3b643c..04d3d8df 100644 --- a/hw/rtl/VX_lsu_addr_gen.v +++ b/hw/rtl/VX_lsu_addr_gen.v @@ -1,17 +1,15 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_lsu_addr_gen ( - input wire[`NT_M1:0][31:0] base_address, + input wire[`NUM_THREADS-1:0][31:0] base_address, input wire[31:0] offset, - output wire[`NT_M1:0][31:0] address + output wire[`NUM_THREADS-1:0][31:0] address ); - - - genvar index; + genvar i; generate - for (index = 0; index < `NT; index = index + 1) begin : addresses - assign address[index] = base_address[index] + offset; + for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses + assign address[i] = base_address[i] + offset; end endgenerate diff --git a/hw/rtl/VX_priority_encoder.v b/hw/rtl/VX_priority_encoder.v index a0f7934f..753814a8 100644 --- a/hw/rtl/VX_priority_encoder.v +++ b/hw/rtl/VX_priority_encoder.v @@ -1,8 +1,8 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_priority_encoder ( - input wire[`NW-1:0] valids, - output reg[`NW_M1:0] index, + input wire[`NUM_WARPS-1:0] valids, + output reg[`NW_BITS-1:0] index, output reg found ); @@ -10,9 +10,9 @@ module VX_priority_encoder ( always @(*) begin index = 0; found = 0; - for (i = `NW-1; i >= 0; i = i - 1) begin + for (i = `NUM_WARPS-1; i >= 0; i = i - 1) begin if (valids[i]) begin - index = i[`NW_M1:0]; + index = i[`NW_BITS-1:0]; found = 1; end end diff --git a/hw/rtl/VX_priority_encoder_w_mask.v b/hw/rtl/VX_priority_encoder_w_mask.v index cba23415..ee60d11a 100644 --- a/hw/rtl/VX_priority_encoder_w_mask.v +++ b/hw/rtl/VX_priority_encoder_w_mask.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_priority_encoder_w_mask #( parameter N = 10 @@ -7,8 +7,8 @@ module VX_priority_encoder_w_mask input wire[N-1:0] valids, output reg [N-1:0] mask, //output reg[$clog2(N)-1:0] index, - output reg[(`CLOG2(N))-1:0] index, - //output reg[`CLOG2(N):0] index, // eh + output reg[(`LOG2UP(N))-1:0] index, + //output reg[`LOG2UP(N):0] index, // eh output reg found ); @@ -20,7 +20,7 @@ module VX_priority_encoder_w_mask for (i = 0; i < N; i=i+1) begin if (valids[i]) begin //index = i[$clog2(N)-1:0]; - index = i[(`CLOG2(N))-1:0]; + index = i[(`LOG2UP(N))-1:0]; found = 1; // mask[index] = (1 << i); // $display("%h",(1 << i)); diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index 47e582a3..4b50efd2 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_scheduler ( input wire clk, @@ -10,8 +10,7 @@ module VX_scheduler ( VX_wb_inter VX_writeback_inter, output wire schedule_delay, - output wire is_empty - + output wire is_empty ); /* verilator lint_off WIDTH */ @@ -19,7 +18,7 @@ module VX_scheduler ( assign is_empty = count_valid == 0; - reg[31:0][`NT-1:0] rename_table[`NW-1:0]; + reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0]; wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0); wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0); @@ -32,13 +31,11 @@ module VX_scheduler ( wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ); // classify our next instruction. - wire is_mem = is_store || is_load; - wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split); - wire is_csr = VX_bckE_req.is_csr; + wire is_mem = is_store || is_load; + wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split); + wire is_csr = VX_bckE_req.is_csr; wire is_exec = !is_mem && !is_gpu && !is_csr; - - // wire rs1_pass = 0; // wire rs2_pass = 0; @@ -48,7 +45,6 @@ module VX_scheduler ( wire rs2_rename_qual = ((rs2_rename) && (VX_bckE_req.rs2 != 0 && using_rs2)); wire rd_rename_qual = ((rd_rename ) && (VX_bckE_req.rd != 0)); - wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual; assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid)) @@ -61,7 +57,7 @@ module VX_scheduler ( always @(posedge clk or posedge reset) begin if (reset) begin - for (w = 0; w < `NW; w=w+1) + for (w = 0; w < `NUM_WARPS; w=w+1) begin for (i = 0; i < 32; i = i + 1) begin @@ -74,7 +70,6 @@ module VX_scheduler ( if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1; if (!schedule_delay && wb_inc) count_valid = count_valid + 1; - end end diff --git a/hw/rtl/VX_warp.v b/hw/rtl/VX_warp.v index 345d5bcd..25c976ec 100644 --- a/hw/rtl/VX_warp.v +++ b/hw/rtl/VX_warp.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_warp ( @@ -6,7 +6,7 @@ module VX_warp ( input wire reset, input wire stall, input wire remove, - input wire[`NT_M1:0] in_thread_mask, + input wire[`NUM_THREADS-1:0] in_thread_mask, input wire in_change_mask, input wire in_jal, input wire[31:0] in_jal_dest, @@ -16,20 +16,20 @@ module VX_warp ( input wire[31:0] in_wspawn_pc, output wire[31:0] out_PC, - output wire[`NT_M1:0] out_valid + output wire[`NUM_THREADS-1:0] out_valid ); reg[31:0] real_PC; logic [31:0] temp_PC; logic [31:0] use_PC; - reg[`NT_M1:0] valid; + reg[`NUM_THREADS-1:0] valid; - reg[`NT_M1:0] valid_zero; + reg[`NUM_THREADS-1:0] valid_zero; integer ini_cur_th = 0; initial begin real_PC = 0; - for (ini_cur_th = 1; ini_cur_th < `NT; ini_cur_th=ini_cur_th+1) begin + for (ini_cur_th = 1; ini_cur_th < `NUM_THREADS; ini_cur_th=ini_cur_th+1) begin valid[ini_cur_th] = 0; // Thread 1 active valid_zero[ini_cur_th] = 0; end @@ -49,7 +49,7 @@ module VX_warp ( genvar out_cur_th; generate - for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th+1) begin : out_valid_assign + for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : out_valid_assign assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th]; end endgenerate diff --git a/hw/rtl/VX_warp_scheduler.v b/hw/rtl/VX_warp_scheduler.v index 38885e6e..1a6381ea 100644 --- a/hw/rtl/VX_warp_scheduler.v +++ b/hw/rtl/VX_warp_scheduler.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_warp_scheduler ( input wire clk, // Clock @@ -7,57 +7,57 @@ module VX_warp_scheduler ( // Wspawn input wire wspawn, input wire[31:0] wsapwn_pc, - input wire[`NW-1:0] wspawn_new_active, + input wire[`NUM_WARPS-1:0] wspawn_new_active, // CTM input wire ctm, - input wire[`NT_M1:0] ctm_mask, - input wire[`NW_M1:0] ctm_warp_num, + input wire[`NUM_THREADS-1:0] ctm_mask, + input wire[`NW_BITS-1:0] ctm_warp_num, // WHALT input wire whalt, - input wire[`NW_M1:0] whalt_warp_num, + input wire[`NW_BITS-1:0] whalt_warp_num, input wire is_barrier, input wire[31:0] barrier_id, - input wire[$clog2(`NW):0] num_warps, - input wire[`NW_M1:0] barrier_warp_num, + input wire[$clog2(`NUM_WARPS):0] num_warps, + input wire[`NW_BITS-1:0] barrier_warp_num, // WSTALL input wire wstall, - input wire[`NW_M1:0] wstall_warp_num, + input wire[`NW_BITS-1:0] wstall_warp_num, // Split input wire is_split, input wire dont_split, - input wire[`NT_M1:0] split_new_mask, - input wire[`NT_M1:0] split_later_mask, + input wire[`NUM_THREADS-1:0] split_new_mask, + input wire[`NUM_THREADS-1:0] split_later_mask, input wire[31:0] split_save_pc, - input wire[`NW_M1:0] split_warp_num, + input wire[`NW_BITS-1:0] split_warp_num, // Join input wire is_join, - input wire[`NW_M1:0] join_warp_num, + input wire[`NW_BITS-1:0] join_warp_num, // JAL input wire jal, input wire[31:0] jal_dest, - input wire[`NW_M1:0] jal_warp_num, + input wire[`NW_BITS-1:0] jal_warp_num, // Branch input wire branch_valid, input wire branch_dir, input wire[31:0] branch_dest, - input wire[`NW_M1:0] branch_warp_num, + input wire[`NW_BITS-1:0] branch_warp_num, - output wire[`NT_M1:0] thread_mask, - output wire[`NW_M1:0] warp_num, + output wire[`NUM_THREADS-1:0] thread_mask, + output wire[`NW_BITS-1:0] warp_num, output wire[31:0] warp_pc, output wire out_ebreak, output wire scheduled_warp, - input wire[`NW_M1:0] icache_stage_wid, - input wire[`NT-1:0] icache_stage_valids + input wire[`NW_BITS-1:0] icache_stage_wid, + input wire[`NUM_THREADS-1:0] icache_stage_valids ); @@ -66,41 +66,41 @@ module VX_warp_scheduler ( wire update_visible_active; - wire[(1+32+`NT_M1):0] d[`NW-1:0]; + wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0]; wire join_fall; wire[31:0] join_pc; - wire[`NT_M1:0] join_tm; + wire[`NUM_THREADS-1:0] join_tm; wire in_wspawn = wspawn; wire in_ctm = ctm; wire in_whalt = whalt; wire in_wstall = wstall; - reg[`NW-1:0] warp_active; - reg[`NW-1:0] warp_stalled; + reg[`NUM_WARPS-1:0] warp_active; + reg[`NUM_WARPS-1:0] warp_stalled; - reg [`NW-1:0] visible_active; - wire[`NW-1:0] use_active; + reg [`NUM_WARPS-1:0] visible_active; + wire[`NUM_WARPS-1:0] use_active; - reg [`NW-1:0] warp_lock; + reg [`NUM_WARPS-1:0] warp_lock; wire wstall_this_cycle; - reg[`NT_M1:0] thread_masks[`NW-1:0]; - reg[31:0] warp_pcs[`NW-1:0]; + reg[`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0]; + reg[31:0] warp_pcs[`NUM_WARPS-1:0]; // barriers - reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0]; + reg[`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0]; wire reached_barrier_limit; - wire[`NW-1:0] curr_barrier_mask; - wire[$clog2(`NW):0] curr_barrier_count; + wire[`NUM_WARPS-1:0] curr_barrier_mask; + wire[$clog2(`NUM_WARPS):0] curr_barrier_count; // wsapwn reg[31:0] use_wsapwn_pc; - reg[`NW-1:0] use_wsapwn; + reg[`NUM_WARPS-1:0] use_wsapwn; - wire[`NW_M1:0] warp_to_schedule; + wire[`NW_BITS-1:0] warp_to_schedule; wire schedule; wire hazard; @@ -110,12 +110,12 @@ module VX_warp_scheduler ( wire[31:0] new_pc; - reg[`NW-1:0] total_barrier_stall; + reg[`NUM_WARPS-1:0] total_barrier_stall; reg didnt_split; /* verilator lint_off UNUSED */ - // wire[$clog2(`NW):0] num_active; + // wire[$clog2(`NUM_WARPS):0] num_active; /* verilator lint_on UNUSED */ integer curr_w_help; @@ -135,7 +135,7 @@ module VX_warp_scheduler ( didnt_split <= 0; warp_lock <= 0; // total_barrier_stall = 0; - for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin + for (curr_w_help = 1; curr_w_help < `NUM_WARPS; curr_w_help=curr_w_help+1) begin warp_pcs[curr_w_help] <= 0; warp_active[curr_w_help] <= 0; // Activating first warp visible_active[curr_w_help] <= 0; // Activating first warp @@ -147,7 +147,7 @@ module VX_warp_scheduler ( if (wspawn) begin warp_active <= wspawn_new_active; use_wsapwn_pc <= wsapwn_pc; - use_wsapwn <= wspawn_new_active & (~`NW'b1); + use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1); end if (is_barrier) begin @@ -219,30 +219,30 @@ module VX_warp_scheduler ( // Lock/Release if (scheduled_warp && !stall) begin warp_lock[warp_num] <= 1'b1; - // warp_lock <= {`NW{1'b1}}; + // warp_lock <= {`NUM_WARPS{1'b1}}; end if (|icache_stage_valids && !stall) begin warp_lock[icache_stage_wid] <= 1'b0; - // warp_lock <= {`NW{1'b0}}; + // warp_lock <= {`NUM_WARPS{1'b0}}; end end end - VX_countones #(.N(`NW)) barrier_count( + VX_countones #(.N(`NUM_WARPS)) barrier_count( .valids(curr_barrier_mask), .count (curr_barrier_count) ); - wire[$clog2(`NW):0] count_visible_active; - VX_countones #(.N(`NW)) num_visible( + wire[$clog2(`NUM_WARPS):0] count_visible_active; + VX_countones #(.N(`NUM_WARPS)) num_visible( .valids(visible_active), .count (count_visible_active) ); // assign curr_barrier_count = $countones(curr_barrier_mask); - assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NW-1:0]; + assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0]; assign reached_barrier_limit = curr_barrier_count == (num_warps); assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug @@ -253,15 +253,15 @@ module VX_warp_scheduler ( // total_barrier_stall = 0; // for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1) // begin - // total_barrier_stall[`NW-1:0] = total_barrier_stall[`NW-1:0] | barrier_stall_mask[curr_b]; + // total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b]; // end // end assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join); - wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]}; - wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask}; + wire[(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]}; + wire[(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask}; assign {join_fall, join_pc, join_tm} = d[join_warp_num]; @@ -270,13 +270,13 @@ module VX_warp_scheduler ( genvar curr_warp; generate - for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin : stacks + for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks wire correct_warp_s = (curr_warp == split_warp_num); wire correct_warp_j = (curr_warp == join_warp_num); wire push = (is_split && !dont_split) && correct_warp_s; wire pop = is_join && correct_warp_j; - VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack( + VX_generic_stack #(.WIDTH(1+32+`NUM_THREADS), .DEPTH($clog2(`NUM_THREADS)+1)) ipdom_stack( .clk (clk), .reset(reset), .push (push), @@ -304,7 +304,7 @@ module VX_warp_scheduler ( wire real_use_wspawn = use_wsapwn[warp_to_schedule]; assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule]; - assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]); + assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]); assign warp_num = warp_to_schedule; assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall; diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 2cba499a..f33c8369 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_writeback ( input wire clk, @@ -64,9 +64,9 @@ module VX_writeback ( wire zero = 0; - wire[`NT-1:0][31:0] use_wb_data; + wire[`NUM_THREADS-1:0][31:0] use_wb_data; - VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register( + VX_generic_register #(.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)) wb_register( .clk (clk), .reset(reset), .stall(zero), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 5744daca..f63dba0b 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -1,5 +1,5 @@ -`include "VX_define.v" -`include "VX_cache_config.v" +`include "VX_define.vh" +`include "VX_cache_config.vh" module Vortex #( @@ -13,24 +13,24 @@ module Vortex // IO output wire io_valid, - output wire[31:0] io_data, + output wire [31:0] io_data, // DRAM Dcache Req - output wire dram_req, - output wire dram_req_write, - output wire dram_req_read, - output wire [31:0] dram_req_addr, - output wire [31:0] dram_req_size, - output wire [31:0] dram_req_data[`DBANK_LINE_SIZE_RNG], - output wire [31:0] dram_expected_lat, + output wire dram_req, + output wire dram_req_write, + output wire dram_req_read, + output wire [31:0] dram_req_addr, + output wire [31:0] dram_req_size, + output wire [`DBANK_LINE_SIZE-1:0] dram_req_data, + output wire [31:0] dram_expected_lat, - input wire dram_req_delay, + input wire dram_req_delay, // DRAM Dcache Res - output wire dram_fill_accept, - input wire dram_fill_rsp, - input wire [31:0] dram_fill_rsp_addr, - input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], + output wire dram_fill_accept, + input wire dram_fill_rsp, + input wire [31:0] dram_fill_rsp_addr, + input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data, // DRAM Icache Req output wire I_dram_req, @@ -38,25 +38,25 @@ module Vortex output wire I_dram_req_read, output wire [31:0] I_dram_req_addr, output wire [31:0] I_dram_req_size, - output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data, + output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data, output wire [31:0] I_dram_expected_lat, // DRAM Icache Res output wire I_dram_fill_accept, input wire I_dram_fill_rsp, input wire [31:0] I_dram_fill_rsp_addr, - input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data, + input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data, // LLC Snooping - input wire snp_req, - input wire [31:0] snp_req_addr, - output wire snp_req_delay, + input wire snp_req, + input wire [31:0] snp_req_addr, + output wire snp_req_delay, input wire I_snp_req, input wire [31:0] I_snp_req_addr, output wire I_snp_req_delay, - output wire out_ebreak + output wire out_ebreak `else @@ -72,14 +72,14 @@ module Vortex output wire dram_req_read, output wire [31:0] dram_req_addr, output wire [31:0] dram_req_size, - output wire [`DBANK_LINE_SIZE_RNG][31:0] dram_req_data, + output wire [`DBANK_LINE_SIZE-1:0] dram_req_data, output wire [31:0] dram_expected_lat, // DRAM Dcache Res output wire dram_fill_accept, input wire dram_fill_rsp, input wire [31:0] dram_fill_rsp_addr, - input wire [`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data, + input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data, // DRAM Icache Req @@ -88,16 +88,16 @@ module Vortex output wire I_dram_req_read, output wire [31:0] I_dram_req_addr, output wire [31:0] I_dram_req_size, - output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data, + output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data, output wire [31:0] I_dram_expected_lat, // DRAM Icache Res output wire I_dram_fill_accept, input wire I_dram_fill_rsp, input wire [31:0] I_dram_fill_rsp_addr, - input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data, + input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data, - input wire dram_req_delay, + input wire dram_req_delay, input wire snp_req, input wire [31:0] snp_req_addr, @@ -110,27 +110,24 @@ module Vortex output wire out_ebreak `endif ); - wire scheduler_empty; wire out_ebreak_unqual; // assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1); assign out_ebreak = out_ebreak_unqual; - wire memory_delay; wire exec_delay; wire gpr_stage_delay; wire schedule_delay; - // Dcache Interface VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_rsp(); VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req(); VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req_qual(); - VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_req(); - VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_res(); + VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_req(); + VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_res(); assign VX_gpu_dcache_dram_res.dram_fill_rsp = dram_fill_rsp; @@ -146,36 +143,40 @@ module Vortex assign VX_gpu_dcache_dram_req.dram_req_delay = dram_req_delay; - genvar wordy; + genvar i; generate - for (wordy = 0; wordy < `DBANK_LINE_SIZE_WORDS; wordy=wordy+1) begin - assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[wordy] = dram_fill_rsp_data[wordy]; - assign dram_req_data[wordy] = VX_gpu_dcache_dram_req.dram_req_data[wordy]; + for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin + assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[i] = dram_fill_rsp_data[i * 32 +: 32]; + assign dram_req_data[i * 32 +: 32] = VX_gpu_dcache_dram_req.dram_req_data[i]; end endgenerate - wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.core_req_valid) && (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE) && (VX_dcache_req.core_req_addr[0] == 32'h00010000); + wire temp_io_valid = (!memory_delay) + && (|VX_dcache_req.core_req_valid) + && (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE) + && (VX_dcache_req.core_req_addr[0] == 32'h00010000); + wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0]; assign io_valid = temp_io_valid; assign io_data = temp_io_data; - assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~io_valid}}; - assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr; - assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata; - assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read; - assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write; - assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd; - assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb; - assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num; - assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc; - assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot; + assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}}; + assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr; + assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata; + assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read; + assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write; + assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd; + assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb; + assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num; + assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc; + assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot; VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp(); VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req(); - VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_req(); - VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_res(); + VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_req(); + VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_res(); assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp; @@ -191,11 +192,11 @@ module Vortex assign VX_gpu_icache_dram_req.dram_req_delay = dram_req_delay; - genvar iwordy; + genvar j; generate - for (iwordy = 0; iwordy < `IBANK_LINE_SIZE_WORDS; iwordy=iwordy+1) begin - assign VX_gpu_icache_dram_res.dram_fill_rsp_data[iwordy] = I_dram_fill_rsp_data[iwordy]; - assign I_dram_req_data[iwordy] = VX_gpu_icache_dram_req.dram_req_data[iwordy]; + for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin + assign VX_gpu_icache_dram_res.dram_fill_rsp_data[j] = I_dram_fill_rsp_data[j * 32 +: 32]; + assign I_dram_req_data[j * 32 +: 32] = VX_gpu_icache_dram_req.dram_req_data[j]; end endgenerate @@ -239,7 +240,7 @@ VX_front_end vx_front_end( .VX_jal_rsp (VX_jal_rsp), .VX_branch_rsp (VX_branch_rsp), .fetch_ebreak (out_ebreak_unqual) - ); +); VX_scheduler schedule( .clk (clk), @@ -251,7 +252,7 @@ VX_scheduler schedule( .VX_writeback_inter(VX_writeback_inter), .schedule_delay (schedule_delay), .is_empty (scheduler_empty) - ); +); VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end( .clk (clk), @@ -267,7 +268,7 @@ VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end( .out_mem_delay (memory_delay), .out_exec_delay (exec_delay), .gpr_stage_delay (gpr_stage_delay) - ); +); VX_dmem_controller VX_dmem_controller( @@ -291,7 +292,7 @@ VX_dmem_controller VX_dmem_controller( // Core <-> Dcache .VX_dcache_req (VX_dcache_req_qual), .VX_dcache_rsp (VX_dcache_rsp) - ); +); // VX_csr_handler vx_csr_handler( // .clk (clk), @@ -300,7 +301,7 @@ VX_dmem_controller VX_dmem_controller( // .in_wb_valid (VX_writeback_inter.wb_valid[0]), // .out_decode_csr_data (csr_decode_csr_data) -// ); +// ); endmodule // Vortex diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index 5b78b8cd..86598907 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -1,20 +1,18 @@ -`include "VX_define.v" -`include "VX_cache_config.v" - +`include "VX_define.vh" +`include "VX_cache_config.vh" module Vortex_Cluster - #( - parameter CLUSTER_ID = 0 - ) - ( + #( + parameter CLUSTER_ID = 0 + ) ( // Clock input wire clk, input wire reset, // IO - output wire[`NUMBER_CORES_PER_CLUSTER-1:0] io_valid, - output wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] io_data, + output wire[`NUM_CORES_PER_CLUSTER-1:0] io_valid, + output wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] io_data, // DRAM Req output wire out_dram_req, @@ -22,7 +20,7 @@ module Vortex_Cluster output wire out_dram_req_read, output wire [31:0] out_dram_req_addr, output wire [31:0] out_dram_req_size, - output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG], + output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0], output wire [31:0] out_dram_expected_lat, input wire out_dram_req_delay, @@ -30,8 +28,7 @@ module Vortex_Cluster output wire out_dram_fill_accept, input wire out_dram_fill_rsp, input wire [31:0] out_dram_fill_rsp_addr, - input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], - + input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0], // LLC Snooping input wire llc_snp_req, @@ -40,142 +37,133 @@ module Vortex_Cluster output wire out_ebreak ); - // DRAM Dcache Req - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_write; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_read; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size; - wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_req_data; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_write; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_read; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size; + wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_req_data; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat; // DRAM Dcache Res - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr; - wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_fill_rsp_data; - + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr; + wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_fill_rsp_data; // DRAM Icache Req - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size; - wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_req_data; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size; + wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_req_data; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat; // DRAM Icache Res - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr; - wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_fill_rsp_data; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp; + wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr; + wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_fill_rsp_data; // Out ebreak - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_out_ebreak; - - - wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_io_valid; - wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_out_ebreak; + wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_io_valid; + wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data; wire l2c_core_accept; - - wire snp_fwd; - wire[31:0] snp_fwd_addr; - wire[`NUMBER_CORES_PER_CLUSTER-1:0] snp_fwd_delay; + wire snp_fwd; + wire[31:0] snp_fwd_addr; + wire[`NUM_CORES_PER_CLUSTER-1:0] snp_fwd_delay; assign out_ebreak = (&per_core_out_ebreak); genvar curr_core; generate - for (curr_core = 0; curr_core < `NUMBER_CORES_PER_CLUSTER; curr_core=curr_core+1) begin + for (curr_core = 0; curr_core < `NUM_CORES_PER_CLUSTER; curr_core=curr_core+1) begin - wire [`IBANK_LINE_SIZE_RNG][31:0] curr_core_I_dram_req_data; - wire [`DBANK_LINE_SIZE_RNG][31:0] curr_core_dram_req_data ; + wire [`IBANK_LINE_WORDS-1:0][31:0] curr_core_I_dram_req_data; + wire [`DBANK_LINE_WORDS-1:0][31:0] curr_core_dram_req_data ; assign io_valid[curr_core] = per_core_io_valid[curr_core]; assign io_data [curr_core] = per_core_io_data [curr_core]; - Vortex #(.CORE_ID(curr_core + (CLUSTER_ID * `NUMBER_CORES_PER_CLUSTER))) vortex_core( - .clk (clk), - .reset (reset), - .io_valid (per_core_io_valid [curr_core]), - .io_data (per_core_io_data [curr_core]), - .dram_req (per_core_dram_req [curr_core]), - .dram_req_write (per_core_dram_req_write [curr_core]), - .dram_req_read (per_core_dram_req_read [curr_core]), - .dram_req_addr (per_core_dram_req_addr [curr_core]), - .dram_req_size (per_core_dram_req_size [curr_core]), - .dram_req_data (curr_core_dram_req_data ), - .dram_expected_lat (per_core_dram_expected_lat [curr_core]), - .dram_fill_accept (per_core_dram_fill_accept [curr_core]), - .dram_fill_rsp (per_core_dram_fill_rsp [curr_core]), - .dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]), - .dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]), - .I_dram_req (per_core_I_dram_req [curr_core]), - .I_dram_req_write (per_core_I_dram_req_write [curr_core]), - .I_dram_req_read (per_core_I_dram_req_read [curr_core]), - .I_dram_req_addr (per_core_I_dram_req_addr [curr_core]), - .I_dram_req_size (per_core_I_dram_req_size [curr_core]), - .I_dram_req_data (curr_core_I_dram_req_data ), - .I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]), - .I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]), - .I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]), - .I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]), - .I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]), - .dram_req_delay (l2c_core_accept ), - .out_ebreak (per_core_out_ebreak [curr_core]), - .snp_req (snp_fwd), - .snp_req_addr (snp_fwd_addr), - .snp_req_delay (snp_fwd_delay[curr_core]), - .I_snp_req (0), - .I_snp_req_addr (), - .I_snp_req_delay () - ); + Vortex #( + .CORE_ID(curr_core + (CLUSTER_ID * `NUM_CORES_PER_CLUSTER)) + ) vortex_core( + .clk (clk), + .reset (reset), + .io_valid (per_core_io_valid [curr_core]), + .io_data (per_core_io_data [curr_core]), + .dram_req (per_core_dram_req [curr_core]), + .dram_req_write (per_core_dram_req_write [curr_core]), + .dram_req_read (per_core_dram_req_read [curr_core]), + .dram_req_addr (per_core_dram_req_addr [curr_core]), + .dram_req_size (per_core_dram_req_size [curr_core]), + .dram_req_data (curr_core_dram_req_data ), + .dram_expected_lat (per_core_dram_expected_lat [curr_core]), + .dram_fill_accept (per_core_dram_fill_accept [curr_core]), + .dram_fill_rsp (per_core_dram_fill_rsp [curr_core]), + .dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]), + .dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]), + .I_dram_req (per_core_I_dram_req [curr_core]), + .I_dram_req_write (per_core_I_dram_req_write [curr_core]), + .I_dram_req_read (per_core_I_dram_req_read [curr_core]), + .I_dram_req_addr (per_core_I_dram_req_addr [curr_core]), + .I_dram_req_size (per_core_I_dram_req_size [curr_core]), + .I_dram_req_data (curr_core_I_dram_req_data ), + .I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]), + .I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]), + .I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]), + .I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]), + .I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]), + .dram_req_delay (l2c_core_accept ), + .out_ebreak (per_core_out_ebreak [curr_core]), + .snp_req (snp_fwd), + .snp_req_addr (snp_fwd_addr), + .snp_req_delay (snp_fwd_delay[curr_core]), + .I_snp_req (0), + .I_snp_req_addr (), + .I_snp_req_delay () + ); assign per_core_dram_req_data [curr_core] = curr_core_dram_req_data; assign per_core_I_dram_req_data[curr_core] = curr_core_I_dram_req_data; end endgenerate - //////////////////// L2 Cache //////////////////// - wire[`LLNUMBER_REQUESTS-1:0] l2c_core_req; - wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write; - wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read; - wire[`LLNUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr; - wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_core_req_data; - wire[`LLNUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb; + wire[`L2NUMBER_REQUESTS-1:0] l2c_core_req; + wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write; + wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read; + wire[`L2NUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr; + wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_core_req_data; + wire[`L2NUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb; - wire[`LLNUMBER_REQUESTS-1:0] l2c_core_no_wb_slot; + wire[`L2NUMBER_REQUESTS-1:0] l2c_core_no_wb_slot; + wire[`L2NUMBER_REQUESTS-1:0] l2c_wb; + wire[`L2NUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr; + wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_wb_data; - - wire[`LLNUMBER_REQUESTS-1:0] l2c_wb; - wire[`LLNUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr; - wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_wb_data; - - - wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port; - wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port; + wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port; + wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port; genvar llb_index; generate - for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin + for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index]; assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index]; end endgenerate - - // genvar l2c_curr_core; generate - for (l2c_curr_core = 0; l2c_curr_core < `LLNUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin + for (l2c_curr_core = 0; l2c_curr_core < `L2NUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin // Core Request assign l2c_core_req [l2c_curr_core] = per_core_dram_req [(l2c_curr_core/2)]; assign l2c_core_req [l2c_curr_core+1] = per_core_I_dram_req[(l2c_curr_core/2)]; @@ -214,29 +202,27 @@ module Vortex_Cluster wire dram_snp_full; wire dram_req_because_of_wb; VX_cache #( - .CACHE_SIZE_BYTES (`LLCACHE_SIZE_BYTES), - .BANK_LINE_SIZE_BYTES (`LLBANK_LINE_SIZE_BYTES), - .NUMBER_BANKS (`LLNUMBER_BANKS), - .WORD_SIZE_BYTES (`LLWORD_SIZE_BYTES), - .NUMBER_REQUESTS (`LLNUMBER_REQUESTS), - .STAGE_1_CYCLES (`LLSTAGE_1_CYCLES), - .FUNC_ID (`LLFUNC_ID), - .REQQ_SIZE (`LLREQQ_SIZE), - .MRVQ_SIZE (`LLMRVQ_SIZE), - .DFPQ_SIZE (`LLDFPQ_SIZE), - .SNRQ_SIZE (`LLSNRQ_SIZE), - .CWBQ_SIZE (`LLCWBQ_SIZE), - .DWBQ_SIZE (`LLDWBQ_SIZE), - .DFQQ_SIZE (`LLDFQQ_SIZE), - .LLVQ_SIZE (`LLLLVQ_SIZE), - .FFSQ_SIZE (`LLFFSQ_SIZE), - .PRFQ_SIZE (`LLPRFQ_SIZE), - .PRFQ_STRIDE (`LLPRFQ_STRIDE), - .FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE), - .SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES) - ) - gpu_l2cache - ( + .CACHE_SIZE_BYTES (`L2CACHE_SIZE_BYTES), + .BANK_LINE_SIZE_BYTES (`L2BANK_LINE_SIZE_BYTES), + .NUMBER_BANKS (`L2NUMBER_BANKS), + .WORD_SIZE_BYTES (`L2WORD_SIZE_BYTES), + .NUMBER_REQUESTS (`L2NUMBER_REQUESTS), + .STAGE_1_CYCLES (`L2STAGE_1_CYCLES), + .FUNC_ID (`L2FUNC_ID), + .REQQ_SIZE (`L2REQQ_SIZE), + .MRVQ_SIZE (`L2MRVQ_SIZE), + .DFPQ_SIZE (`L2DFPQ_SIZE), + .SNRQ_SIZE (`L2SNRQ_SIZE), + .CWBQ_SIZE (`L2CWBQ_SIZE), + .DWBQ_SIZE (`L2DWBQ_SIZE), + .DFQQ_SIZE (`L2DFQQ_SIZE), + .LLVQ_SIZE (`L2LLVQ_SIZE), + .FFSQ_SIZE (`L2FFSQ_SIZE), + .PRFQ_SIZE (`L2PRFQ_SIZE), + .PRFQ_STRIDE (`L2PRFQ_STRIDE), + .FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE), + .SIMULATED_DRAM_LATENCY_CYCLES(`L2SIMULATED_DRAM_LATENCY_CYCLES) + ) gpu_l2cache ( .clk (clk), .reset (reset), @@ -295,8 +281,6 @@ module Vortex_Cluster .snp_fwd (snp_fwd), .snp_fwd_addr (snp_fwd_addr), .snp_fwd_delay (|snp_fwd_delay) - ); - - + ); endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index e58fd2c7..87d1cb34 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -1,5 +1,5 @@ -`include "VX_define.v" -`include "VX_cache_config.v" +`include "VX_define.vh" +`include "VX_cache_config.vh" module Vortex_Socket ( @@ -8,8 +8,8 @@ module Vortex_Socket ( input wire reset, // IO - output wire io_valid[`NUMBER_CORES-1:0], - output wire[31:0] io_data [`NUMBER_CORES-1:0], + output wire io_valid[`NUM_CORES-1:0], + output wire[31:0] io_data [`NUM_CORES-1:0], output wire[31:0] number_cores, @@ -19,7 +19,7 @@ module Vortex_Socket ( output wire out_dram_req_read, output wire [31:0] out_dram_req_addr, output wire [31:0] out_dram_req_size, - output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG], + output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0], output wire [31:0] out_dram_expected_lat, input wire out_dram_req_delay, @@ -27,7 +27,7 @@ module Vortex_Socket ( output wire out_dram_fill_accept, input wire out_dram_fill_rsp, input wire [31:0] out_dram_fill_rsp_addr, - input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], + input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0], // LLC Snooping input wire llc_snp_req, @@ -36,18 +36,16 @@ module Vortex_Socket ( output wire out_ebreak ); + assign number_cores = `NUM_CORES; - assign number_cores = `NUMBER_CORES; + if (`NUM_CLUSTERS == 1) begin - - if (`NUMBER_CLUSTERS == 1) begin - - wire[`NUMBER_CORES-1:0] cluster_io_valid; - wire[`NUMBER_CORES-1:0][31:0] cluster_io_data; + wire[`NUM_CORES-1:0] cluster_io_valid; + wire[`NUM_CORES-1:0][31:0] cluster_io_data; genvar curr_c; - for (curr_c = 0; curr_c < `NUMBER_CORES; curr_c=curr_c+1) begin + for (curr_c = 0; curr_c < `NUM_CORES; curr_c=curr_c+1) begin assign io_valid[curr_c] = cluster_io_valid[curr_c]; assign io_data [curr_c] = cluster_io_data [curr_c]; end @@ -76,62 +74,57 @@ module Vortex_Socket ( .llc_snp_req_addr (llc_snp_req_addr), .llc_snp_req_delay (llc_snp_req_delay), .out_ebreak (out_ebreak) - ); + ); + end else begin wire snp_fwd; wire[31:0] snp_fwd_addr; - wire[`NUMBER_CLUSTERS-1:0] snp_fwd_delay; + wire[`NUM_CLUSTERS-1:0] snp_fwd_delay; - wire[`NUMBER_CLUSTERS-1:0] per_cluster_out_ebreak; + wire[`NUM_CLUSTERS-1:0] per_cluster_out_ebreak; assign out_ebreak = (&per_cluster_out_ebreak); - // // DRAM Dcache Req - wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req; - wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_write; - wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_read; - wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr; - wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size; - wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat; - wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_req_data; - wire[31:0] per_cluster_dram_req_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG]; + wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req; + wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write; + wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read; + wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr; + wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size; + wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat; + wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_req_data; + wire[31:0] per_cluster_dram_req_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0]; wire l3c_core_accept; // // DRAM Dcache Res - wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_accept; - wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_rsp; - wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr; - wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_fill_rsp_data; - wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG]; + wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_accept; + wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_rsp; + wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr; + wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_fill_rsp_data; + wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0]; - wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0] per_cluster_io_valid; - wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data; + wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0] per_cluster_io_valid; + wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data; - genvar curr_c; - genvar curr_cc; - genvar curr_word; - for (curr_c = 0; curr_c < `NUMBER_CLUSTERS; curr_c =curr_c+1) begin - for (curr_cc = 0; curr_cc < `NUMBER_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin - assign io_valid[curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc]; - assign io_data [curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc]; + genvar curr_c, curr_cc, curr_word; + for (curr_c = 0; curr_c < `NUM_CLUSTERS; curr_c =curr_c+1) begin + for (curr_cc = 0; curr_cc < `NUM_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin + assign io_valid[curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc]; + assign io_data [curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc]; end - for (curr_word = 0; curr_word < `DBANK_LINE_SIZE_WORDS; curr_word = curr_word+1) begin + for (curr_word = 0; curr_word < `DBANK_LINE_WORDS; curr_word = curr_word+1) begin assign per_cluster_dram_req_data [curr_c][curr_word] = per_cluster_dram_req_data_up [curr_c][curr_word]; assign per_cluster_dram_fill_rsp_data_up[curr_c][curr_word] = per_cluster_dram_fill_rsp_data[curr_c][curr_word]; end end - - genvar curr_cluster; - for (curr_cluster = 0; curr_cluster < `NUMBER_CLUSTERS; curr_cluster=curr_cluster+1) begin - + for (curr_cluster = 0; curr_cluster < `NUM_CLUSTERS; curr_cluster=curr_cluster+1) begin Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster( .clk (clk), @@ -158,37 +151,33 @@ module Vortex_Socket ( .llc_snp_req_delay (snp_fwd_delay[curr_cluster]), .out_ebreak (per_cluster_out_ebreak [curr_cluster]) - ); + ); end - //////////////////// L3 Cache //////////////////// wire[`L3NUMBER_REQUESTS-1:0] l3c_core_req; wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_write; wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_read; wire[`L3NUMBER_REQUESTS-1:0][31:0] l3c_core_req_addr; - wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_core_req_data; + wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_core_req_data; wire[`L3NUMBER_REQUESTS-1:0][1:0] l3c_core_req_wb; wire[`L3NUMBER_REQUESTS-1:0] l3c_core_no_wb_slot; - - wire[`L3NUMBER_REQUESTS-1:0] l3c_wb; wire[`L3NUMBER_REQUESTS-1:0] [31:0] l3c_wb_addr; - wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_wb_data; + wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_wb_data; - wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port; - wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port; + wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port; + wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port; genvar llb_index; - for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin + for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index]; assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index]; end - // genvar l3c_curr_cluster; for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUMBER_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin @@ -212,7 +201,6 @@ module Vortex_Socket ( assign per_cluster_dram_fill_rsp [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster]; assign per_cluster_dram_fill_rsp_data[l3c_curr_cluster] = l3c_wb_data[l3c_curr_cluster]; assign per_cluster_dram_fill_rsp_addr[l3c_curr_cluster] = l3c_wb_addr[l3c_curr_cluster]; - end wire dram_snp_full; @@ -224,7 +212,7 @@ module Vortex_Socket ( .WORD_SIZE_BYTES (`L3WORD_SIZE_BYTES), .NUMBER_REQUESTS (`L3NUMBER_REQUESTS), .STAGE_1_CYCLES (`L3STAGE_1_CYCLES), - .FUNC_ID (`LLFUNC_ID), + .FUNC_ID (`L2FUNC_ID), .REQQ_SIZE (`L3REQQ_SIZE), .MRVQ_SIZE (`L3MRVQ_SIZE), .DFPQ_SIZE (`L3DFPQ_SIZE), @@ -238,9 +226,7 @@ module Vortex_Socket ( .PRFQ_STRIDE (`L3PRFQ_STRIDE), .FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE), .SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES) - ) - gpu_l3cache - ( + ) gpu_l3cache ( .clk (clk), .reset (reset), @@ -300,10 +286,8 @@ module Vortex_Socket ( .snp_fwd (snp_fwd), .snp_fwd_addr (snp_fwd_addr), .snp_fwd_delay (|snp_fwd_delay) - ); + ); end - - endmodule \ No newline at end of file diff --git a/hw/rtl/byte_enabled_simple_dual_port_ram.v b/hw/rtl/byte_enabled_simple_dual_port_ram.v index b4dcf5fc..2eb4cdee 100644 --- a/hw/rtl/byte_enabled_simple_dual_port_ram.v +++ b/hw/rtl/byte_enabled_simple_dual_port_ram.v @@ -1,50 +1,45 @@ -`include "VX_define.v" +`include "VX_define.vh" module byte_enabled_simple_dual_port_ram ( input we, clk, input wire reset, input wire[4:0] waddr, raddr1, raddr2, - input wire[`NT_M1:0] be, - input wire[`NT_M1:0][31:0] wdata, - output reg[`NT_M1:0][31:0] q1, q2 + input wire[`NUM_THREADS-1:0] be, + input wire[`NUM_THREADS-1:0][31:0] wdata, + output reg[`NUM_THREADS-1:0][31:0] q1, q2 ); - // integer regi; - // integer threadi; + // integer regi; + // integer threadi; - // Thread Byte Bit - logic [`NT_M1:0][3:0][7:0] GPR[31:0]; + // Thread Byte Bit + logic [`NUM_THREADS-1:0][3:0][7:0] GPR[31:0]; // initial begin // for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0; // end integer ini; - always @(posedge clk, posedge reset) begin - // TODO Clearing ram not currently supported on FPGA. - if (reset) begin -// `ifdef ASIC - for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] <= 0; -// `endif - end - else if(we) begin + always @(posedge clk) begin + if (we) begin integer thread_ind; - for (thread_ind = 0; thread_ind <= `NT_M1; thread_ind = thread_ind + 1) begin - if(be[thread_ind]) GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0]; - if(be[thread_ind]) GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8]; - if(be[thread_ind]) GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16]; - if(be[thread_ind]) GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24]; + for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin + if (be[thread_ind]) begin + GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0]; + GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8]; + GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16]; + GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24]; + end end end // $display("^^^^^^^^^^^^^^^^^^^^^^^"); // for (regi = 0; regi <= 31; regi = regi + 1) begin - // for (threadi = 0; threadi <= `NT_M1; threadi = threadi + 1) begin + // for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin // if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]); // end // end - end assign q1 = GPR[raddr1]; diff --git a/hw/rtl/cache/VX_Cache_Bank.v b/hw/rtl/cache/VX_Cache_Bank.v index 62d53f3a..669e40e5 100644 --- a/hw/rtl/cache/VX_Cache_Bank.v +++ b/hw/rtl/cache/VX_Cache_Bank.v @@ -2,7 +2,7 @@ // Also add a bit about wheter the "Way ID" is valid / being held or if it is just default // Also make sure all possible output states are transmitted back to the bank correctly -`include "VX_define.v" +`include "VX_define.vh" module VX_Cache_Bank #( @@ -67,7 +67,7 @@ module VX_Cache_Bank localparam RECIV_MEM_RSP = 2; - localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK); + localparam BLOCK_NUM_BITS = `LOG2UP(CACHE_BLOCK); // Inputs input wire rst; input wire clk; diff --git a/hw/rtl/cache/VX_cache_bank_valid.v b/hw/rtl/cache/VX_cache_bank_valid.v index 21dbb71b..f9befcf2 100644 --- a/hw/rtl/cache/VX_cache_bank_valid.v +++ b/hw/rtl/cache/VX_cache_bank_valid.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_cache_bank_valid #( diff --git a/hw/rtl/cache/VX_cache_data.v b/hw/rtl/cache/VX_cache_data.v index b9c523c1..dc2e7594 100644 --- a/hw/rtl/cache/VX_cache_data.v +++ b/hw/rtl/cache/VX_cache_data.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_cache_data #( diff --git a/hw/rtl/cache/VX_cache_data_per_index.v b/hw/rtl/cache/VX_cache_data_per_index.v index 6d1dc89a..11e7b99b 100644 --- a/hw/rtl/cache/VX_cache_data_per_index.v +++ b/hw/rtl/cache/VX_cache_data_per_index.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" module VX_cache_data_per_index #( diff --git a/hw/rtl/cache/VX_d_cache.v b/hw/rtl/cache/VX_d_cache.v index cce71c98..5b305571 100644 --- a/hw/rtl/cache/VX_d_cache.v +++ b/hw/rtl/cache/VX_d_cache.v @@ -8,7 +8,7 @@ // TO DO: // - Send in a response from memory of what the data is from the test bench -`include "VX_define.v" +`include "VX_define.vh" //`include "VX_Cache_Bank.v" //`include "VX_cache_bank_valid.v" //`include "VX_priority_encoder.v" diff --git a/hw/rtl/cache/VX_d_cache_encapsulate.v b/hw/rtl/cache/VX_d_cache_encapsulate.v index b7560436..1d7413f5 100644 --- a/hw/rtl/cache/VX_d_cache_encapsulate.v +++ b/hw/rtl/cache/VX_d_cache_encapsulate.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" `define NUM_WORDS_PER_BLOCK 4 @@ -33,17 +33,17 @@ module VX_d_cache_encapsulate ( //parameter cache_entry = 9; input wire clk, rst; - input wire i_p_valid[`NT_M1:0]; - input wire [31:0] i_p_addr[`NT_M1:0]; + input wire i_p_valid[`NUM_THREADS-1:0]; + input wire [31:0] i_p_addr[`NUM_THREADS-1:0]; input wire i_p_initial_request; - input wire [31:0] i_p_writedata[`NT_M1:0]; + input wire [31:0] i_p_writedata[`NUM_THREADS-1:0]; input wire i_p_read_or_write; input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; input wire i_m_ready; - output reg [31:0] o_p_readdata[`NT_M1:0]; - output reg o_p_readdata_valid[`NT_M1:0] ; + output reg [31:0] o_p_readdata[`NUM_THREADS-1:0]; + output reg o_p_readdata_valid[`NUM_THREADS-1:0] ; output reg o_p_waitrequest; output reg [31:0] o_m_addr; @@ -53,12 +53,12 @@ module VX_d_cache_encapsulate ( // Inter - wire [`NT_M1:0] i_p_valid_inter; - wire [`NT_M1:0][31:0] i_p_addr_inter; - wire [`NT_M1:0][31:0] i_p_writedata_inter; + wire [`NUM_THREADS-1:0] i_p_valid_inter; + wire [`NUM_THREADS-1:0][31:0] i_p_addr_inter; + wire [`NUM_THREADS-1:0][31:0] i_p_writedata_inter; - reg [`NT_M1:0][31:0] o_p_readdata_inter; - reg [`NT_M1:0] o_p_readdata_valid_inter; + reg [`NUM_THREADS-1:0][31:0] o_p_readdata_inter; + reg [`NUM_THREADS-1:0] o_p_readdata_valid_inter; reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter; wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter; @@ -66,7 +66,7 @@ module VX_d_cache_encapsulate ( genvar curr_thraed, curr_bank, curr_word; generate - for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin : threads + for (curr_thraed = 0; curr_thraed < `NUM_THREADS; curr_thraed = curr_thraed + 1) begin : threads assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed]; assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed]; assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed]; diff --git a/hw/rtl/cache/VX_d_cache_tb.v b/hw/rtl/cache/VX_d_cache_tb.v index 4f5681c3..a29b648f 100644 --- a/hw/rtl/cache/VX_d_cache_tb.v +++ b/hw/rtl/cache/VX_d_cache_tb.v @@ -1,4 +1,4 @@ -`include "VX_define.v" +`include "VX_define.vh" `include "VX_d_cache.v" module VX_d_cache_tb; @@ -6,13 +6,13 @@ module VX_d_cache_tb; parameter NUMBER_BANKS = 8; reg clk, reset, im_ready; - reg [`NT_M1:0] i_p_valid; - reg [`NT_M1:0][13:0] i_p_addr; // FIXME + reg [`NUM_THREADS-1:0] i_p_valid; + reg [`NUM_THREADS-1:0][13:0] i_p_addr; // FIXME reg i_p_initial_request; - reg [`NT_M1:0][31:0] i_p_writedata; + reg [`NUM_THREADS-1:0][31:0] i_p_writedata; reg i_p_read_or_write; //, i_p_write; - reg [`NT_M1:0][31:0] o_p_readdata; - reg [`NT_M1:0] o_p_readdata_valid; + reg [`NUM_THREADS-1:0][31:0] o_p_readdata; + reg [`NUM_THREADS-1:0] o_p_readdata_valid; reg o_p_waitrequest; reg [13:0] o_m_addr; // Only one address is sent out at a time to memory reg o_m_valid; diff --git a/hw/rtl/cache/cache_set.v b/hw/rtl/cache/cache_set.v index 4f2445ea..1b95f022 100644 --- a/hw/rtl/cache/cache_set.v +++ b/hw/rtl/cache/cache_set.v @@ -2,7 +2,7 @@ // Also add a bit about wheter the "Way ID" is valid / being held or if it is just default // Also make sure all possible output states are transmitted back to the bank correctly -// `include "VX_define.v" +// `include "VX_define.vh" module cache_set(clk, rst, // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes @@ -94,7 +94,7 @@ module cache_set(clk, readdata <= data[3]; end end else if (access) begin - //tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12]; + //tag[`NUM_THREADS-1:0] <= i_p_addr[`NUM_THREADS-1:0][13:12]; counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC // Hit in First Column if (tag[0] == o_tag && valid[0]) begin diff --git a/hw/rtl/generic_cache/VX_bank.v b/hw/rtl/generic_cache/VX_bank.v index 6b19c99c..62da49d0 100644 --- a/hw/rtl/generic_cache/VX_bank.v +++ b/hw/rtl/generic_cache/VX_bank.v @@ -1,5 +1,5 @@ -`include "VX_cache_config.v" -`include "VX_define.v" +`include "VX_cache_config.vh" +`include "VX_define.vh" module VX_bank #( // Size of cache in bytes @@ -60,7 +60,7 @@ module VX_bank input wire [4:0] bank_rd, input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb, input wire [31:0] bank_pc, - input wire [`NW_M1:0] bank_warp_num, + input wire [`NW_BITS-1:0] bank_warp_num, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write, output wire reqq_full, @@ -71,7 +71,7 @@ module VX_bank output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid, output wire [4:0] bank_wb_rd, output wire [1:0] bank_wb_wb, - output wire [`NW_M1:0] bank_wb_warp_num, + output wire [`NW_BITS-1:0] bank_wb_warp_num, output wire [`WORD_SIZE_RNG] bank_wb_data, output wire [31:0] bank_wb_pc, output wire [31:0] bank_wb_address, @@ -86,14 +86,14 @@ module VX_bank // Dram Fill Response input wire dram_fill_rsp, input wire [31:0] dram_fill_addr, - input wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_fill_rsp_data, + input wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_fill_rsp_data, output wire dram_fill_accept, // Dram WB Requests input wire dram_wb_queue_pop, output wire dram_wb_req, output wire[31:0] dram_wb_req_addr, - output wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_wb_req_data, + output wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_wb_req_data, // Snp Request input wire snp_req, @@ -112,7 +112,7 @@ module VX_bank if (reset) begin snoop_state <= 0; end else begin - snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)); + snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)); end end @@ -139,11 +139,11 @@ module VX_bank wire dfpq_empty; wire dfpq_full; wire[31:0] dfpq_addr_st0; - wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dfpq_filldata_st0; + wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dfpq_filldata_st0; assign dram_fill_accept = !dfpq_full; - VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_SIZE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue( + VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue( .clk (clk), .reset (reset), .push (dram_fill_rsp), @@ -164,7 +164,7 @@ module VX_bank wire [`WORD_SIZE_RNG] reqq_req_writeword_st0; wire [4:0] reqq_req_rd_st0; wire [1:0] reqq_req_wb_st0; - wire [`NW_M1:0] reqq_req_warp_num_st0; + wire [`NW_BITS-1:0] reqq_req_warp_num_st0; wire [2:0] reqq_req_mem_read_st0; wire [2:0] reqq_req_mem_write_st0; wire [31:0] reqq_req_pc_st0; @@ -231,7 +231,7 @@ module VX_bank wire [4:0] mrvq_rd_st0; wire [1:0] mrvq_wb_st0; wire [31:0] miss_resrv_pc_st0; - wire [`NW_M1:0] mrvq_warp_num_st0; + wire [`NW_BITS-1:0] mrvq_warp_num_st0; wire [2:0] mrvq_mem_read_st0; wire [2:0] mrvq_mem_write_st0; @@ -241,7 +241,7 @@ module VX_bank wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid; wire[4:0] miss_add_rd; wire[1:0] miss_add_wb; - wire[`NW_M1:0] miss_add_warp_num; + wire[`NW_BITS-1:0] miss_add_warp_num; wire[2:0] miss_add_mem_read; wire[2:0] miss_add_mem_write; @@ -336,7 +336,7 @@ module VX_bank wire qual_valid_st0; wire [31:0] qual_addr_st0; wire [`WORD_SIZE_RNG] qual_writeword_st0; - wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] qual_writedata_st0; + wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] qual_writedata_st0; wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0; wire qual_going_to_write_st0; wire qual_is_snp; @@ -344,7 +344,7 @@ module VX_bank wire [`WORD_SIZE_RNG] writeword_st1 [STAGE_1_CYCLES-1:0]; wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; - wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0]; + wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0]; wire [31:0] pc_st1 [STAGE_1_CYCLES-1:0]; @@ -387,7 +387,7 @@ module VX_bank reqq_pop ? reqq_req_writeword_st0 : 0; - VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 ( + VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), @@ -399,7 +399,7 @@ module VX_bank genvar curr_stage; generate for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin - VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc ( + VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), @@ -412,7 +412,7 @@ module VX_bank wire[`WORD_SIZE_RNG] readword_st1e; - wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st1e; + wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st1e; wire[`TAG_SELECT_SIZE_RNG] readtag_st1e; wire miss_st1e; wire dirty_st1e; @@ -421,7 +421,7 @@ module VX_bank wire [4:0] rd_st1e; wire [1:0] wb_st1e; - wire [`NW_M1:0] warp_num_st1e; + wire [`NW_BITS-1:0] warp_num_st1e; wire [2:0] mem_read_st1e; wire [2:0] mem_write_st1e; wire [`vx_clog2(NUMBER_REQUESTS)-1:0] tid_st1e; @@ -488,7 +488,7 @@ module VX_bank wire valid_st2; wire[`WORD_SIZE_RNG] writeword_st2; wire[`WORD_SIZE_RNG] readword_st2; - wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st2; + wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st2; wire miss_st2; wire dirty_st2; wire[`REQ_INST_META_SIZE-1:0] inst_meta_st2; @@ -498,7 +498,7 @@ module VX_bank wire [31:0] pc_st2; - VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_SIZE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 ( + VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), @@ -525,17 +525,17 @@ module VX_bank // Enqueue to CWB Queue - wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `LLFUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full)); + wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full)); wire [`WORD_SIZE_RNG] cwbq_data = readword_st2; wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid; wire [4:0] cwbq_rd = miss_add_rd; wire [1:0] cwbq_wb = miss_add_wb; - wire [`NW_M1:0] cwbq_warp_num = miss_add_warp_num; + wire [`NW_BITS-1:0] cwbq_warp_num = miss_add_warp_num; wire [31:0] cwbq_pc = pc_st2; wire cwbq_empty; assign bank_wb_valid = !cwbq_empty; - VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue( + VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue( .clk (clk), .reset (reset), @@ -554,8 +554,8 @@ module VX_bank wire[31:0] dwbq_req_addr; wire dwbq_empty; - wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data; - if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin + wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dwbq_req_data; + if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2; assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK); end else begin @@ -603,7 +603,7 @@ module VX_bank assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK; assign dram_wb_req = !dwbq_empty; - VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_SIZE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue( + VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue( .clk (clk), .reset (reset), diff --git a/hw/rtl/generic_cache/VX_cache.v b/hw/rtl/generic_cache/VX_cache.v index 084de00c..a6047e33 100644 --- a/hw/rtl/generic_cache/VX_cache.v +++ b/hw/rtl/generic_cache/VX_cache.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache #( @@ -66,7 +66,7 @@ module VX_cache // Req meta input wire [4:0] core_req_rd, input wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb, - input wire [`NW_M1:0] core_req_warp_num, + input wire [`NW_BITS-1:0] core_req_warp_num, input wire [31:0] core_req_pc, output wire delay_req, @@ -75,7 +75,7 @@ module VX_cache output wire [NUMBER_REQUESTS-1:0] core_wb_valid, output wire [4:0] core_wb_req_rd, output wire [1:0] core_wb_req_wb, - output wire [`NW_M1:0] core_wb_warp_num, + output wire [`NW_BITS-1:0] core_wb_warp_num, output wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata, output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc, output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_address, @@ -84,7 +84,7 @@ module VX_cache // Dram Fill Response input wire dram_fill_rsp, input wire [31:0] dram_fill_rsp_addr, - input wire [`IBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data, + input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data, output wire dram_fill_accept, // Dram request @@ -93,7 +93,7 @@ module VX_cache output wire dram_req_read, output wire [31:0] dram_req_addr, output wire [31:0] dram_req_size, - output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data, + output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data, output wire dram_req_because_of_wb, input wire dram_req_delay, @@ -119,7 +119,7 @@ module VX_cache wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid; wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd; wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb; - wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num; + wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num; wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data; wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc; wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address; @@ -134,7 +134,7 @@ module VX_cache wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req; wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp; wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr; - wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data; + wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data; wire[NUMBER_BANKS-1:0] per_bank_reqq_full; @@ -287,7 +287,7 @@ module VX_cache wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] curr_bank_writedata; wire [4:0] curr_bank_rd; wire [NUMBER_REQUESTS-1:0][1:0] curr_bank_wb; - wire [`NW_M1:0] curr_bank_warp_num; + wire [`NW_BITS-1:0] curr_bank_warp_num; wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_read; wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_write; wire [31:0] curr_bank_pc; @@ -298,13 +298,13 @@ module VX_cache wire [31:0] curr_bank_wb_pc; wire [4:0] curr_bank_wb_rd; wire [1:0] curr_bank_wb_wb; - wire [`NW_M1:0] curr_bank_wb_warp_num; + wire [`NW_BITS-1:0] curr_bank_wb_warp_num; wire [`WORD_SIZE_RNG] curr_bank_wb_data; wire [31:0] curr_bank_wb_address; wire curr_bank_dram_fill_rsp; wire [31:0] curr_bank_dram_fill_rsp_addr; - wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data; + wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data; wire curr_bank_dram_fill_accept; wire curr_bank_dfqq_full; @@ -316,7 +316,7 @@ module VX_cache wire curr_bank_dram_wb_queue_pop; wire curr_bank_dram_wb_req; wire[31:0] curr_bank_dram_wb_req_addr; - wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data; + wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data; wire curr_bank_snp_req; wire[31:0] curr_bank_snp_req_addr; diff --git a/hw/rtl/generic_cache/VX_cache_config.v b/hw/rtl/generic_cache/VX_cache_config.vh similarity index 92% rename from hw/rtl/generic_cache/VX_cache_config.v rename to hw/rtl/generic_cache/VX_cache_config.vh index 360b5352..9d44f4ba 100644 --- a/hw/rtl/generic_cache/VX_cache_config.v +++ b/hw/rtl/generic_cache/VX_cache_config.vh @@ -1,7 +1,7 @@ `ifndef VX_CACHE_CONFIG `define VX_CACHE_CONFIG -`include "../VX_define.v" +`include "../VX_define.vh" // data tid rd wb warp_num read write @@ -10,10 +10,10 @@ `define vx_clog2(value) ((value == 1) ? 1 : $clog2(value)) -`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) + 3 + 3) +`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1) + 3 + 3) // 5 + 2 + 4 + 3 + 3 + 1 -`define REQ_INST_META_SIZE (5 + 2 + (`NW_M1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS)) +`define REQ_INST_META_SIZE (5 + 2 + (`NW_BITS-1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS)) // `define vx_clog2_h(value, x) (value == (1 << x)) ? (x) @@ -60,9 +60,7 @@ // 8 `define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES) // 4 -`define BANK_LINE_SIZE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES) -// 3:0 -`define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0 +`define BANK_LINE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES) // Offset is fixed `define OFFSET_ADDR_NUM_BITS 2 @@ -73,7 +71,7 @@ `define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0 // 2 -`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_SIZE_WORDS)) +`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_WORDS)) // 2 `define WORD_SELECT_SIZE_END (`WORD_SELECT_NUM_BITS) // 2 diff --git a/hw/rtl/generic_cache/VX_cache_core_req_bank_sel.v b/hw/rtl/generic_cache/VX_cache_core_req_bank_sel.v index e19531d1..9b49336d 100644 --- a/hw/rtl/generic_cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/generic_cache/VX_cache_core_req_bank_sel.v @@ -1,5 +1,5 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache_core_req_bank_sel #( diff --git a/hw/rtl/generic_cache/VX_cache_dfq_queue.v b/hw/rtl/generic_cache/VX_cache_dfq_queue.v index 1b95dd02..9f586f22 100644 --- a/hw/rtl/generic_cache/VX_cache_dfq_queue.v +++ b/hw/rtl/generic_cache/VX_cache_dfq_queue.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache_dfq_queue #( diff --git a/hw/rtl/generic_cache/VX_cache_dram_req_arb.v b/hw/rtl/generic_cache/VX_cache_dram_req_arb.v index 37264833..42adcff7 100644 --- a/hw/rtl/generic_cache/VX_cache_dram_req_arb.v +++ b/hw/rtl/generic_cache/VX_cache_dram_req_arb.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache_dram_req_arb #( @@ -62,7 +62,7 @@ module VX_cache_dram_req_arb output wire[NUMBER_BANKS-1:0] per_bank_dram_wb_queue_pop, input wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req, input wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr, - input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data, + input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data, input wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp, // real Dram request @@ -71,7 +71,7 @@ module VX_cache_dram_req_arb output wire dram_req_read, output wire [31:0] dram_req_addr, output wire [31:0] dram_req_size, - output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data, + output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data, output wire dram_req_because_of_wb, input wire dram_req_delay diff --git a/hw/rtl/generic_cache/VX_cache_miss_resrv.v b/hw/rtl/generic_cache/VX_cache_miss_resrv.v index 3efae933..4f13f0b9 100644 --- a/hw/rtl/generic_cache/VX_cache_miss_resrv.v +++ b/hw/rtl/generic_cache/VX_cache_miss_resrv.v @@ -1,5 +1,5 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache_miss_resrv #( @@ -56,7 +56,7 @@ module VX_cache_miss_resrv input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid, input wire[4:0] miss_add_rd, input wire[1:0] miss_add_wb, - input wire[`NW_M1:0] miss_add_warp_num, + input wire[`NW_BITS-1:0] miss_add_warp_num, input wire[2:0] miss_add_mem_read, input wire[2:0] miss_add_mem_write, input wire[31:0] miss_add_pc, @@ -75,14 +75,14 @@ module VX_cache_miss_resrv output wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_resrv_tid_st0, output wire[4:0] miss_resrv_rd_st0, output wire[1:0] miss_resrv_wb_st0, - output wire[`NW_M1:0] miss_resrv_warp_num_st0, + output wire[`NW_BITS-1:0] miss_resrv_warp_num_st0, output wire[2:0] miss_resrv_mem_read_st0, output wire[31:0] miss_resrv_pc_st0, output wire[2:0] miss_resrv_mem_write_st0 ); - // Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) + // Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1) reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0]; reg[MRVQ_SIZE-1:0][31:0] addr_table; reg[MRVQ_SIZE-1:0][31:0] pc_table; diff --git a/hw/rtl/generic_cache/VX_cache_req_queue.v b/hw/rtl/generic_cache/VX_cache_req_queue.v index e4b20e80..7de0a745 100644 --- a/hw/rtl/generic_cache/VX_cache_req_queue.v +++ b/hw/rtl/generic_cache/VX_cache_req_queue.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache_req_queue #( @@ -55,7 +55,7 @@ module VX_cache_req_queue input wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata, input wire [4:0] bank_rd, input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb, - input wire [`NW_M1:0] bank_warp_num, + input wire [`NW_BITS-1:0] bank_warp_num, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write, input wire [31:0] bank_pc, @@ -68,7 +68,7 @@ module VX_cache_req_queue output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0, output wire [4:0] reqq_req_rd_st0, output wire [1:0] reqq_req_wb_st0, - output wire [`NW_M1:0] reqq_req_warp_num_st0, + output wire [`NW_BITS-1:0] reqq_req_warp_num_st0, output wire [2:0] reqq_req_mem_read_st0, output wire [2:0] reqq_req_mem_write_st0, output wire [31:0] reqq_req_pc_st0, @@ -83,7 +83,7 @@ module VX_cache_req_queue wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata; wire [4:0] out_per_rd; wire [NUMBER_REQUESTS-1:0][1:0] out_per_wb; - wire [`NW_M1:0] out_per_warp_num; + wire [`NW_BITS-1:0] out_per_warp_num; wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_read; wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_write; wire [31:0] out_per_pc; @@ -95,7 +95,7 @@ module VX_cache_req_queue reg [4:0] use_per_rd; reg [NUMBER_REQUESTS-1:0][1:0] use_per_wb; reg [31:0] use_per_pc; - reg [`NW_M1:0] use_per_warp_num; + reg [`NW_BITS-1:0] use_per_warp_num; reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_read; reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_write; @@ -105,7 +105,7 @@ module VX_cache_req_queue wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata; wire [4:0] qual_rd; wire [NUMBER_REQUESTS-1:0][1:0] qual_wb; - wire [`NW_M1:0] qual_warp_num; + wire [`NW_BITS-1:0] qual_warp_num; wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_read; wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_write; wire [31:0] qual_pc; @@ -120,7 +120,7 @@ module VX_cache_req_queue wire push_qual = reqq_push && !reqq_full; wire pop_qual = !out_empty && use_empty; - VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_M1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue( + VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_BITS-1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue( .clk (clk), .reset (reset), .push (push_qual), diff --git a/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v b/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v index 12cd04c7..89f4e9b4 100644 --- a/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v +++ b/hw/rtl/generic_cache/VX_cache_wb_sel_merge.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_cache_wb_sel_merge #( @@ -53,7 +53,7 @@ module VX_cache_wb_sel_merge input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid, input wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd, input wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb, - input wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num, + input wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num, input wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data, input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc, input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address, @@ -67,7 +67,7 @@ module VX_cache_wb_sel_merge output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_pc, output wire [4:0] core_wb_req_rd, output wire [1:0] core_wb_req_wb, - output wire [`NW_M1:0] core_wb_warp_num, + output wire [`NW_BITS-1:0] core_wb_warp_num, output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_address ); @@ -105,7 +105,7 @@ module VX_cache_wb_sel_merge core_wb_pc = 0; core_wb_address = 0; for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin - if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin + if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin core_wb_valid[per_bank_wb_tid[this_bank]] = 1; diff --git a/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v b/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v index ef0863e8..7eae1819 100644 --- a/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v +++ b/hw/rtl/generic_cache/VX_dcache_llv_resp_bank_sel.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_dcache_llv_resp_bank_sel #( @@ -48,13 +48,13 @@ module VX_dcache_llv_resp_bank_sel output reg [NUMBER_BANKS-1:0] per_bank_llvq_pop, input wire[NUMBER_BANKS-1:0] per_bank_llvq_valid, input wire[NUMBER_BANKS-1:0][31:0] per_bank_llvq_res_addr, - input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_llvq_res_data, + input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_res_data, input wire[NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid, input wire llvq_pop, output reg[NUMBER_REQUESTS-1:0] llvq_valid, output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr, - output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_SIZE_RNG][31:0] llvq_res_data + output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_res_data ); diff --git a/hw/rtl/generic_cache/VX_fill_invalidator.v b/hw/rtl/generic_cache/VX_fill_invalidator.v index cbf3ad64..9448d0b1 100644 --- a/hw/rtl/generic_cache/VX_fill_invalidator.v +++ b/hw/rtl/generic_cache/VX_fill_invalidator.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_fill_invalidator #( diff --git a/hw/rtl/generic_cache/VX_prefetcher.v b/hw/rtl/generic_cache/VX_prefetcher.v index 32fe83ec..9a5e0fb0 100644 --- a/hw/rtl/generic_cache/VX_prefetcher.v +++ b/hw/rtl/generic_cache/VX_prefetcher.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_prefetcher #( diff --git a/hw/rtl/generic_cache/VX_snp_fwd_arb.v b/hw/rtl/generic_cache/VX_snp_fwd_arb.v index 320381be..e540f3b4 100644 --- a/hw/rtl/generic_cache/VX_snp_fwd_arb.v +++ b/hw/rtl/generic_cache/VX_snp_fwd_arb.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_snp_fwd_arb #( diff --git a/hw/rtl/generic_cache/VX_tag_data_access.v b/hw/rtl/generic_cache/VX_tag_data_access.v index f66cfb85..b8330bdf 100644 --- a/hw/rtl/generic_cache/VX_tag_data_access.v +++ b/hw/rtl/generic_cache/VX_tag_data_access.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_tag_data_access #( @@ -60,12 +60,12 @@ module VX_tag_data_access input wire writefill_st1e, input wire[31:0] writeaddr_st1e, input wire[`WORD_SIZE_RNG] writeword_st1e, - input wire[`DBANK_LINE_SIZE_RNG][31:0] writedata_st1e, + input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e, input wire[2:0] mem_write_st1e, input wire[2:0] mem_read_st1e, output wire[`WORD_SIZE_RNG] readword_st1e, - output wire[`DBANK_LINE_SIZE_RNG][31:0] readdata_st1e, + output wire[`DBANK_LINE_WORDS-1:0][31:0] readdata_st1e, output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e, output wire miss_st1e, output wire dirty_st1e, @@ -74,25 +74,25 @@ module VX_tag_data_access ); - reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0]; + reg[`DBANK_LINE_WORDS-1:0][31:0] readdata_st[STAGE_1_CYCLES-1:0]; reg read_valid_st1c[STAGE_1_CYCLES-1:0]; reg read_dirty_st1c[STAGE_1_CYCLES-1:0]; reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0]; - reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0]; + reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0]; wire qual_read_valid_st1; wire qual_read_dirty_st1; wire[`TAG_SELECT_SIZE_RNG] qual_read_tag_st1; - wire[`DBANK_LINE_SIZE_RNG][31:0] qual_read_data_st1; + wire[`DBANK_LINE_WORDS-1:0][31:0] qual_read_data_st1; wire use_read_valid_st1e; wire use_read_dirty_st1e; wire[`TAG_SELECT_SIZE_RNG] use_read_tag_st1e; - wire[`DBANK_LINE_SIZE_RNG][31:0] use_read_data_st1e; - wire[`DBANK_LINE_SIZE_RNG][3:0] use_write_enable; - wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data; + wire[`DBANK_LINE_WORDS-1:0][31:0] use_read_data_st1e; + wire[`DBANK_LINE_WORDS-1:0][3:0] use_write_enable; + wire[`DBANK_LINE_WORDS-1:0][31:0] use_write_data; wire sw, sb, sh; @@ -140,8 +140,8 @@ module VX_tag_data_access .fill_sent (fill_sent) ); - // VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 ( - VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) ), .Valid(0)) s0_1_c0 ( + // VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 ( + VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) ), .Valid(0)) s0_1_c0 ( .clk (clk), .reset(reset), .stall(stall), @@ -153,7 +153,7 @@ module VX_tag_data_access genvar curr_stage; generate for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin - VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc ( + VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_cc ( .clk (clk), .reset(reset), .stall(stall), @@ -170,7 +170,7 @@ module VX_tag_data_access assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM genvar curr_w; - for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0]; + for (curr_w = 0; curr_w < `DBANK_LINE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0]; // assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1]; /////////////////////// LOAD LOGIC /////////////////// @@ -243,23 +243,23 @@ module VX_tag_data_access wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e; wire force_write = real_writefill; - wire[`DBANK_LINE_SIZE_RNG][3:0] we; - wire[`DBANK_LINE_SIZE_RNG][31:0] data_write; + wire[`DBANK_LINE_WORDS-1:0][3:0] we; + wire[`DBANK_LINE_WORDS-1:0][31:0] data_write; genvar g; generate - for (g = 0; g < `DBANK_LINE_SIZE_WORDS; g = g + 1) begin : write_enables + for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables wire normal_write = (block_offset == g[`WORD_SELECT_SIZE_RNG]) && should_write && !real_writefill; assign we[g] = (force_write) ? 4'b1111 : - (should_write && !real_writefill && (FUNC_ID == `LLFUNC_ID)) ? 4'b1111 : + (should_write && !real_writefill && (FUNC_ID == `L2FUNC_ID)) ? 4'b1111 : (normal_write && sw) ? 4'b1111 : (normal_write && sb) ? sb_mask : (normal_write && sh) ? sh_mask : 4'b0000; - if (!(FUNC_ID == `LLFUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat; + if (!(FUNC_ID == `L2FUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat; end - if ((FUNC_ID == `LLFUNC_ID)) begin + if ((FUNC_ID == `L2FUNC_ID)) begin assign data_write = force_write ? writedata_st1e : writeword_st1e; end endgenerate @@ -268,7 +268,7 @@ module VX_tag_data_access assign use_write_data = data_write; /////////////////////// - if (FUNC_ID == `LLFUNC_ID) begin + if (FUNC_ID == `L2FUNC_ID) begin assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1]; end else begin assign readword_st1e = data_Qual; diff --git a/hw/rtl/generic_cache/VX_tag_data_structure.v b/hw/rtl/generic_cache/VX_tag_data_structure.v index 1cf9b7a9..d4ded057 100644 --- a/hw/rtl/generic_cache/VX_tag_data_structure.v +++ b/hw/rtl/generic_cache/VX_tag_data_structure.v @@ -1,4 +1,4 @@ -`include "VX_cache_config.v" +`include "VX_cache_config.vh" module VX_tag_data_structure #( @@ -55,18 +55,18 @@ module VX_tag_data_structure output wire read_valid, output wire read_dirty, output wire[`TAG_SELECT_SIZE_RNG] read_tag, - output wire[`DBANK_LINE_SIZE_RNG][31:0] read_data, + output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data, input wire invalidate, - input wire[`DBANK_LINE_SIZE_RNG][3:0] write_enable, + input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable, input wire write_fill, input wire[31:0] write_addr, - input wire[`DBANK_LINE_SIZE_RNG][31:0] write_data, + input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data, input wire fill_sent ); - reg[`DBANK_LINE_SIZE_RNG][3:0][7:0] data [`BANK_LINE_COUNT-1:0]; + reg[`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0]; reg[`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0]; reg valid[`BANK_LINE_COUNT-1:0]; reg dirty[`BANK_LINE_COUNT-1:0]; @@ -110,7 +110,7 @@ module VX_tag_data_structure valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0; end - for (f = 0; f < `DBANK_LINE_SIZE_WORDS; f = f + 1) begin + for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ]; if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ]; if (write_enable[f][2]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][2] <= write_data[f][23:16]; diff --git a/hw/rtl/interfaces/VX_branch_response_inter.v b/hw/rtl/interfaces/VX_branch_response_inter.v index b25b47c9..d4e0dbbf 100644 --- a/hw/rtl/interfaces/VX_branch_response_inter.v +++ b/hw/rtl/interfaces/VX_branch_response_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_BRANCH_RSP @@ -9,7 +9,7 @@ interface VX_branch_response_inter (); wire valid_branch; wire branch_dir; wire[31:0] branch_dest; - wire[`NW_M1:0] branch_warp_num; + wire[`NW_BITS-1:0] branch_warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_csr_req_inter.v b/hw/rtl/interfaces/VX_csr_req_inter.v index 9080f0e1..ae1ff588 100644 --- a/hw/rtl/interfaces/VX_csr_req_inter.v +++ b/hw/rtl/interfaces/VX_csr_req_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_CSR_REQ @@ -7,8 +7,8 @@ interface VX_csr_req_inter (); - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; + wire[`NW_BITS-1:0] warp_num; wire[4:0] rd; wire[1:0] wb; wire[4:0] alu_op; diff --git a/hw/rtl/interfaces/VX_csr_wb_inter.v b/hw/rtl/interfaces/VX_csr_wb_inter.v index d8389cdb..877e91c0 100644 --- a/hw/rtl/interfaces/VX_csr_wb_inter.v +++ b/hw/rtl/interfaces/VX_csr_wb_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_CSR_WB_REQ @@ -7,15 +7,13 @@ interface VX_csr_wb_inter (); - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; - wire[4:0] rd; - wire[1:0] wb; + wire[`NUM_THREADS-1:0] valid; + wire[`NW_BITS-1:0] warp_num; + wire[4:0] rd; + wire[1:0] wb; - wire[`NT_M1:0][31:0] csr_result; - + wire[`NUM_THREADS-1:0][31:0] csr_result; endinterface - `endif \ No newline at end of file diff --git a/hw/rtl/interfaces/VX_dcache_request_inter.v b/hw/rtl/interfaces/VX_dcache_request_inter.v index ac841a76..00590758 100644 --- a/hw/rtl/interfaces/VX_dcache_request_inter.v +++ b/hw/rtl/interfaces/VX_dcache_request_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_DCACHE_REQ @@ -7,11 +7,11 @@ interface VX_dcache_request_inter (); - wire[`NT_M1:0][31:0] out_cache_driver_in_address; + wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_address; wire[2:0] out_cache_driver_in_mem_read; wire[2:0] out_cache_driver_in_mem_write; - wire[`NT_M1:0] out_cache_driver_in_valid; - wire[`NT_M1:0][31:0] out_cache_driver_in_data; + wire[`NUM_THREADS-1:0] out_cache_driver_in_valid; + wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_data; endinterface diff --git a/hw/rtl/interfaces/VX_dcache_response_inter.v b/hw/rtl/interfaces/VX_dcache_response_inter.v index 98ed58a3..f2af6557 100644 --- a/hw/rtl/interfaces/VX_dcache_response_inter.v +++ b/hw/rtl/interfaces/VX_dcache_response_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_DCACHE_RSP @@ -7,7 +7,7 @@ interface VX_dcache_response_inter (); - wire[`NT_M1:0][31:0] in_cache_driver_out_data; + wire[`NUM_THREADS-1:0][31:0] in_cache_driver_out_data; wire delay; endinterface diff --git a/hw/rtl/interfaces/VX_dram_req_rsp_inter.v b/hw/rtl/interfaces/VX_dram_req_rsp_inter.v index f4d7012d..9ba5882f 100644 --- a/hw/rtl/interfaces/VX_dram_req_rsp_inter.v +++ b/hw/rtl/interfaces/VX_dram_req_rsp_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_DRAM_REQ_RSP_INTER diff --git a/hw/rtl/interfaces/VX_exec_unit_req_inter.v b/hw/rtl/interfaces/VX_exec_unit_req_inter.v index aab6c130..201bb7ba 100644 --- a/hw/rtl/interfaces/VX_exec_unit_req_inter.v +++ b/hw/rtl/interfaces/VX_exec_unit_req_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_EXE_UNIT_REQ_INTER @@ -8,8 +8,8 @@ interface VX_exec_unit_req_inter (); // Meta - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; + wire[`NW_BITS-1:0] warp_num; wire[31:0] curr_PC; wire[31:0] PC_next; @@ -18,8 +18,8 @@ interface VX_exec_unit_req_inter (); wire[1:0] wb; // Data and alu op - wire[`NT_M1:0][31:0] a_reg_data; - wire[`NT_M1:0][31:0] b_reg_data; + wire[`NUM_THREADS-1:0][31:0] a_reg_data; + wire[`NUM_THREADS-1:0][31:0] b_reg_data; wire[4:0] alu_op; wire[4:0] rs1; wire[4:0] rs2; diff --git a/hw/rtl/interfaces/VX_frE_to_bckE_req_inter.v b/hw/rtl/interfaces/VX_frE_to_bckE_req_inter.v index ba4ac9be..dac6cb6e 100644 --- a/hw/rtl/interfaces/VX_frE_to_bckE_req_inter.v +++ b/hw/rtl/interfaces/VX_frE_to_bckE_req_inter.v @@ -1,5 +1,5 @@ -`include "VX_define.v" +`include "VX_define.vh" `ifndef VX_FrE_to_BE_INTER @@ -30,8 +30,8 @@ interface VX_frE_to_bckE_req_inter (); wire jal; wire[31:0] jal_offset; wire[31:0] PC_next; - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; + wire[`NW_BITS-1:0] warp_num; // GPGPU stuff wire is_wspawn; diff --git a/hw/rtl/interfaces/VX_gpr_clone_inter.v b/hw/rtl/interfaces/VX_gpr_clone_inter.v index 26053ac9..de2ba9f0 100644 --- a/hw/rtl/interfaces/VX_gpr_clone_inter.v +++ b/hw/rtl/interfaces/VX_gpr_clone_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_GPR_CLONE_INTER @@ -9,7 +9,7 @@ interface VX_gpr_clone_inter (); /* verilator lint_off UNUSED */ wire is_clone; -wire[`NW_M1:0] warp_num; +wire[`NW_BITS-1:0] warp_num; /* verilator lint_on UNUSED */ endinterface diff --git a/hw/rtl/interfaces/VX_gpr_data_inter.v b/hw/rtl/interfaces/VX_gpr_data_inter.v index 912f04a1..f7cfbac1 100644 --- a/hw/rtl/interfaces/VX_gpr_data_inter.v +++ b/hw/rtl/interfaces/VX_gpr_data_inter.v @@ -1,13 +1,13 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_gpr_data_INTER `define VX_gpr_data_INTER interface VX_gpr_data_inter (); - wire[`NT_M1:0][31:0] a_reg_data; - wire[`NT_M1:0][31:0] b_reg_data; + wire[`NUM_THREADS-1:0][31:0] a_reg_data; + wire[`NUM_THREADS-1:0][31:0] b_reg_data; endinterface diff --git a/hw/rtl/interfaces/VX_gpr_jal_inter.v b/hw/rtl/interfaces/VX_gpr_jal_inter.v index 0c4b7afb..0b3eff8b 100644 --- a/hw/rtl/interfaces/VX_gpr_jal_inter.v +++ b/hw/rtl/interfaces/VX_gpr_jal_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_GPR_JAL_INTER `define VX_GPR_JAL_INTER diff --git a/hw/rtl/interfaces/VX_gpr_read_inter.v b/hw/rtl/interfaces/VX_gpr_read_inter.v index ccac96c0..3666b5e2 100644 --- a/hw/rtl/interfaces/VX_gpr_read_inter.v +++ b/hw/rtl/interfaces/VX_gpr_read_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_GPR_READ `define VX_GPR_READ @@ -8,7 +8,7 @@ interface VX_gpr_read_inter (); wire[4:0] rs1; wire[4:0] rs2; - wire[`NW_M1:0] warp_num; + wire[`NW_BITS-1:0] warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_gpr_wspawn_inter.v b/hw/rtl/interfaces/VX_gpr_wspawn_inter.v index dfa0fc4c..f730c79e 100644 --- a/hw/rtl/interfaces/VX_gpr_wspawn_inter.v +++ b/hw/rtl/interfaces/VX_gpr_wspawn_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_GPR_WSPAWN_INTER `define VX_GPR_WSPAWN_INTER @@ -7,8 +7,8 @@ interface VX_gpr_wspawn_inter (); /* verilator lint_off UNUSED */ wire is_wspawn; - wire[`NW_M1:0] which_wspawn; - // wire[`NW_M1:0] warp_num; + wire[`NW_BITS-1:0] which_wspawn; + // wire[`NW_BITS-1:0] warp_num; /* verilator lint_on UNUSED */ endinterface diff --git a/hw/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v b/hw/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v index e41928d2..1f5dc1e4 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_dram_req_inter.v @@ -1,6 +1,6 @@ -`include "../generic_cache/VX_cache_config.v" +`include "../generic_cache/VX_cache_config.vh" `ifndef VX_GPU_DRAM_DCACHE_REQ @@ -8,7 +8,7 @@ interface VX_gpu_dcache_dram_req_inter #( - parameter BANK_LINE_SIZE_WORDS = 2 + parameter BANK_LINE_WORDS = 2 ) (); @@ -18,7 +18,7 @@ interface VX_gpu_dcache_dram_req_inter wire dram_req_read; wire [31:0] dram_req_addr; wire [31:0] dram_req_size; - wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_req_data; + wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data; // Snoop wire dram_because_of_snp; diff --git a/hw/rtl/interfaces/VX_gpu_dcache_dram_res_inter.v b/hw/rtl/interfaces/VX_gpu_dcache_dram_res_inter.v index 9813445d..6df08286 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_dram_res_inter.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_dram_res_inter.v @@ -1,7 +1,7 @@ -`include "../generic_cache/VX_cache_config.v" +`include "../generic_cache/VX_cache_config.vh" `ifndef VX_GPU_DRAM_DCACHE_RES @@ -9,13 +9,13 @@ interface VX_gpu_dcache_dram_res_inter #( - parameter BANK_LINE_SIZE_WORDS = 2 + parameter BANK_LINE_WORDS = 2 ) (); // DRAM Rsponse wire dram_fill_rsp; wire [31:0] dram_fill_rsp_addr; - wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_fill_rsp_data; + wire [BANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data; endinterface diff --git a/hw/rtl/interfaces/VX_gpu_dcache_req_inter.v b/hw/rtl/interfaces/VX_gpu_dcache_req_inter.v index e25cca87..2063d1b8 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_req_inter.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_req_inter.v @@ -1,6 +1,6 @@ -`include "../generic_cache/VX_cache_config.v" +`include "../generic_cache/VX_cache_config.vh" `ifndef VX_GPU_DCACHE_REQ @@ -20,7 +20,7 @@ interface VX_gpu_dcache_req_inter wire [NUMBER_REQUESTS-1:0][2:0] core_req_mem_write; wire [4:0] core_req_rd; wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb; - wire [`NW_M1:0] core_req_warp_num; + wire [`NW_BITS-1:0] core_req_warp_num; wire [31:0] core_req_pc; // Can't WB diff --git a/hw/rtl/interfaces/VX_gpu_dcache_res_inter.v b/hw/rtl/interfaces/VX_gpu_dcache_res_inter.v index 90b02e5d..d6d01cb1 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_res_inter.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_res_inter.v @@ -1,6 +1,6 @@ -`include "../generic_cache/VX_cache_config.v" +`include "../generic_cache/VX_cache_config.vh" `ifndef VX_GPU_DCACHE_RES @@ -16,7 +16,7 @@ interface VX_gpu_dcache_res_inter wire [NUMBER_REQUESTS-1:0] core_wb_valid; wire [4:0] core_wb_req_rd; wire [1:0] core_wb_req_wb; - wire [`NW_M1:0] core_wb_warp_num; + wire [`NW_BITS-1:0] core_wb_warp_num; wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata; wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc; diff --git a/hw/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v b/hw/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v index fd5b9a62..9b4a4b02 100644 --- a/hw/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v +++ b/hw/rtl/interfaces/VX_gpu_dcache_snp_req_inter.v @@ -1,7 +1,7 @@ -`include "../generic_cache/VX_cache_config.v" +`include "../generic_cache/VX_cache_config.vh" `ifndef VX_GPU_SNP_REQ diff --git a/hw/rtl/interfaces/VX_gpu_inst_req_inter.v b/hw/rtl/interfaces/VX_gpu_inst_req_inter.v index 1d24c960..16f5ab6c 100644 --- a/hw/rtl/interfaces/VX_gpu_inst_req_inter.v +++ b/hw/rtl/interfaces/VX_gpu_inst_req_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_GPU_INST_REQ_IN @@ -6,8 +6,8 @@ interface VX_gpu_inst_req_inter(); - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; + wire[`NW_BITS-1:0] warp_num; wire is_wspawn; wire is_tmc; wire is_split; @@ -16,7 +16,7 @@ interface VX_gpu_inst_req_inter(); wire[31:0] pc_next; - wire[`NT_M1:0][31:0] a_reg_data; + wire[`NUM_THREADS-1:0][31:0] a_reg_data; wire[31:0] rd2; diff --git a/hw/rtl/interfaces/VX_gpu_snp_req_rsp.v b/hw/rtl/interfaces/VX_gpu_snp_req_rsp.v index 1ab3094c..aa76328d 100644 --- a/hw/rtl/interfaces/VX_gpu_snp_req_rsp.v +++ b/hw/rtl/interfaces/VX_gpu_snp_req_rsp.v @@ -1,4 +1,4 @@ -`include "../generic_cache/VX_cache_config.v" +`include "../generic_cache/VX_cache_config.vh" `ifndef VX_GPU_SNP_REQ_RSP diff --git a/hw/rtl/interfaces/VX_icache_request_inter.v b/hw/rtl/interfaces/VX_icache_request_inter.v index 9de1312b..f6cc52a3 100644 --- a/hw/rtl/interfaces/VX_icache_request_inter.v +++ b/hw/rtl/interfaces/VX_icache_request_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_ICACHE_REQ diff --git a/hw/rtl/interfaces/VX_icache_response_inter.v b/hw/rtl/interfaces/VX_icache_response_inter.v index 2373046b..90a63a3f 100644 --- a/hw/rtl/interfaces/VX_icache_response_inter.v +++ b/hw/rtl/interfaces/VX_icache_response_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_ICACHE_RSP diff --git a/hw/rtl/interfaces/VX_inst_exec_wb_inter.v b/hw/rtl/interfaces/VX_inst_exec_wb_inter.v index 929ba88d..a3df1e67 100644 --- a/hw/rtl/interfaces/VX_inst_exec_wb_inter.v +++ b/hw/rtl/interfaces/VX_inst_exec_wb_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_EXEC_UNIT_WB_INST_INTER @@ -7,12 +7,12 @@ interface VX_inst_exec_wb_inter (); - wire[`NT_M1:0][31:0] alu_result; + wire[`NUM_THREADS-1:0][31:0] alu_result; wire[31:0] exec_wb_pc; wire[4:0] rd; wire[1:0] wb; - wire[`NT_M1:0] wb_valid; - wire[`NW_M1:0] wb_warp_num; + wire[`NUM_THREADS-1:0] wb_valid; + wire[`NW_BITS-1:0] wb_warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_inst_mem_wb_inter.v b/hw/rtl/interfaces/VX_inst_mem_wb_inter.v index d752a3a6..315c8002 100644 --- a/hw/rtl/interfaces/VX_inst_mem_wb_inter.v +++ b/hw/rtl/interfaces/VX_inst_mem_wb_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_MEM_WB_INST_INTER @@ -7,12 +7,12 @@ interface VX_inst_mem_wb_inter (); - wire[`NT_M1:0][31:0] loaded_data; + wire[`NUM_THREADS-1:0][31:0] loaded_data; wire[31:0] mem_wb_pc; wire[4:0] rd; wire[1:0] wb; - wire[`NT_M1:0] wb_valid; - wire[`NW_M1:0] wb_warp_num; + wire[`NUM_THREADS-1:0] wb_valid; + wire[`NW_BITS-1:0] wb_warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_inst_meta_inter.v b/hw/rtl/interfaces/VX_inst_meta_inter.v index 2fd68625..5ec45e73 100644 --- a/hw/rtl/interfaces/VX_inst_meta_inter.v +++ b/hw/rtl/interfaces/VX_inst_meta_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_F_D_INTER @@ -7,8 +7,8 @@ interface VX_inst_meta_inter (); wire[31:0] instruction; wire[31:0] inst_pc; - wire[`NW_M1:0] warp_num; - wire[`NT_M1:0] valid; + wire[`NW_BITS-1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; endinterface diff --git a/hw/rtl/interfaces/VX_jal_response_inter.v b/hw/rtl/interfaces/VX_jal_response_inter.v index e93a2d0a..4a973e10 100644 --- a/hw/rtl/interfaces/VX_jal_response_inter.v +++ b/hw/rtl/interfaces/VX_jal_response_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_JAL_RSP @@ -9,7 +9,7 @@ interface VX_jal_response_inter (); wire jal; wire[31:0] jal_dest; - wire[`NW_M1:0] jal_warp_num; + wire[`NW_BITS-1:0] jal_warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_join_inter.v b/hw/rtl/interfaces/VX_join_inter.v index a465bf65..2216ddb6 100644 --- a/hw/rtl/interfaces/VX_join_inter.v +++ b/hw/rtl/interfaces/VX_join_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_JOIN_INTER @@ -8,7 +8,7 @@ interface VX_join_inter (); wire is_join; - wire[`NW_M1:0] join_warp_num; + wire[`NW_BITS-1:0] join_warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_lsu_req_inter.v b/hw/rtl/interfaces/VX_lsu_req_inter.v index 408791f6..2066d034 100644 --- a/hw/rtl/interfaces/VX_lsu_req_inter.v +++ b/hw/rtl/interfaces/VX_lsu_req_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_LSU_REQ_INTER @@ -7,11 +7,11 @@ interface VX_lsu_req_inter (); - wire[`NT_M1:0] valid; + wire[`NUM_THREADS-1:0] valid; wire[31:0] lsu_pc; - wire[`NW_M1:0] warp_num; - wire[`NT_M1:0][31:0] store_data; - wire[`NT_M1:0][31:0] base_address; // A reg data + wire[`NW_BITS-1:0] warp_num; + wire[`NUM_THREADS-1:0][31:0] store_data; + wire[`NUM_THREADS-1:0][31:0] base_address; // A reg data wire[31:0] offset; // itype_immed wire[2:0] mem_read; wire[2:0] mem_write; diff --git a/hw/rtl/interfaces/VX_mem_req_inter.v b/hw/rtl/interfaces/VX_mem_req_inter.v index ee2a975d..6f913521 100644 --- a/hw/rtl/interfaces/VX_mem_req_inter.v +++ b/hw/rtl/interfaces/VX_mem_req_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_MEM_REQ_IN @@ -6,20 +6,20 @@ interface VX_mem_req_inter (); - wire[`NT_M1:0][31:0] alu_result; + wire[`NUM_THREADS-1:0][31:0] alu_result; wire[2:0] mem_read; wire[2:0] mem_write; wire[4:0] rd; wire[1:0] wb; wire[4:0] rs1; wire[4:0] rs2; - wire[`NT_M1:0][31:0] rd2; + wire[`NUM_THREADS-1:0][31:0] rd2; wire[31:0] PC_next; wire[31:0] curr_PC; wire[31:0] branch_offset; wire[2:0] branch_type; - wire[`NT_M1:0] valid; - wire[`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; + wire[`NW_BITS-1:0] warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_mw_wb_inter.v b/hw/rtl/interfaces/VX_mw_wb_inter.v index bbf4733e..e170e14a 100644 --- a/hw/rtl/interfaces/VX_mw_wb_inter.v +++ b/hw/rtl/interfaces/VX_mw_wb_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_MW_WB_INTER @@ -7,13 +7,13 @@ interface VX_mw_wb_inter (); - wire[`NT_M1:0][31:0] alu_result; - wire[`NT_M1:0][31:0] mem_result; + wire[`NUM_THREADS-1:0][31:0] alu_result; + wire[`NUM_THREADS-1:0][31:0] mem_result; wire[4:0] rd; wire[1:0] wb; wire[31:0] PC_next; - wire[`NT_M1:0] valid; - wire [`NW_M1:0] warp_num; + wire[`NUM_THREADS-1:0] valid; + wire [`NW_BITS-1:0] warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_warp_ctl_inter.v b/hw/rtl/interfaces/VX_warp_ctl_inter.v index 53dec2a1..15efbb3f 100644 --- a/hw/rtl/interfaces/VX_warp_ctl_inter.v +++ b/hw/rtl/interfaces/VX_warp_ctl_inter.v @@ -1,5 +1,5 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_WARP_CTL_INTER @@ -7,26 +7,26 @@ interface VX_warp_ctl_inter (); - wire[`NW_M1:0] warp_num; + wire[`NW_BITS-1:0] warp_num; wire change_mask; - wire[`NT_M1:0] thread_mask; + wire[`NUM_THREADS-1:0] thread_mask; wire wspawn; wire[31:0] wspawn_pc; - wire[`NW-1:0] wspawn_new_active; + wire[`NUM_WARPS-1:0] wspawn_new_active; wire ebreak; // barrier wire is_barrier; wire[31:0] barrier_id; - wire[$clog2(`NW):0] num_warps; + wire[$clog2(`NUM_WARPS):0] num_warps; wire is_split; wire dont_split; - wire[`NW_M1:0] split_warp_num; - wire[`NT_M1:0] split_new_mask; - wire[`NT_M1:0] split_later_mask; + wire[`NW_BITS-1:0] split_warp_num; + wire[`NUM_THREADS-1:0] split_new_mask; + wire[`NUM_THREADS-1:0] split_later_mask; wire[31:0] split_save_pc; diff --git a/hw/rtl/interfaces/VX_wb_inter.v b/hw/rtl/interfaces/VX_wb_inter.v index c40cf4fe..12f988c0 100644 --- a/hw/rtl/interfaces/VX_wb_inter.v +++ b/hw/rtl/interfaces/VX_wb_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_WB_INTER @@ -7,12 +7,12 @@ interface VX_wb_inter (); - wire[`NT_M1:0][31:0] write_data; + wire[`NUM_THREADS-1:0][31:0] write_data; wire[31:0] wb_pc; wire[4:0] rd; wire[1:0] wb; - wire[`NT_M1:0] wb_valid; - wire[`NW_M1:0] wb_warp_num; + wire[`NUM_THREADS-1:0] wb_valid; + wire[`NW_BITS-1:0] wb_warp_num; endinterface diff --git a/hw/rtl/interfaces/VX_wstall_inter.v b/hw/rtl/interfaces/VX_wstall_inter.v index 8699b022..a38130d3 100644 --- a/hw/rtl/interfaces/VX_wstall_inter.v +++ b/hw/rtl/interfaces/VX_wstall_inter.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" `ifndef VX_WSTALL_INTER @@ -7,7 +7,7 @@ interface VX_wstall_inter(); wire wstall; - wire[`NW_M1:0] warp_num; + wire[`NW_BITS-1:0] warp_num; endinterface diff --git a/hw/rtl/pipe_regs/VX_d_e_reg.v b/hw/rtl/pipe_regs/VX_d_e_reg.v index 6fed097e..26b3576b 100644 --- a/hw/rtl/pipe_regs/VX_d_e_reg.v +++ b/hw/rtl/pipe_regs/VX_d_e_reg.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" module VX_d_e_reg ( input wire clk, @@ -16,7 +16,7 @@ module VX_d_e_reg ( wire flush = (in_branch_stall == `STALL); - VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg + VX_generic_register #(.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)) d_e_reg ( .clk (clk), .reset(reset), diff --git a/hw/rtl/pipe_regs/VX_f_d_reg.v b/hw/rtl/pipe_regs/VX_f_d_reg.v index 50003179..99cc82c2 100644 --- a/hw/rtl/pipe_regs/VX_f_d_reg.v +++ b/hw/rtl/pipe_regs/VX_f_d_reg.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" module VX_f_d_reg ( input wire clk, @@ -13,7 +13,7 @@ module VX_f_d_reg ( wire flush = 1'b0; wire stall = in_freeze == 1'b1; - VX_generic_register #( .N(64+`NW_M1+1+`NT) ) f_d_reg ( + VX_generic_register #( .N(64+`NW_BITS-1+1+`NUM_THREADS) ) f_d_reg ( .clk (clk), .reset(reset), .stall(stall), diff --git a/hw/rtl/pipe_regs/VX_i_d_reg.v b/hw/rtl/pipe_regs/VX_i_d_reg.v index 2b7740c4..10841c57 100644 --- a/hw/rtl/pipe_regs/VX_i_d_reg.v +++ b/hw/rtl/pipe_regs/VX_i_d_reg.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" module VX_i_d_reg ( input wire clk, @@ -14,7 +14,7 @@ module VX_i_d_reg ( wire stall = in_freeze == 1'b1; - VX_generic_register #( .N( 64 + `NW_M1 + 1 + `NT ) ) i_d_reg ( + VX_generic_register #( .N( 64 + `NW_BITS-1 + 1 + `NUM_THREADS ) ) i_d_reg ( .clk (clk), .reset(reset), .stall(stall), diff --git a/hw/rtl/shared_memory/VX_bank_valids.v b/hw/rtl/shared_memory/VX_bank_valids.v index 3b1e63ab..af5a9f24 100644 --- a/hw/rtl/shared_memory/VX_bank_valids.v +++ b/hw/rtl/shared_memory/VX_bank_valids.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" // Converts in_valids to bank_valids module VX_bank_valids @@ -7,16 +7,16 @@ module VX_bank_valids parameter BITS_PER_BANK = 3 ) ( - input wire[`NT_M1:0] in_valids, - input wire[`NT_M1:0][31:0] in_addr, - output reg[NB:0][`NT_M1:0] bank_valids + input wire[`NUM_THREADS-1:0] in_valids, + input wire[`NUM_THREADS-1:0][31:0] in_addr, + output reg[NB:0][`NUM_THREADS-1:0] bank_valids ); integer i, j; always@(*) begin for(j = 0; j <= NB; j = j+1 ) begin - for(i = 0; i <= `NT_M1; i = i+1) begin + for(i = 0; i < `NUM_THREADS; i = i+1) begin if(in_valids[i]) begin if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin bank_valids[j][i] = 1'b1; diff --git a/hw/rtl/shared_memory/VX_priority_encoder_sm.v b/hw/rtl/shared_memory/VX_priority_encoder_sm.v index b9df21d2..d976d6da 100644 --- a/hw/rtl/shared_memory/VX_priority_encoder_sm.v +++ b/hw/rtl/shared_memory/VX_priority_encoder_sm.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" module VX_priority_encoder_sm #( @@ -10,9 +10,9 @@ module VX_priority_encoder_sm //INPUTS input wire clk, input wire reset, - input wire[`NT_M1:0] in_valid, - input wire[`NT_M1:0][31:0] in_address, - input wire[`NT_M1:0][31:0] in_data, + input wire[`NUM_THREADS-1:0] in_valid, + input wire[`NUM_THREADS-1:0][31:0] in_address, + input wire[`NUM_THREADS-1:0][31:0] in_data, // OUTPUTS // To SM Module output reg[NB:0] out_valid, @@ -20,16 +20,16 @@ module VX_priority_encoder_sm output reg[NB:0][31:0] out_data, // To Processor - output wire[NB:0][`CLOG2(NUM_REQ) - 1:0] req_num, + output wire[NB:0][`LOG2UP(NUM_REQ) - 1:0] req_num, output reg stall, output wire send_data // Finished all of the requests ); - reg[`NT_M1:0] left_requests; - reg[`NT_M1:0] serviced; + reg[`NUM_THREADS-1:0] left_requests; + reg[`NUM_THREADS-1:0] serviced; - wire[`NT_M1:0] use_valid; + wire[`NUM_THREADS-1:0] use_valid; wire requests_left = (|left_requests); @@ -37,7 +37,7 @@ module VX_priority_encoder_sm assign use_valid = (requests_left) ? left_requests : in_valid; - wire[NB:0][`NT_M1:0] bank_valids; + wire[NB:0][`NUM_THREADS-1:0] bank_valids; VX_bank_valids #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_bank_valid( .in_valids(use_valid), .in_addr(in_address), @@ -49,9 +49,9 @@ module VX_priority_encoder_sm genvar curr_bank; generate for (curr_bank = 0; curr_bank <= NB; curr_bank = curr_bank + 1) begin : countones_blocks - wire[`CLOG2(`NT):0] num_valids; + wire[`LOG2UP(`NUM_THREADS):0] num_valids; - VX_countones #(.N(`NT)) valids_counter ( + VX_countones #(.N(`NUM_THREADS)) valids_counter ( .valids(bank_valids[curr_bank]), .count (num_valids) ); @@ -64,7 +64,7 @@ module VX_priority_encoder_sm assign stall = (|more_than_one_valid); assign send_data = (!stall) && (|in_valid); // change - wire[NB:0][(`CLOG2(NUM_REQ)) - 1:0] internal_req_num; + wire[NB:0][(`LOG2UP(NUM_REQ)) - 1:0] internal_req_num; wire[NB:0] internal_out_valid; @@ -96,11 +96,11 @@ module VX_priority_encoder_sm assign out_valid = internal_out_valid; - wire[`NT_M1:0] serviced_qual = in_valid & (serviced); + wire[`NUM_THREADS-1:0] serviced_qual = in_valid & (serviced); - wire[`NT_M1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual); + wire[`NUM_THREADS-1:0] new_left_requests = (left_requests == 0) ? (in_valid & ~serviced_qual) : (left_requests & ~ serviced_qual); - // wire[`NT_M1:0] new_left_requests = left_requests & ~(serviced_qual); + // wire[`NUM_THREADS-1:0] new_left_requests = left_requests & ~(serviced_qual); always @(posedge clk, posedge reset) begin if (reset) begin diff --git a/hw/rtl/shared_memory/VX_shared_memory.v b/hw/rtl/shared_memory/VX_shared_memory.v index 207c9157..6a5e07f9 100644 --- a/hw/rtl/shared_memory/VX_shared_memory.v +++ b/hw/rtl/shared_memory/VX_shared_memory.v @@ -1,4 +1,4 @@ -`include "../VX_define.v" +`include "../VX_define.vh" module VX_shared_memory #( @@ -21,14 +21,14 @@ module VX_shared_memory //INPUTS input wire clk, input wire reset, - input wire[`NT_M1:0] in_valid, - input wire[`NT_M1:0][31:0] in_address, - input wire[`NT_M1:0][31:0] in_data, + input wire[`NUM_THREADS-1:0] in_valid, + input wire[`NUM_THREADS-1:0][31:0] in_address, + input wire[`NUM_THREADS-1:0][31:0] in_data, input wire[2:0] mem_read, input wire[2:0] mem_write, //OUTPUTS - output wire[`NT_M1:0] out_valid, - output wire[`NT_M1:0][31:0] out_data, + output wire[`NUM_THREADS-1:0] out_valid, + output wire[`NUM_THREADS-1:0][31:0] out_data, output wire stall ); @@ -39,8 +39,8 @@ reg[SM_BANKS - 1:0][31:0] temp_address; reg[SM_BANKS - 1:0][31:0] temp_in_data; reg[SM_BANKS - 1:0] temp_in_valid; -reg[`NT_M1:0] temp_out_valid; -reg[`NT_M1:0][31:0] temp_out_data; +reg[`NUM_THREADS-1:0] temp_out_valid; +reg[`NUM_THREADS-1:0][31:0] temp_out_data; //reg [NB:0][6:0] block_addr; //reg [NB:0][3:0][31:0] block_wdata; @@ -54,20 +54,19 @@ reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we; wire send_data; //reg[NB:0][1:0] req_num; -reg[SM_BANKS - 1:0][`CLOG2(NUM_REQ) - 1:0] req_num; // not positive about this - -wire [`NT_M1:0] orig_in_valid; +reg[SM_BANKS - 1:0][`LOG2UP(NUM_REQ) - 1:0] req_num; // not positive about this +wire [`NUM_THREADS-1:0] orig_in_valid; genvar f; - generate - for(f = 0; f < `NT; f = f+1) begin : orig_in_valid_setup - assign orig_in_valid[f] = in_valid[f]; - end +generate + for(f = 0; f < `NUM_THREADS; f = f+1) begin : orig_in_valid_setup + assign orig_in_valid[f] = in_valid[f]; + end - assign out_valid = send_data ? temp_out_valid : 0; - assign out_data = send_data ? temp_out_data : 0; - endgenerate + assign out_valid = send_data ? temp_out_valid : 0; + assign out_data = send_data ? temp_out_data : 0; +endgenerate //VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm( diff --git a/hw/gen_config.py b/hw/scripts/gen_config.py similarity index 81% rename from hw/gen_config.py rename to hw/scripts/gen_config.py index 8d80c346..b622802b 100755 --- a/hw/gen_config.py +++ b/hw/scripts/gen_config.py @@ -8,7 +8,7 @@ import re import argparse from datetime import datetime -rtl_root = path.dirname(path.realpath(__file__)) +script_dir = path.dirname(path.realpath(__file__)) defines = {} for k, v in os.environ.items(): @@ -20,14 +20,9 @@ print('Custom params:', ', '.join(['='.join(x) for x in defines.items()])) parser = argparse.ArgumentParser() parser.add_argument('--outc', default='none', help='Output C header') parser.add_argument('--outv', default='none', help='Output Verilog header') -parser.add_argument('--rtl_locations', action='store_true', help='use outc and outv for rtl and verilator') args = parser.parse_args() -if args.rtl_locations: - args.outc = path.join(rtl_root, 'simulate/VX_define.h') - args.outv = path.join(rtl_root, 'rtl/VX_define_synth.v') - if args.outc == 'none' and args.outv == 'none': print('Warning: not emitting any files. Specify arguments') @@ -37,8 +32,8 @@ if args.outv != 'none': // auto-generated by gen_config.py. DO NOT EDIT // Generated at {date} -`ifndef VX_DEFINE_SYNTH -`define VX_DEFINE_SYNTH +`ifndef VX_USER_CONFIG +`define VX_USER_CONFIG '''[1:].format(date=datetime.now()), file=f) for k, v in defines.items(): @@ -52,8 +47,8 @@ if args.outc != 'none': // auto-generated by gen_config.py. DO NOT EDIT // Generated at {date} -#ifndef VX_DEFINE_SYNTH -#define VX_DEFINE_SYNTH +#ifndef VX_USER_CONFIG +#define VX_USER_CONFIG '''[1:].format(date=datetime.now()), file=f) for k, v in defines.items(): @@ -66,7 +61,7 @@ translation_rules = [ (re.compile(r'^( *)`ifndef ([^ ]+)$'), r'\1#ifndef \2'), (re.compile(r'^( *)`define ([^ ]+)$'), r'\1#define \2'), # (re.compile(r'^( *)`include "\./VX_define_synth\.v"$'), r'\1#include "VX_define_synth.h"'), - (re.compile(r'^( *)`include "\./VX_define_synth\.v"$'), r''), + (re.compile(r'^( *)`include "VX_user_config\.vh"$'), r''), (re.compile(r'^( *)`define ([^ ]+) (.+)$'), r'\1#define \2 \3'), (re.compile(r'^( *)`endif$'), r'\1#endif'), (re.compile(r'^( *)// (.*)$'), r'\1// \2'), @@ -98,9 +93,9 @@ if args.outc != 'none': // auto-generated by gen_config.py. DO NOT EDIT // Generated at {date} -// Translated from VX_define.v: +// Translated from VX_config.vh: '''[1:].format(date=datetime.now()), file=f) - with open(path.join(rtl_root, 'rtl/VX_define.v'), 'r') as r: + with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r: for line in r: if in_expansion: f.write(post_process_line(line)) @@ -119,15 +114,13 @@ if args.outc != 'none': print(''' // Misc -#define THREADS_PER_WARP NT -#define WARPS_PER_CORE NW -#define NUMBER_WI (NW * NT * NUMBER_CORES_PER_CLUSTER * NUMBER_CLUSTERS) +#define THREADS_PER_WARP NUM_THREADS +#define WARPS_PER_CORE NUM_WARPS +#define NUMBER_WI (NUM_WARPS * NUM_THREADS * NUMBER_CORES_PER_CLUSTER * NUMBER_CLUSTERS) // legacy #define TOTAL_THREADS NUMBER_WI -#define TOTAL_WARPS (NW * NUMBER_CORES_PER_CLUSTER * NUMBER_CLUSTERS) - - +#define TOTAL_WARPS (NUM_WARPS * NUMBER_CORES_PER_CLUSTER * NUMBER_CLUSTERS) // COLORS #define GREEN "\\033[32m" diff --git a/hw/gen_synth_configs.py b/hw/scripts/gen_synth_configs.py similarity index 97% rename from hw/gen_synth_configs.py rename to hw/scripts/gen_synth_configs.py index 56d8b528..8bb7ccc9 100755 --- a/hw/gen_synth_configs.py +++ b/hw/scripts/gen_synth_configs.py @@ -16,7 +16,7 @@ export V_DCACHE_SIZE_BYTES={dcachek} export V_ICACHE_SIZE_BYTES={icachek} # L2 Cache size -export V_LLCACHE_SIZE_BYTES={l2k} +export V_L2CACHE_SIZE_BYTES={l2k} {codegen} diff --git a/hw/simulate/.gitignore b/hw/simulate/.gitignore new file mode 100644 index 00000000..851ef598 --- /dev/null +++ b/hw/simulate/.gitignore @@ -0,0 +1 @@ +VX_config.h \ No newline at end of file diff --git a/hw/syn/synopsys/esyn.tcl b/hw/syn/synopsys/esyn.tcl index f9ab2e73..358a1ca6 100644 --- a/hw/syn/synopsys/esyn.tcl +++ b/hw/syn/synopsys/esyn.tcl @@ -4,9 +4,9 @@ set link_library [concat ./NanGate_15nm_OCL.db] set symbol_library {} set target_library [concat ./NanGate_15nm_OCL.db] -set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_define_synth.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v VX_cache_bank_valid.v \ +set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.vh VX_config.vh VX_user_config.vh VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v VX_cache_bank_valid.v \ ] -# set verilog_files [ list Vortex.v VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ +# set verilog_files [ list Vortex.v VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.vh VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ # ] set top_level Vortex diff --git a/hw/syn/synopsys/fsyn.tcl b/hw/syn/synopsys/fsyn.tcl index a2cbc75b..89a44f74 100644 --- a/hw/syn/synopsys/fsyn.tcl +++ b/hw/syn/synopsys/fsyn.tcl @@ -2,9 +2,9 @@ set search_path [concat ../../models/memory/cln28hpm/rf2_128x128_wm1 ../../mod set link_library [concat NanGate_15nm_OCL.db] set symbol_library {} set target_library [concat NanGate_15nm_OCL.db] -set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_bank_valid.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v rf2_128x128_wm1.v \ +set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_bank_valid.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.vh VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v rf2_128x128_wm1.v \ ] -# set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v \ +# set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.vh VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v \ # ] set top_level Vortex diff --git a/hw/syn/synopsys/syn.tcl b/hw/syn/synopsys/syn.tcl index 9e7a711e..077f6b2e 100755 --- a/hw/syn/synopsys/syn.tcl +++ b/hw/syn/synopsys/syn.tcl @@ -3,9 +3,9 @@ set link_library [concat * sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_ set symbol_library {} set target_library [concat sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_m40c.db] -set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v VX_cache_bank_valid.v \ +set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.vh VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v VX_cache_bank_valid.v \ ] -# set verilog_files [ list Vortex.v VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ +# set verilog_files [ list Vortex.v VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.vh VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \ # ] set top_level Vortex diff --git a/runtime/Makefile b/runtime/Makefile index 300c71c2..b66181b9 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -1,6 +1,6 @@ .PHONY: build_config build_config: - ../../hw/gen_config.py --outv none --outc ./config.h + ../hw/scripts/gen_config.py --outv none --outc ./config.h