From ec349a854f75017fcf57db00c7bbcd6de5a135e9 Mon Sep 17 00:00:00 2001 From: abnerhexu Date: Wed, 6 May 2026 14:52:08 +0800 Subject: [PATCH] Add Blackwell tensor core support to Chipyard - Update RadianceConfigs.scala with Blackwell configurations - Update Verilator Makefile with optimized build flags - Update submodules: radiance (Blackwell implementation), gemmini (params update) - Update build flags and gitignore --- .gitignore | 4 ++++ .../src/main/scala/config/RadianceConfigs.scala | 2 +- generators/gemmini | 2 +- generators/radiance | 2 +- sims/.gitignore | 2 +- sims/common-sim-flags.mk | 2 +- sims/verilator/Makefile | 16 +++++++++++----- variables.mk | 4 ++++ 8 files changed, 24 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 5dd75bec..1f4da4dc 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,7 @@ project/project/ .sbt .classpath_cache/ .vscode/ +**/*.o +**/*.fir +**/*.d +test_run_dir/ \ No newline at end of file diff --git a/generators/chipyard/src/main/scala/config/RadianceConfigs.scala b/generators/chipyard/src/main/scala/config/RadianceConfigs.scala index 3f0e8a39..296e4d76 100644 --- a/generators/chipyard/src/main/scala/config/RadianceConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RadianceConfigs.scala @@ -88,7 +88,7 @@ class Radiance4CFP16ClusterConfig extends Config( class RadianceBlackwellClusterConfig extends Config( new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++ - new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = false, tensorCoreBlackwell = true, useVxCache = false) ++ + new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = false, tensorCoreBlackwell = true, startupAddress = BigInt("80000000", 16), useVxCache = false) ++ new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 8) ++ new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++ new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++ diff --git a/generators/gemmini b/generators/gemmini index 041342d3..c1cb0461 160000 --- a/generators/gemmini +++ b/generators/gemmini @@ -1 +1 @@ -Subproject commit 041342d37f656f3177007b2a6bae510cc529f59b +Subproject commit c1cb0461a7d3ad9455e4fad72d5937d668e926cd diff --git a/generators/radiance b/generators/radiance index 136cf70a..5112f366 160000 --- a/generators/radiance +++ b/generators/radiance @@ -1 +1 @@ -Subproject commit 136cf70a5800fd466758c4f22285668ef50860ed +Subproject commit 5112f3665a0e05b8f7a1bc22305cdf471ed1c3aa diff --git a/sims/.gitignore b/sims/.gitignore index 049e6519..d5c17f1c 100644 --- a/sims/.gitignore +++ b/sims/.gitignore @@ -1,2 +1,2 @@ *.bin -*.bin.* +*.bin.* \ No newline at end of file diff --git a/sims/common-sim-flags.mk b/sims/common-sim-flags.mk index dd5130c4..540f66a8 100644 --- a/sims/common-sim-flags.mk +++ b/sims/common-sim-flags.mk @@ -1,7 +1,7 @@ #---------------------------------------------------------------------------------------- # common gcc configuration/optimization #---------------------------------------------------------------------------------------- -SIM_OPT_CXXFLAGS := -O3 +SIM_OPT_CXXFLAGS := -O0 LRISCV=-lriscv export USE_CHISEL6=1 diff --git a/sims/verilator/Makefile b/sims/verilator/Makefile index 12c793b0..124ad54f 100644 --- a/sims/verilator/Makefile +++ b/sims/verilator/Makefile @@ -91,6 +91,7 @@ RUNTIME_PROFILING_VFLAGS := $(if $(filter $(VERILATOR_PROFILE),all),\ VERILATOR_THREADS ?= 1 RUNTIME_THREADS := --threads $(VERILATOR_THREADS) --threads-dpi all +VERILATOR_MAKE_JOBS ?= 4 USE_FST ?= 0 TRACING_OPTS := $(if $(filter $(USE_FST),0),\ @@ -103,12 +104,16 @@ get_waveform_flag = +vcdfile=$(1).$(if $(filter $(USE_FST),0),vcd,fst) #---------------------------------------------------------------------------------------- # we initially had --noassert for performance, but several modules use # assertions, including dramsim, so we enable --assert by default +VERILATOR_OUTPUT_SPLIT ?= 10000 +VERILATOR_OUTPUT_SPLIT_CFUNCS ?= 100 + VERILATOR_OPT_FLAGS ?= \ - -O3 \ + -O0 \ --x-assign fast \ --x-initial fast \ - --output-split 10000 \ - --output-split-cfuncs 100 + --output-split $(VERILATOR_OUTPUT_SPLIT) \ + --output-split-cfuncs $(VERILATOR_OUTPUT_SPLIT_CFUNCS) \ + -DPRINTF_COND_=1 # default flags added for external IP (cva6/NVDLA) VERILOG_IP_VERILATOR_FLAGS := \ @@ -164,6 +169,7 @@ VERILATOR_NONCC_OPTS = \ VERILATOR_CXXFLAGS = \ $(SIM_CXXFLAGS) \ $(RUNTIME_PROFILING_CFLAGS) \ + -fno-inline \ -DVERILATOR VERILATOR_LDFLAGS = $(SIM_LDFLAGS) @@ -208,10 +214,10 @@ $(model_mk_debug): $(sim_common_files) $(EXTRA_SIM_REQS) # invoke make to make verilator sim rules ######################################################################################### $(sim): $(model_mk) $(dramsim_lib) - $(MAKE) VM_PARALLEL_BUILDS=1 -C $(model_dir) -f V$(TB).mk + $(MAKE) -j $(VERILATOR_MAKE_JOBS) VM_PARALLEL_BUILDS=1 -C $(model_dir) -f V$(TB).mk $(sim_debug): $(model_mk_debug) $(dramsim_lib) - $(MAKE) VM_PARALLEL_BUILDS=1 -C $(model_dir_debug) -f V$(TB).mk + $(MAKE) -j $(VERILATOR_MAKE_JOBS) VM_PARALLEL_BUILDS=1 -C $(model_dir_debug) -f V$(TB).mk ######################################################################################### # create a verilator vpd rule diff --git a/variables.mk b/variables.mk index 8b5a3647..c6067119 100644 --- a/variables.mk +++ b/variables.mk @@ -285,6 +285,10 @@ get_out_name = $(subst $() $(),_,$(notdir $(basename $(1)))) LOADMEM ?= LOADARCH ?= +ifeq ($(CONFIG),VirgoBlackwellConfig) +override LOADMEM = 1 +endif + ifneq ($(LOADARCH),) override BINARY = $(addsuffix /mem.elf,$(LOADARCH)) override BINARIES = $(addsuffix /mem.elf,$(LOADARCH))