Add Blackwell tensor core support to Chipyard
Some checks failed
update-circt / update-circt (push) Has been cancelled

- Update RadianceConfigs.scala with Blackwell configurations
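- Update Verilator Makefile: build at -O0 with -fno-inline, define PRINTF_COND_, make the output-split values overridable, and add VERILATOR_MAKE_JOBS for parallel model builds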
- Update submodules: radiance (Blackwell implementation), gemmini (params update)
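- Lower SIM_OPT_CXXFLAGS from -O3 to -O0, force LOADMEM for VirgoBlackwellConfig, and ignore build artifacts (*.o, *.fir, *.d, test_run_dir/) in .gitignore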
This commit is contained in:
2026-05-06 14:52:08 +08:00
parent f8c98496f5
commit ec349a854f
8 changed files with 24 additions and 10 deletions

4
.gitignore vendored
View File

@@ -28,3 +28,7 @@ project/project/
.sbt .sbt
.classpath_cache/ .classpath_cache/
.vscode/ .vscode/
**/*.o
**/*.fir
**/*.d
test_run_dir/

View File

@@ -88,7 +88,7 @@ class Radiance4CFP16ClusterConfig extends Config(
class RadianceBlackwellClusterConfig extends Config( class RadianceBlackwellClusterConfig extends Config(
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++ new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++
new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = false, tensorCoreBlackwell = true, useVxCache = false) ++ new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = false, tensorCoreBlackwell = true, startupAddress = BigInt("80000000", 16), useVxCache = false) ++
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 8) ++ new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 8) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++ new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++ new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++

View File

@@ -1,7 +1,7 @@
#---------------------------------------------------------------------------------------- #----------------------------------------------------------------------------------------
# common gcc configuration/optimization # common gcc configuration/optimization
#---------------------------------------------------------------------------------------- #----------------------------------------------------------------------------------------
SIM_OPT_CXXFLAGS := -O3 SIM_OPT_CXXFLAGS := -O0
LRISCV=-lriscv LRISCV=-lriscv
export USE_CHISEL6=1 export USE_CHISEL6=1

View File

@@ -91,6 +91,7 @@ RUNTIME_PROFILING_VFLAGS := $(if $(filter $(VERILATOR_PROFILE),all),\
VERILATOR_THREADS ?= 1 VERILATOR_THREADS ?= 1
RUNTIME_THREADS := --threads $(VERILATOR_THREADS) --threads-dpi all RUNTIME_THREADS := --threads $(VERILATOR_THREADS) --threads-dpi all
VERILATOR_MAKE_JOBS ?= 4
USE_FST ?= 0 USE_FST ?= 0
TRACING_OPTS := $(if $(filter $(USE_FST),0),\ TRACING_OPTS := $(if $(filter $(USE_FST),0),\
@@ -103,12 +104,16 @@ get_waveform_flag = +vcdfile=$(1).$(if $(filter $(USE_FST),0),vcd,fst)
#---------------------------------------------------------------------------------------- #----------------------------------------------------------------------------------------
# we initially had --noassert for performance, but several modules use # we initially had --noassert for performance, but several modules use
# assertions, including dramsim, so we enable --assert by default # assertions, including dramsim, so we enable --assert by default
VERILATOR_OUTPUT_SPLIT ?= 10000
VERILATOR_OUTPUT_SPLIT_CFUNCS ?= 100
VERILATOR_OPT_FLAGS ?= \ VERILATOR_OPT_FLAGS ?= \
-O3 \ -O0 \
--x-assign fast \ --x-assign fast \
--x-initial fast \ --x-initial fast \
--output-split 10000 \ --output-split $(VERILATOR_OUTPUT_SPLIT) \
--output-split-cfuncs 100 --output-split-cfuncs $(VERILATOR_OUTPUT_SPLIT_CFUNCS) \
-DPRINTF_COND_=1
# default flags added for external IP (cva6/NVDLA) # default flags added for external IP (cva6/NVDLA)
VERILOG_IP_VERILATOR_FLAGS := \ VERILOG_IP_VERILATOR_FLAGS := \
@@ -164,6 +169,7 @@ VERILATOR_NONCC_OPTS = \
VERILATOR_CXXFLAGS = \ VERILATOR_CXXFLAGS = \
$(SIM_CXXFLAGS) \ $(SIM_CXXFLAGS) \
$(RUNTIME_PROFILING_CFLAGS) \ $(RUNTIME_PROFILING_CFLAGS) \
-fno-inline \
-DVERILATOR -DVERILATOR
VERILATOR_LDFLAGS = $(SIM_LDFLAGS) VERILATOR_LDFLAGS = $(SIM_LDFLAGS)
@@ -208,10 +214,10 @@ $(model_mk_debug): $(sim_common_files) $(EXTRA_SIM_REQS)
# invoke make to make verilator sim rules # invoke make to make verilator sim rules
######################################################################################### #########################################################################################
$(sim): $(model_mk) $(dramsim_lib) $(sim): $(model_mk) $(dramsim_lib)
$(MAKE) VM_PARALLEL_BUILDS=1 -C $(model_dir) -f V$(TB).mk $(MAKE) -j $(VERILATOR_MAKE_JOBS) VM_PARALLEL_BUILDS=1 -C $(model_dir) -f V$(TB).mk
$(sim_debug): $(model_mk_debug) $(dramsim_lib) $(sim_debug): $(model_mk_debug) $(dramsim_lib)
$(MAKE) VM_PARALLEL_BUILDS=1 -C $(model_dir_debug) -f V$(TB).mk $(MAKE) -j $(VERILATOR_MAKE_JOBS) VM_PARALLEL_BUILDS=1 -C $(model_dir_debug) -f V$(TB).mk
######################################################################################### #########################################################################################
# create a verilator vpd rule # create a verilator vpd rule

View File

@@ -285,6 +285,10 @@ get_out_name = $(subst $() $(),_,$(notdir $(basename $(1))))
LOADMEM ?= LOADMEM ?=
LOADARCH ?= LOADARCH ?=
ifeq ($(CONFIG),VirgoBlackwellConfig)
override LOADMEM = 1
endif
ifneq ($(LOADARCH),) ifneq ($(LOADARCH),)
override BINARY = $(addsuffix /mem.elf,$(LOADARCH)) override BINARY = $(addsuffix /mem.elf,$(LOADARCH))
override BINARIES = $(addsuffix /mem.elf,$(LOADARCH)) override BINARIES = $(addsuffix /mem.elf,$(LOADARCH))