Switch to NVIDIA HPC Toolchain
This commit is contained in:
@@ -1,28 +1,7 @@
|
||||
## GCC version (commented out)
|
||||
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
|
||||
|
||||
## Intel oneAPI version with oneMKL (Optimized for performance)
|
||||
filein = -I/usr/include/ -I${MKLROOT}/include
|
||||
|
||||
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
||||
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
||||
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
|
||||
|
||||
## Memory allocator switch
|
||||
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
|
||||
## 0 : use system default allocator (ptmalloc)
|
||||
USE_TBBMALLOC ?= 1
|
||||
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
|
||||
ifneq ($(wildcard $(TBBMALLOC_SO)),)
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
|
||||
else
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
|
||||
endif
|
||||
ifeq ($(USE_TBBMALLOC),1)
|
||||
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
||||
endif
|
||||
## Toolchain selection
|
||||
## nvhpc : NVIDIA HPC SDK + CUDA-aware MPI (default)
|
||||
## intel : Intel oneAPI toolchain (legacy path)
|
||||
TOOLCHAIN ?= nvhpc
|
||||
|
||||
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
||||
## opt : (default) maximum performance with PGO profile-guided optimization
|
||||
@@ -43,6 +22,14 @@ else
|
||||
INTERP_LB_FLAGS =
|
||||
endif
|
||||
|
||||
MKLROOT ?= /home/intel/oneapi/mkl/latest
|
||||
MKL_LIBDIR ?= $(MKLROOT)/lib/intel64
|
||||
MKL_INC ?= -I$(MKLROOT)/include
|
||||
|
||||
NVHPC_ROOT ?= /home/nvidia/hpc_sdk/Linux_x86_64/25.11
|
||||
CUDA_HOME ?= $(NVHPC_ROOT)/cuda
|
||||
CUDA_ARCH ?= sm_80
|
||||
|
||||
## Kernel implementation switch
|
||||
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
||||
## 0 : fall back to original Fortran kernels
|
||||
@@ -58,17 +45,47 @@ USE_CXX_Z4C_KERNELS ?= 1
|
||||
## 0 : use original Fortran rungekutta4_rout.o
|
||||
USE_CXX_RK4 ?= 1
|
||||
|
||||
## Memory allocator switch
|
||||
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
|
||||
## 0 : use system default allocator (ptmalloc)
|
||||
USE_TBBMALLOC ?= 1
|
||||
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
|
||||
ifneq ($(wildcard $(TBBMALLOC_SO)),)
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
|
||||
else
|
||||
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
|
||||
endif
|
||||
|
||||
ifeq ($(TOOLCHAIN),intel)
|
||||
f90 = ifx
|
||||
f77 = ifx
|
||||
CXX = icpx
|
||||
CC = icx
|
||||
CLINKER = mpiicpx
|
||||
filein = -I/usr/include/ $(MKL_INC) -I$(CUDA_HOME)/include
|
||||
LDLIBS = -L$(MKL_LIBDIR) -Wl,-rpath,$(MKL_LIBDIR) \
|
||||
-lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
|
||||
-lifcore -limf -liomp5 -lpthread -lm -ldl \
|
||||
-L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart
|
||||
else ifeq ($(TOOLCHAIN),nvhpc)
|
||||
f90 = mpifort
|
||||
f77 = mpifort
|
||||
CXX = mpicxx
|
||||
CC = mpicc
|
||||
CLINKER = mpicxx
|
||||
|
||||
Cu = nvcc
|
||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||
CUDA_ARCH ?= sm_80
|
||||
ifneq ($(strip $(CUDA_ARCH)),)
|
||||
CUDA_APP_FLAGS += -arch=$(CUDA_ARCH)
|
||||
filein = -I/usr/include/ $(MKL_INC) -I$(CUDA_HOME)/include
|
||||
LDLIBS = -L$(MKL_LIBDIR) -Wl,-rpath,$(MKL_LIBDIR) \
|
||||
-lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
|
||||
-lpthread -lm -ldl \
|
||||
-L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcuda -lcudart \
|
||||
-fortranlibs
|
||||
endif
|
||||
|
||||
ifeq ($(USE_TBBMALLOC),1)
|
||||
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
|
||||
endif
|
||||
|
||||
Cu = $(NVHPC_ROOT)/compilers/bin/nvcc
|
||||
CUDA_LIB_PATH = -L$(CUDA_HOME)/lib64 -I$(CUDA_HOME)/include
|
||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc -arch=$(CUDA_ARCH)
|
||||
Reference in New Issue
Block a user