Migrate build system from Intel oneAPI to AMD AOCC/AOCL/OpenMPI

Replace Intel compilers (ifx/icpx/icx) with AOCC (flang/clang++/clang),
Intel MPI (mpiicpx) with AOCC-built OpenMPI (mpicxx), and Intel MKL
with AOCL BLIS/libFLAME. Replace -xHost with -march=znver4, -ipo with
-flto, -fp-model fast=2 with -ffast-math, -qopenmp with -fopenmp.
Remove PGO, TBB allocator, and Intel-specific runtime libraries.
Fix MKL-specific includes in TwoPunctures.C and gaussj.C to use
standard CBLAS/LAPACKE headers from AOCL.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 02:19:00 +08:00
parent 0db537479b
commit 0992e2219a
4 changed files with 145 additions and 181 deletions

View File

@@ -27,7 +27,7 @@ using namespace std;
#endif #endif
#include "TwoPunctures.h" #include "TwoPunctures.h"
#include <mkl_cblas.h> #include <cblas.h>
TwoPunctures::TwoPunctures(double mp, double mm, double b, TwoPunctures::TwoPunctures(double mp, double mm, double b,
double P_plusx, double P_plusy, double P_plusz, double P_plusx, double P_plusy, double P_plusz,

View File

@@ -18,7 +18,7 @@ using namespace std;
#endif #endif
// Intel oneMKL LAPACK interface // Intel oneMKL LAPACK interface
#include <mkl_lapacke.h> #include <lapacke.h>
/* Linear equation solution using Intel oneMKL LAPACK. /* Linear equation solution using Intel oneMKL LAPACK.
a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input a[0..n-1][0..n-1] is the input matrix. b[0..n-1] is input
containing the right-hand side vectors. On output a is containing the right-hand side vectors. On output a is

View File

@@ -1,5 +1,5 @@
include makefile.inc include makefile.inc
-include AMSS_NCKU_build.mk -include AMSS_NCKU_build.mk
@@ -40,58 +40,40 @@ POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE) TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL) ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt) ## AMD AOCC build flags optimized for EPYC Zen 4 (-march=znver4)
## make -> opt (PGO-guided, maximum performance) CXXAPPFLAGS = -O3 -march=znver4 -ffast-math -flto \
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data) -Dfortran3 -Dnewc -I$(AOCL_ROOT)/include $(INTERP_LB_FLAGS) \
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ f90appflags = -O3 -march=znver4 -ffast-math -flto \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -cpp -I$(AOCL_ROOT)/include $(POLINT6_FLAG)
else
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
## PGO has been turned off, now tested and found to be negative optimization
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
.SUFFIXES: .o .f90 .C .for .cu
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ .f90.o:
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \ $(f90) $(f90appflags) -c $< -o $@
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ .C.o:
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
endif
.for.o:
.SUFFIXES: .o .f90 .C .for .cu $(f77) -c $< -o $@
.f90.o: .cu.o:
$(f90) $(f90appflags) -c $< -o $@ $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
.C.o: # C rewrite of BSSN RHS kernel and helpers
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
.for.o:
$(f77) -c $< -o $@
.cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
# C rewrite of BSSN RHS kernel and helpers
bssn_rhs_c.o: bssn_rhs_c.C bssn_rhs_c.o: bssn_rhs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
fderivs_c.o: fderivs_c.C fderivs_c.o: fderivs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
fdderivs_c.o: fdderivs_c.C fdderivs_c.o: fdderivs_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
kodiss_c.o: kodiss_c.C kodiss_c.o: kodiss_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
lopsided_c.o: lopsided_c.C lopsided_c.o: lopsided_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
@@ -100,22 +82,20 @@ lopsided_kodis_c.o: lopsided_kodis_c.C
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h #interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ # ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS ## TwoPunctureABE uses fixed optimal flags (AMD AOCC, no PGO)
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata TP_OPTFLAGS = -O3 -march=znver4 -ffast-math -flto \
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ -Dfortran3 -Dnewc -I$(AOCL_ROOT)/include
-fprofile-instr-use=$(TP_PROFDATA) \
-Dfortran3 -Dnewc -I${MKLROOT}/include TwoPunctures.o: TwoPunctures.C
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
TwoPunctures.o: TwoPunctures.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@ TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(TP_OPTFLAGS) -fopenmp -c $< -o $@
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@ # Input files
# Input files ## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
ifeq ($(USE_CXX_KERNELS),0) ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below # Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
CFILES = CFILES =
@@ -134,95 +114,95 @@ RK4_F90_OBJ =
else else
RK4_F90_OBJ = rungekutta4_rout.o RK4_F90_OBJ = rungekutta4_rout.o
endif endif
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o bssn_class.o surface_integral.o ShellPatch.o\ cgh.o bssn_class.o surface_integral.o ShellPatch.o\
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\ bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\ bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\ Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o surface_integral.o ShellPatch.o\ cgh.o surface_integral.o ShellPatch.o\
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\ bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\ bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\ Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
NullShellPatch2_Evo.o \ NullShellPatch2_Evo.o \
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\ F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
prolongrestrict_cell.o prolongrestrict_vertex.o\ prolongrestrict_cell.o prolongrestrict_vertex.o\
$(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\ $(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\ lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\ shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\ getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o\ fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o\
cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\ cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\
getnpem2.o empart.o NullNews.o fourdcurvature.o\ getnpem2.o empart.o NullNews.o fourdcurvature.o\
bssn2adm.o adm_constraint.o adm_ricci_gamma.o\ bssn2adm.o adm_constraint.o adm_ricci_gamma.o\
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\ scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
NullNews2.o tool_f.o NullNews2.o tool_f.o
ifeq ($(USE_CXX_KERNELS),0) ifeq ($(USE_CXX_KERNELS),0)
# Fortran mode: include original bssn_rhs.o # Fortran mode: include original bssn_rhs.o
F90FILES = $(F90FILES_BASE) bssn_rhs.o F90FILES = $(F90FILES_BASE) bssn_rhs.o
else else
# C++ mode (default): bssn_rhs.o replaced by C++ kernel # C++ mode (default): bssn_rhs.o replaced by C++ kernel
F90FILES = $(F90FILES_BASE) F90FILES = $(F90FILES_BASE)
endif endif
F77FILES = zbesh.o F77FILES = zbesh.o
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \ AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
tgrid.o fd_grid.o ghost_zone.o array.o round.o norm.o fuzzy.o error_exit.o miscfp.o \ tgrid.o fd_grid.o ghost_zone.o array.o round.o norm.o fuzzy.o error_exit.o miscfp.o \
linear_map.o cpm_map.o BH_diagnostics.o setup.o horizon_sequence.o find_horizons.o \ linear_map.o cpm_map.o BH_diagnostics.o setup.o horizon_sequence.o find_horizons.o \
initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o
TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
# file dependences # file dependences
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh $(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\ misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
rungekutta4_rout.h var.h bssn_class.h bssn_rhs.h sommerfeld_rout.h\ rungekutta4_rout.h var.h bssn_class.h bssn_rhs.h sommerfeld_rout.h\
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\ cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\ fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\ NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\ empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
initial_null2.h NullShellPatch2.h initial_null2.h NullShellPatch2.h
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ $(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\ misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\ rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\
cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\ cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\ fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\ NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\ empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
initial_null2.h NullShellPatch2.h \ initial_null2.h NullShellPatch2.h \
bssn_gpu_class.h bssn_macro.h bssn_gpu_class.h bssn_macro.h
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h $(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h $(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
TwoPunctureFILES: TwoPunctures.h TwoPunctureFILES: TwoPunctures.h
$(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
misc.o : zbesh.o misc.o : zbesh.o
# projects # projects
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES) TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS) $(CLINKER) $(TP_OPTFLAGS) -fopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean: clean:
rm *.o ABE ABEGPU TwoPunctureABE make.log -f rm *.o ABE ABEGPU TwoPunctureABE make.log -f

View File

@@ -1,33 +1,17 @@
## GCC version (commented out) ## AMD AOCC version with AOCL (Optimized for AMD EPYC Zen 4)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## Intel oneAPI version with oneMKL (Optimized for performance) ## AOCL root path for includes and libraries
filein = -I/usr/include/ -I${MKLROOT}/include AOCL_ROOT ?= /home/gh0s7/AOCC/aocl/5.2.0/aocc
## Using sequential MKL (OpenMP disabled for better single-threaded performance) ## AOCC-built OpenMPI prefix
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library OMPI_PREFIX ?= /home/gh0s7/AOCC/aocc-openmpi
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
## Memory allocator switch filein = -I/usr/include/ -I$(AOCL_ROOT)/include
## 1 (default) : link Intel oneTBB allocator (libtbbmalloc)
## 0 : use system default allocator (ptmalloc)
USE_TBBMALLOC ?= 1
TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
ifneq ($(wildcard $(TBBMALLOC_SO)),)
TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
else
TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
endif
ifeq ($(USE_TBBMALLOC),1)
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
endif
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags) ## Using AOCL BLIS + libFLAME for BLAS/LAPACK
## opt : (default) maximum performance with PGO profile-guided optimization ## AOCC Fortran runtime: -lflang (includes FortranRuntime)
## instrument : PGO Phase 1 instrumentation to collect fresh profile data ## AOCC OpenMP runtime: -lomp (LLVM OpenMP)
PGO_MODE ?= opt LDLIBS = -L$(AOCL_ROOT)/lib -lblis -lflame -lamdlibm -lflang -lpgmath -lpthread -lm -ldl -lomp
## Interp_Points load balance profiling mode ## Interp_Points load balance profiling mode
## off : (default) no load balance instrumentation ## off : (default) no load balance instrumentation
@@ -65,11 +49,11 @@ USE_TRANSFER_CACHE ?= auto
## 0 : use original Fortran rungekutta4_rout.o ## 0 : use original Fortran rungekutta4_rout.o
USE_CXX_RK4 ?= 1 USE_CXX_RK4 ?= 1
f90 = ifx f90 = flang
f77 = ifx f77 = flang
CXX = icpx CXX = clang++
CC = icx CC = clang
CLINKER = mpiicpx CLINKER = $(OMPI_PREFIX)/bin/mpicxx
Cu = nvcc Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include