Prepare conservative plan-b build
This commit is contained in:
@@ -65,10 +65,10 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
|||||||
|
|
||||||
ifeq ($(PGO_MODE),instrument)
|
ifeq ($(PGO_MODE),instrument)
|
||||||
## Phase 1: instrumentation — omit -fp-model fast=2 for numerical stability (-ipo is still enabled below)
|
## Phase 1: instrumentation — omit -fp-model fast=2 for numerical stability (-ipo is still enabled below)
|
||||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
CXXAPPFLAGS = -O3 -march=x86-64-v4 -fma -fprofile-instr-generate -ipo \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
||||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
f90appflags = -O3 -march=x86-64-v4 -fma -fprofile-instr-generate -ipo \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||||
else
|
else
|
||||||
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||||
@@ -76,10 +76,10 @@ else
|
|||||||
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||||
|
|
||||||
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
CXXAPPFLAGS = -O3 -march=x86-64-v4 -fp-model fast=2 -fma -ipo \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
f90appflags = -O3 -march=x86-64-v4 -fp-model fast=2 -fma -ipo \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -147,7 +147,7 @@ z4c_rhs_c.o: z4c_rhs_c.C
|
|||||||
|
|
||||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
TP_OPTFLAGS = -O3 -march=x86-64-v4 -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(TP_PROFDATA) \
|
-fprofile-instr-use=$(TP_PROFDATA) \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
|
|
||||||
|
|||||||
@@ -44,20 +44,20 @@ INTERP_LB_FLAGS =
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
## Kernel implementation switch
|
## Kernel implementation switch
|
||||||
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
## 1 : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
||||||
## 0 : fall back to original Fortran kernels
|
## 0 (default): fall back to original Fortran kernels
|
||||||
USE_CXX_KERNELS ?= 1
|
USE_CXX_KERNELS ?= 0
|
||||||
|
|
||||||
## Z4C Cartesian RHS kernel switch
|
## Z4C Cartesian RHS kernel switch
|
||||||
## 1 (default) : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
|
## 1 : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
|
||||||
## 0 : use original Fortran Z4c_rhs.o
|
## 0 (default): use original Fortran Z4c_rhs.o
|
||||||
USE_CXX_Z4C_KERNELS ?= 1
|
USE_CXX_Z4C_KERNELS ?= 0
|
||||||
|
|
||||||
## BSSN-EScalar RHS switch
|
## BSSN-EScalar RHS switch
|
||||||
## 1 (default) : use BSSN-EScalar C wrapper on the normal patch path
|
## 1 : use BSSN-EScalar C wrapper on the normal patch path
|
||||||
## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
|
## 0 (default): keep the original Fortran BSSN-EScalar RHS for precision-safe runs
|
||||||
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
|
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
|
||||||
USE_CXX_ESCALAR_KERNEL ?= 1
|
USE_CXX_ESCALAR_KERNEL ?= 0
|
||||||
|
|
||||||
## BSSN-EM RHS switch
|
## BSSN-EM RHS switch
|
||||||
## 1 : use BSSN-EM C kernel (bssn_em_rhs_c.C) on the normal patch path
|
## 1 : use BSSN-EM C kernel (bssn_em_rhs_c.C) on the normal patch path
|
||||||
@@ -72,9 +72,9 @@ USE_CXX_EM_KERNEL ?= 0
|
|||||||
USE_TRANSFER_CACHE ?= auto
|
USE_TRANSFER_CACHE ?= auto
|
||||||
|
|
||||||
## RK4 kernel implementation switch
|
## RK4 kernel implementation switch
|
||||||
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
## 1 : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
||||||
## 0 : use original Fortran rungekutta4_rout.o
|
## 0 (default): use original Fortran rungekutta4_rout.o
|
||||||
USE_CXX_RK4 ?= 1
|
USE_CXX_RK4 ?= 0
|
||||||
|
|
||||||
f90 = ifx
|
f90 = ifx
|
||||||
f77 = ifx
|
f77 = ifx
|
||||||
|
|||||||
Reference in New Issue
Block a user