Compare commits
2 Commits
main
...
asc26-plan
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f62e75245 | |||
| 3806e82c47 |
@@ -312,8 +312,8 @@
|
|||||||
end function decide3d
|
end function decide3d
|
||||||
|
|
||||||
!---------------------------------------------------------------------------------------
|
!---------------------------------------------------------------------------------------
|
||||||
subroutine symmetry_bd(ord,extc,func,funcc,SoA)
|
subroutine symmetry_bd(ord,extc,func,funcc,SoA)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
!~~~~~~> input arguments
|
!~~~~~~> input arguments
|
||||||
integer,intent(in) :: ord
|
integer,intent(in) :: ord
|
||||||
@@ -322,9 +322,12 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
|
|||||||
real*8, dimension(-ord+1:extc(1),-ord+1:extc(2),-ord+1:extc(3)),intent(out):: funcc
|
real*8, dimension(-ord+1:extc(1),-ord+1:extc(2),-ord+1:extc(3)),intent(out):: funcc
|
||||||
real*8, dimension(1:3), intent(in) :: SoA
|
real*8, dimension(1:3), intent(in) :: SoA
|
||||||
|
|
||||||
integer::i
|
integer::i
|
||||||
|
|
||||||
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
#if USE_FMISC_SAFE_MODE
|
||||||
|
funcc = 0.d0
|
||||||
|
#endif
|
||||||
|
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
|
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
|
||||||
enddo
|
enddo
|
||||||
@@ -337,8 +340,8 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
|
|||||||
|
|
||||||
end subroutine symmetry_bd
|
end subroutine symmetry_bd
|
||||||
|
|
||||||
subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
|
subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
!~~~~~~> input arguments
|
!~~~~~~> input arguments
|
||||||
integer,intent(in) :: ord
|
integer,intent(in) :: ord
|
||||||
@@ -347,9 +350,12 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
|
|||||||
real*8, dimension(-ord+1:extc(1)+ord,-ord+1:extc(2)+ord,-ord+1:extc(3)+ord),intent(out):: funcc
|
real*8, dimension(-ord+1:extc(1)+ord,-ord+1:extc(2)+ord,-ord+1:extc(3)+ord),intent(out):: funcc
|
||||||
real*8, dimension(1:3), intent(in) :: SoA
|
real*8, dimension(1:3), intent(in) :: SoA
|
||||||
|
|
||||||
integer::i
|
integer::i
|
||||||
|
|
||||||
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
#if USE_FMISC_SAFE_MODE
|
||||||
|
funcc = 0.d0
|
||||||
|
#endif
|
||||||
|
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
|
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
|
||||||
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1)
|
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1)
|
||||||
@@ -365,8 +371,8 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
|
|||||||
|
|
||||||
end subroutine symmetry_tbd
|
end subroutine symmetry_tbd
|
||||||
|
|
||||||
subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
|
subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
!~~~~~~> input arguments
|
!~~~~~~> input arguments
|
||||||
integer,intent(in) :: ord
|
integer,intent(in) :: ord
|
||||||
@@ -375,9 +381,12 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
|
|||||||
real*8, dimension(-ord+1:extc(1)+ord,-ord+1:extc(2)+ord,extc(3)),intent(out):: funcc
|
real*8, dimension(-ord+1:extc(1)+ord,-ord+1:extc(2)+ord,extc(3)),intent(out):: funcc
|
||||||
real*8, dimension(2), intent(in) :: SoA
|
real*8, dimension(2), intent(in) :: SoA
|
||||||
|
|
||||||
integer::i
|
integer::i
|
||||||
|
|
||||||
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
#if USE_FMISC_SAFE_MODE
|
||||||
|
funcc = 0.d0
|
||||||
|
#endif
|
||||||
|
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
|
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
|
||||||
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1)
|
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1)
|
||||||
@@ -1119,6 +1128,10 @@ end subroutine d2dump
|
|||||||
#define POLINT6_USE_BARYCENTRIC 1
|
#define POLINT6_USE_BARYCENTRIC 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef USE_FMISC_SAFE_MODE
|
||||||
|
#define USE_FMISC_SAFE_MODE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_neville
|
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_neville
|
||||||
subroutine polint6_neville(xa, ya, x, y, dy)
|
subroutine polint6_neville(xa, ya, x, y, dy)
|
||||||
implicit none
|
implicit none
|
||||||
@@ -1271,7 +1284,9 @@ end subroutine d2dump
|
|||||||
real*8 :: dif, dift, hp, h, den_val
|
real*8 :: dif, dift, hp, h, den_val
|
||||||
|
|
||||||
if (ordn == 6) then
|
if (ordn == 6) then
|
||||||
#if POLINT6_USE_BARYCENTRIC
|
#if USE_FMISC_SAFE_MODE
|
||||||
|
call polint6_neville(xa, ya, x, y, dy)
|
||||||
|
#elif POLINT6_USE_BARYCENTRIC
|
||||||
call polint6_barycentric(xa, ya, x, y, dy)
|
call polint6_barycentric(xa, ya, x, y, dy)
|
||||||
#else
|
#else
|
||||||
call polint6_neville(xa, ya, x, y, dy)
|
call polint6_neville(xa, ya, x, y, dy)
|
||||||
@@ -1376,10 +1391,10 @@ end subroutine d2dump
|
|||||||
real*8, intent(in) :: x1,x2
|
real*8, intent(in) :: x1,x2
|
||||||
real*8, intent(out) :: y,dy
|
real*8, intent(out) :: y,dy
|
||||||
|
|
||||||
#ifdef POLINT_LEGACY_ORDER
|
#if USE_FMISC_SAFE_MODE || defined(POLINT_LEGACY_ORDER)
|
||||||
integer :: i,m
|
integer :: i,m
|
||||||
real*8, dimension(ordn) :: ymtmp
|
real*8, dimension(ordn) :: ymtmp
|
||||||
real*8, dimension(ordn) :: yntmp
|
real*8, dimension(ordn) :: yntmp
|
||||||
|
|
||||||
m=size(x1a)
|
m=size(x1a)
|
||||||
do i=1,m
|
do i=1,m
|
||||||
@@ -1414,7 +1429,7 @@ end subroutine d2dump
|
|||||||
real*8, intent(in) :: x1,x2,x3
|
real*8, intent(in) :: x1,x2,x3
|
||||||
real*8, intent(out) :: y,dy
|
real*8, intent(out) :: y,dy
|
||||||
|
|
||||||
#ifdef POLINT_LEGACY_ORDER
|
#if USE_FMISC_SAFE_MODE || defined(POLINT_LEGACY_ORDER)
|
||||||
integer :: i,j,m,n
|
integer :: i,j,m,n
|
||||||
real*8, dimension(ordn,ordn) :: yatmp
|
real*8, dimension(ordn,ordn) :: yatmp
|
||||||
real*8, dimension(ordn) :: ymtmp
|
real*8, dimension(ordn) :: ymtmp
|
||||||
@@ -1502,12 +1517,23 @@ if(dabs(X(1)-xmin) < dX) imin = 1
|
|||||||
if(dabs(Y(1)-ymin) < dY) jmin = 1
|
if(dabs(Y(1)-ymin) < dY) jmin = 1
|
||||||
if(dabs(Z(1)-zmin) < dZ) kmin = 1
|
if(dabs(Z(1)-zmin) < dZ) kmin = 1
|
||||||
|
|
||||||
! Optimized with oneMKL BLAS DDOT for dot product
|
#if USE_FMISC_SAFE_MODE
|
||||||
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
|
f_out = 0.d0
|
||||||
allocate(f_flat(n_elements))
|
do k = kmin, kmax
|
||||||
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
|
do j = jmin, jmax
|
||||||
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
|
do i = imin, imax
|
||||||
deallocate(f_flat)
|
f_out = f_out + f(i,j,k)*f(i,j,k)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
#else
|
||||||
|
! Optimized with oneMKL BLAS DDOT for dot product
|
||||||
|
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
|
||||||
|
allocate(f_flat(n_elements))
|
||||||
|
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
|
||||||
|
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
|
||||||
|
deallocate(f_flat)
|
||||||
|
#endif
|
||||||
|
|
||||||
f_out = f_out*dX*dY*dZ
|
f_out = f_out*dX*dY*dZ
|
||||||
|
|
||||||
@@ -1565,7 +1591,9 @@ if(dabs(Z(1)-zmin) < dZ) kmin = 1
|
|||||||
|
|
||||||
do k=kmin,kmax
|
do k=kmin,kmax
|
||||||
do j=jmin,jmax
|
do j=jmin,jmax
|
||||||
|
#if !USE_FMISC_SAFE_MODE
|
||||||
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
|
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
|
||||||
|
#endif
|
||||||
do i=imin,imax
|
do i=imin,imax
|
||||||
s1 = s1 + f1(i,j,k)*f1(i,j,k)
|
s1 = s1 + f1(i,j,k)*f1(i,j,k)
|
||||||
s2 = s2 + f2(i,j,k)*f2(i,j,k)
|
s2 = s2 + f2(i,j,k)*f2(i,j,k)
|
||||||
@@ -1672,12 +1700,23 @@ if(Symmetry==2)then
|
|||||||
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
|
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! Optimized with oneMKL BLAS DDOT for dot product
|
#if USE_FMISC_SAFE_MODE
|
||||||
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
|
f_out = 0.d0
|
||||||
allocate(f_flat(n_elements))
|
do k = kmin, kmax
|
||||||
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
|
do j = jmin, jmax
|
||||||
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
|
do i = imin, imax
|
||||||
deallocate(f_flat)
|
f_out = f_out + f(i,j,k)*f(i,j,k)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
#else
|
||||||
|
! Optimized with oneMKL BLAS DDOT for dot product
|
||||||
|
n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
|
||||||
|
allocate(f_flat(n_elements))
|
||||||
|
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
|
||||||
|
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
|
||||||
|
deallocate(f_flat)
|
||||||
|
#endif
|
||||||
|
|
||||||
f_out = f_out*dX*dY*dZ
|
f_out = f_out*dX*dY*dZ
|
||||||
|
|
||||||
@@ -1769,12 +1808,23 @@ if(Symmetry==2)then
|
|||||||
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
|
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! Optimized with oneMKL BLAS DDOT for dot product
|
#if USE_FMISC_SAFE_MODE
|
||||||
Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
|
f_out = 0.d0
|
||||||
allocate(f_flat(Nout))
|
do k = kmin, kmax
|
||||||
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [Nout])
|
do j = jmin, jmax
|
||||||
f_out = DDOT(Nout, f_flat, 1, f_flat, 1)
|
do i = imin, imax
|
||||||
deallocate(f_flat)
|
f_out = f_out + f(i,j,k)*f(i,j,k)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
#else
|
||||||
|
! Optimized with oneMKL BLAS DDOT for dot product
|
||||||
|
Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
|
||||||
|
allocate(f_flat(Nout))
|
||||||
|
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [Nout])
|
||||||
|
f_out = DDOT(Nout, f_flat, 1, f_flat, 1)
|
||||||
|
deallocate(f_flat)
|
||||||
|
#endif
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -1878,13 +1928,23 @@ deallocate(f_flat)
|
|||||||
real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0
|
real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0
|
||||||
integer :: i,j,k
|
integer :: i,j,k
|
||||||
|
|
||||||
|
#if USE_FMISC_SAFE_MODE
|
||||||
|
do k=1,ext(3)
|
||||||
|
do j=1,ext(2)
|
||||||
|
do i=1,ext(1)
|
||||||
|
fout(i,j,k) = C1*f1(i,j,k)+C2*f2(i,j,k)+C3*f3(i,j,k)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
#else
|
||||||
do concurrent (k=1:ext(3), j=1:ext(2), i=1:ext(1))
|
do concurrent (k=1:ext(3), j=1:ext(2), i=1:ext(1))
|
||||||
fout(i,j,k) = C1*f1(i,j,k)+C2*f2(i,j,k)+C3*f3(i,j,k)
|
fout(i,j,k) = C1*f1(i,j,k)+C2*f2(i,j,k)+C3*f3(i,j,k)
|
||||||
end do
|
end do
|
||||||
|
#endif
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
end subroutine average2
|
end subroutine average2
|
||||||
!-----------------------------------------------------------------------------
|
!-----------------------------------------------------------------------------
|
||||||
subroutine average2p(ext,f1,f2,f3,fout)
|
subroutine average2p(ext,f1,f2,f3,fout)
|
||||||
implicit none
|
implicit none
|
||||||
@@ -2024,8 +2084,15 @@ deallocate(f_flat)
|
|||||||
tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m)
|
tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
! Third dimension: x-direction weighted sum using BLAS DDOT
|
#if USE_FMISC_SAFE_MODE
|
||||||
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
|
f_int = 0.d0
|
||||||
|
do m = 1, ORDN
|
||||||
|
f_int = f_int + coef(m) * tmp1(m)
|
||||||
|
end do
|
||||||
|
#else
|
||||||
|
! Third dimension: x-direction weighted sum using BLAS DDOT
|
||||||
|
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
|
||||||
|
#endif
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -2091,8 +2158,15 @@ deallocate(f_flat)
|
|||||||
tmp1 = tmp1 + coef(ORDN+m)*ya(:,m)
|
tmp1 = tmp1 + coef(ORDN+m)*ya(:,m)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
! Use BLAS DDOT for final weighted sum
|
#if USE_FMISC_SAFE_MODE
|
||||||
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
|
f_int = 0.d0
|
||||||
|
do m = 1, ORDN
|
||||||
|
f_int = f_int + coef(m) * tmp1(m)
|
||||||
|
end do
|
||||||
|
#else
|
||||||
|
! Use BLAS DDOT for final weighted sum
|
||||||
|
f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
|
||||||
|
#endif
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -2184,8 +2258,15 @@ deallocate(f_flat)
|
|||||||
write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd
|
write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd
|
||||||
endif
|
endif
|
||||||
|
|
||||||
! Optimized with BLAS DDOT for weighted sum
|
#if USE_FMISC_SAFE_MODE
|
||||||
f_int = DDOT(ORDN, coef, 1, ya, 1)
|
f_int = 0.d0
|
||||||
|
do m = 1, ORDN
|
||||||
|
f_int = f_int + coef(m) * ya(m)
|
||||||
|
end do
|
||||||
|
#else
|
||||||
|
! Optimized with BLAS DDOT for weighted sum
|
||||||
|
f_int = DDOT(ORDN, coef, 1, ya, 1)
|
||||||
|
#endif
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ EM_KERNEL_FLAG = -DBSSN_USE_EM_C_KERNEL=$(EFFECTIVE_USE_CXX_EM_KERNEL)
|
|||||||
## 0 : fallback to Neville path
|
## 0 : fallback to Neville path
|
||||||
POLINT6_USE_BARY ?= 1
|
POLINT6_USE_BARY ?= 1
|
||||||
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||||
|
FMISC_SAFE_FLAG = -DUSE_FMISC_SAFE_MODE=$(USE_FMISC_SAFE_MODE)
|
||||||
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
|
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(EFFECTIVE_USE_TRANSFER_CACHE)
|
||||||
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
|
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(EFFECTIVE_USE_CXX_ESCALAR_KERNEL)
|
||||||
|
|
||||||
@@ -65,22 +66,26 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
|||||||
|
|
||||||
ifeq ($(PGO_MODE),instrument)
|
ifeq ($(PGO_MODE),instrument)
|
||||||
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
||||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
CXXAPPFLAGS = -O3 -march=x86-64-v4 -fma -fprofile-instr-generate -ipo \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||||
|
$(FMISC_SAFE_FLAG) \
|
||||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
||||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
f90appflags = -O3 -march=x86-64-v4 -fma -fprofile-instr-generate -ipo \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) \
|
||||||
|
$(FMISC_SAFE_FLAG)
|
||||||
else
|
else
|
||||||
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||||
## PGO has been turned off, now tested and found to be negative optimization
|
## PGO has been turned off, now tested and found to be negative optimization
|
||||||
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization
|
||||||
|
|
||||||
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
CXXAPPFLAGS = -O3 -march=x86-64-v4 -fp-model fast=2 -fma -ipo \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||||
|
$(FMISC_SAFE_FLAG) \
|
||||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG) $(EM_KERNEL_FLAG)
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
f90appflags = -O3 -march=x86-64-v4 -fp-model fast=2 -fma -ipo \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) \
|
||||||
|
$(FMISC_SAFE_FLAG)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
.SUFFIXES: .o .f90 .C .for .cu
|
.SUFFIXES: .o .f90 .C .for .cu
|
||||||
@@ -147,7 +152,7 @@ z4c_rhs_c.o: z4c_rhs_c.C
|
|||||||
|
|
||||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
TP_OPTFLAGS = -O3 -march=x86-64-v4 -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(TP_PROFDATA) \
|
-fprofile-instr-use=$(TP_PROFDATA) \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
|
|
||||||
|
|||||||
@@ -44,20 +44,20 @@ INTERP_LB_FLAGS =
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
## Kernel implementation switch
|
## Kernel implementation switch
|
||||||
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
## 1 : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
||||||
## 0 : fall back to original Fortran kernels
|
## 0 (default): fall back to original Fortran kernels
|
||||||
USE_CXX_KERNELS ?= 1
|
USE_CXX_KERNELS ?= 0
|
||||||
|
|
||||||
## Z4C Cartesian RHS kernel switch
|
## Z4C Cartesian RHS kernel switch
|
||||||
## 1 (default) : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
|
## 1 : use C++ rewrite of Z4c_rhs (main Cartesian path faster)
|
||||||
## 0 : use original Fortran Z4c_rhs.o
|
## 0 (default): use original Fortran Z4c_rhs.o
|
||||||
USE_CXX_Z4C_KERNELS ?= 1
|
USE_CXX_Z4C_KERNELS ?= 0
|
||||||
|
|
||||||
## BSSN-EScalar RHS switch
|
## BSSN-EScalar RHS switch
|
||||||
## 1 (default) : use BSSN-EScalar C wrapper on the normal patch path
|
## 1 : use BSSN-EScalar C wrapper on the normal patch path
|
||||||
## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
|
## 0 : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
|
||||||
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
|
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
|
||||||
USE_CXX_ESCALAR_KERNEL ?= 1
|
USE_CXX_ESCALAR_KERNEL ?= 0
|
||||||
|
|
||||||
## BSSN-EM RHS switch
|
## BSSN-EM RHS switch
|
||||||
## 1 : use BSSN-EM C kernel (bssn_em_rhs_c.C) on the normal patch path
|
## 1 : use BSSN-EM C kernel (bssn_em_rhs_c.C) on the normal patch path
|
||||||
@@ -72,9 +72,14 @@ USE_CXX_EM_KERNEL ?= 0
|
|||||||
USE_TRANSFER_CACHE ?= auto
|
USE_TRANSFER_CACHE ?= auto
|
||||||
|
|
||||||
## RK4 kernel implementation switch
|
## RK4 kernel implementation switch
|
||||||
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
## 1 : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
||||||
## 0 : use original Fortran rungekutta4_rout.o
|
## 0 (default): use original Fortran rungekutta4_rout.o
|
||||||
USE_CXX_RK4 ?= 1
|
USE_CXX_RK4 ?= 0
|
||||||
|
|
||||||
|
## fmisc conservative mode switch
|
||||||
|
## 1 : restore lower-optimization / legacy fmisc numerics
|
||||||
|
## 0 (default): keep the optimized fmisc paths
|
||||||
|
USE_FMISC_SAFE_MODE ?= 0
|
||||||
|
|
||||||
f90 = ifx
|
f90 = ifx
|
||||||
f77 = ifx
|
f77 = ifx
|
||||||
|
|||||||
Reference in New Issue
Block a user