Compare commits

...

4 Commits

14 changed files with 1800 additions and 655 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,5 @@
__pycache__ __pycache__
GW150914 GW150914
GW150914*
.codex
docs/

View File

@@ -9,9 +9,19 @@
################################################################## ##################################################################
################################################################## ##################################################################
## Print program introduction ## Guard against re-execution by multiprocessing child processes.
## Without this, using 'spawn' or 'forkserver' context would cause every
## worker to re-run the entire script.
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
##################################################################
## Print program introduction
import print_information import print_information
@@ -422,31 +432,36 @@ print( " Plotting the txt and binary results data from the AMSS-NCKU simulation
print( ) print( )
import plot_xiaoqu import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
## Plot black hole trajectory
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory ) plot_tasks = []
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
## Plot black hole trajectory
## Plot black hole separation vs. time plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory ) plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
## Plot gravitational waveforms (psi4 and strain amplitude) ## Plot black hole separation vs. time
for i in range(input_data.Detector_Number): plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i ) ## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number):
## Plot ADM mass evolution plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
for i in range(input_data.Detector_Number): plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
## Plot ADM mass evolution
## Plot Hamiltonian constraint violation over time for i in range(input_data.Detector_Number):
for i in range(input_data.grid_level): plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
## Plot Hamiltonian constraint violation over time
## Plot stored binary data for i in range(input_data.grid_level):
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory ) plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
run_plot_tasks_parallel(plot_tasks)
## Plot stored binary data
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
print( ) print( )
print( f" This Program Cost = {elapsed_time} Seconds " ) print( f" This Program Cost = {elapsed_time} Seconds " )

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,8 @@
#ifndef TWO_PUNCTURES_H #ifndef TWO_PUNCTURES_H
#define TWO_PUNCTURES_H #define TWO_PUNCTURES_H
#include <omp.h>
#define StencilSize 19 #define StencilSize 19
#define N_PlaneRelax 1 #define N_PlaneRelax 1
#define NRELAX 200 #define NRELAX 200
@@ -32,7 +33,7 @@ private:
int npoints_A, npoints_B, npoints_phi; int npoints_A, npoints_B, npoints_phi;
double target_M_plus, target_M_minus; double target_M_plus, target_M_minus;
double admMass; double admMass;
double adm_tol; double adm_tol;
@@ -42,6 +43,18 @@ private:
int ntotal; int ntotal;
// ===== Precomputed spectral derivative matrices =====
double *D1_A, *D2_A;
double *D1_B, *D2_B;
double *DF1_phi, *DF2_phi;
// ===== Pre-allocated workspace for LineRelax (per-thread) =====
int max_threads;
double **ws_diag_be, **ws_e_be, **ws_f_be, **ws_b_be, **ws_x_be;
double **ws_l_be, **ws_u_be, **ws_d_be, **ws_y_be;
double **ws_diag_al, **ws_e_al, **ws_f_al, **ws_b_al, **ws_x_al;
double **ws_l_al, **ws_u_al, **ws_d_al, **ws_y_al;
struct parameters struct parameters
{ {
int nvar, n1, n2, n3; int nvar, n1, n2, n3;
@@ -58,6 +71,28 @@ public:
int Newtonmaxit); int Newtonmaxit);
~TwoPunctures(); ~TwoPunctures();
// 02/07: New/modified methods
void allocate_workspace();
void free_workspace();
void precompute_derivative_matrices();
void build_cheb_deriv_matrices(int n, double *D1, double *D2);
void build_fourier_deriv_matrices(int N, double *DF1, double *DF2);
void Derivatives_AB3_MatMul(int nvar, int n1, int n2, int n3, derivs v);
void ThomasAlgorithm_ws(int N, double *b, double *a, double *c, double *x, double *q,
double *l, double *u_ws, double *d, double *y);
void LineRelax_be_omp(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD, int tid);
void LineRelax_al_omp(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD, int tid);
void relax_omp(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void Solve(); void Solve();
void set_initial_guess(derivs v); void set_initial_guess(derivs v);
int index(int i, int j, int k, int l, int a, int b, int c, int d); int index(int i, int j, int k, int l, int a, int b, int c, int d);
@@ -116,23 +151,11 @@ public:
double BY_KKofxyz(double x, double y, double z); double BY_KKofxyz(double x, double y, double z);
void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix); void SetMatrix_JFD(int nvar, int n1, int n2, int n3, derivs u, int *ncols, int **cols, double **Matrix);
void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u); void J_times_dv(int nvar, int n1, int n2, int n3, derivs dv, double *Jdv, derivs u);
void relax(double *dv, int const nvar, int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols, double **JFD);
void LineRelax_be(double *dv,
int const i, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols, int **cols,
double **JFD);
void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2, void JFD_times_dv(int i, int j, int k, int nvar, int n1, int n2,
int n3, derivs dv, derivs u, double *values); int n3, derivs dv, derivs u, double *values);
void LinEquations(double A, double B, double X, double R, void LinEquations(double A, double B, double X, double R,
double x, double r, double phi, double x, double r, double phi,
double y, double z, derivs dU, derivs U, double *values); double y, double z, derivs dU, derivs U, double *values);
void LineRelax_al(double *dv,
int const j, int const k, int const nvar,
int const n1, int const n2, int const n3,
double const *rhs, int const *ncols,
int **cols, double **JFD);
void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q); void ThomasAlgorithm(int N, double *b, double *a, double *c, double *x, double *q);
void Save(char *fname); void Save(char *fname);
// provided by Vasileios Paschalidis (vpaschal@illinois.edu) // provided by Vasileios Paschalidis (vpaschal@illinois.edu)
@@ -141,4 +164,4 @@ public:
void SpecCoef(parameters par, int ivar, double *v, double *cf); void SpecCoef(parameters par, int ivar, double *v, double *cf);
}; };
#endif /* TWO_PUNCTURES_H */ #endif /* TWO_PUNCTURES_H */

View File

@@ -17,50 +17,62 @@
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable: !~~~~~~~> Local variable:
real*8, dimension(ex(1),ex(2),ex(3)) :: trA,detg integer :: i,j,k
real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz real*8 :: lgxx,lgyy,lgzz,ldetg
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 real*8 :: ltrA,lscale
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~>
!~~~~~~>
gxx = dxx + ONE
gyy = dyy + ONE do k=1,ex(3)
gzz = dzz + ONE do j=1,ex(2)
do i=1,ex(1)
detg = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz lgxx = dxx(i,j,k) + ONE
gupxx = ( gyy * gzz - gyz * gyz ) / detg lgyy = dyy(i,j,k) + ONE
gupxy = - ( gxy * gzz - gyz * gxz ) / detg lgzz = dzz(i,j,k) + ONE
gupxz = ( gxy * gyz - gyy * gxz ) / detg
gupyy = ( gxx * gzz - gxz * gxz ) / detg ldetg = lgxx * lgyy * lgzz &
gupyz = - ( gxx * gyz - gxy * gxz ) / detg + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) &
gupzz = ( gxx * gyy - gxy * gxy ) / detg + gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
- gxz(i,j,k) * lgyy * gxz(i,j,k) &
trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz & - gxy(i,j,k) * gxy(i,j,k) * lgzz &
+ TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz) - lgxx * gyz(i,j,k) * gyz(i,j,k)
Axx = Axx - F1o3 * gxx * trA lgupxx = ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg
Axy = Axy - F1o3 * gxy * trA lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg
Axz = Axz - F1o3 * gxz * trA lgupxz = ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg
Ayy = Ayy - F1o3 * gyy * trA lgupyy = ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg
Ayz = Ayz - F1o3 * gyz * trA lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg
Azz = Azz - F1o3 * gzz * trA lgupzz = ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg
detg = ONE / ( detg ** F1o3 ) ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+ lgupzz * Azz(i,j,k) &
gxx = gxx * detg + TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
gxy = gxy * detg + lgupyz * Ayz(i,j,k))
gxz = gxz * detg
gyy = gyy * detg Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
gyz = gyz * detg Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA
gzz = gzz * detg Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
dxx = gxx - ONE Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
dyy = gyy - ONE Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
dzz = gzz - ONE
lscale = ONE / ( ldetg ** F1o3 )
dxx(i,j,k) = lgxx * lscale - ONE
gxy(i,j,k) = gxy(i,j,k) * lscale
gxz(i,j,k) = gxz(i,j,k) * lscale
dyy(i,j,k) = lgyy * lscale - ONE
gyz(i,j,k) = gyz(i,j,k) * lscale
dzz(i,j,k) = lgzz * lscale - ONE
enddo
enddo
enddo
return return
@@ -81,52 +93,72 @@
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable: !~~~~~~~> Local variable:
real*8, dimension(ex(1),ex(2),ex(3)) :: trA integer :: i,j,k
real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz real*8 :: lgxx,lgyy,lgzz,lscale
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz,gupyy,gupyz,gupzz real*8 :: lgxy,lgxz,lgyz
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0 real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
real*8 :: ltrA
!~~~~~~> real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
gxx = dxx + ONE !~~~~~~>
gyy = dyy + ONE
gzz = dzz + ONE do k=1,ex(3)
! for g do j=1,ex(2)
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - & do i=1,ex(1)
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
! for g: normalize determinant first
gupzz = ONE / ( gupzz ** F1o3 ) lgxx = dxx(i,j,k) + ONE
lgyy = dyy(i,j,k) + ONE
gxx = gxx * gupzz lgzz = dzz(i,j,k) + ONE
gxy = gxy * gupzz lgxy = gxy(i,j,k)
gxz = gxz * gupzz lgxz = gxz(i,j,k)
gyy = gyy * gupzz lgyz = gyz(i,j,k)
gyz = gyz * gupzz
gzz = gzz * gupzz lscale = lgxx * lgyy * lgzz + lgxy * lgyz * lgxz &
+ lgxz * lgxy * lgyz - lgxz * lgyy * lgxz &
dxx = gxx - ONE - lgxy * lgxy * lgzz - lgxx * lgyz * lgyz
dyy = gyy - ONE
dzz = gzz - ONE lscale = ONE / ( lscale ** F1o3 )
! for A
lgxx = lgxx * lscale
gupxx = ( gyy * gzz - gyz * gyz ) lgxy = lgxy * lscale
gupxy = - ( gxy * gzz - gyz * gxz ) lgxz = lgxz * lscale
gupxz = ( gxy * gyz - gyy * gxz ) lgyy = lgyy * lscale
gupyy = ( gxx * gzz - gxz * gxz ) lgyz = lgyz * lscale
gupyz = - ( gxx * gyz - gxy * gxz ) lgzz = lgzz * lscale
gupzz = ( gxx * gyy - gxy * gxy )
dxx(i,j,k) = lgxx - ONE
trA = gupxx * Axx + gupyy * Ayy + gupzz * Azz & gxy(i,j,k) = lgxy
+ TWO * (gupxy * Axy + gupxz * Axz + gupyz * Ayz) gxz(i,j,k) = lgxz
dyy(i,j,k) = lgyy - ONE
Axx = Axx - F1o3 * gxx * trA gyz(i,j,k) = lgyz
Axy = Axy - F1o3 * gxy * trA dzz(i,j,k) = lgzz - ONE
Axz = Axz - F1o3 * gxz * trA
Ayy = Ayy - F1o3 * gyy * trA ! for A: trace-free using normalized metric (det=1, no division needed)
Ayz = Ayz - F1o3 * gyz * trA lgupxx = ( lgyy * lgzz - lgyz * lgyz )
Azz = Azz - F1o3 * gzz * trA lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
lgupxz = ( lgxy * lgyz - lgyy * lgxz )
lgupyy = ( lgxx * lgzz - lgxz * lgxz )
lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
lgupzz = ( lgxx * lgyy - lgxy * lgxy )
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+ lgupyz * Ayz(i,j,k))
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
enddo
enddo
enddo
return return

View File

@@ -324,8 +324,7 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
enddo enddo
@@ -350,8 +349,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1) funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1)
@@ -379,8 +377,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+2,1:extc(2),1:extc(3))*SoA(1)
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1) funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-1-i,1:extc(2),1:extc(3))*SoA(1)
@@ -886,17 +883,20 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0 !DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
funcc(1:extc(1),1:extc(2),1:extc(3)) = func funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 !DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) do i=0,ord-1
enddo funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
do i=0,ord-1 enddo
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2) !DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
enddo do i=0,ord-1
do i=0,ord-1 funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3) enddo
enddo !DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
do i=0,ord-1
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
enddo
end subroutine symmetry_bd end subroutine symmetry_bd
@@ -912,8 +912,7 @@ subroutine symmetry_tbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-i,1:extc(2),1:extc(3))*SoA(1) funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-i,1:extc(2),1:extc(3))*SoA(1)
@@ -941,8 +940,7 @@ subroutine symmetry_stbd(ord,extc,func,funcc,SoA)
integer::i integer::i
funcc = 0.d0 funcc(1:extc(1),1:extc(2),1:extc(3)) = func
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
do i=0,ord-1 do i=0,ord-1
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1) funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-i,1:extc(2),1:extc(3))*SoA(1) funcc(extc(1)+1+i,1:extc(2),1:extc(3)) = funcc(extc(1)-i,1:extc(2),1:extc(3))*SoA(1)
@@ -1113,151 +1111,353 @@ end subroutine d2dump
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
! common code for cell and vertex ! common code for cell and vertex
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
! Lagrangian polynomial interpolation ! Lagrangian polynomial interpolation
!------------------------------------------------------------------------------ !------------------------------------------------------------------------------
#ifndef POLINT6_USE_BARYCENTRIC
subroutine polint(xa,ya,x,y,dy,ordn) #define POLINT6_USE_BARYCENTRIC 1
#endif
implicit none
!DIR$ ATTRIBUTES FORCEINLINE :: polint6_neville
!~~~~~~> Input Parameter: subroutine polint6_neville(xa, ya, x, y, dy)
integer,intent(in) :: ordn implicit none
real*8, dimension(ordn), intent(in) :: xa,ya
real*8, intent(in) :: x real*8, dimension(6), intent(in) :: xa, ya
real*8, intent(out) :: y,dy real*8, intent(in) :: x
real*8, intent(out) :: y, dy
!~~~~~~> Other parameter:
integer :: i, m, ns, n_m
integer :: m,n,ns real*8, dimension(6) :: c, d, ho
real*8, dimension(ordn) :: c,d,den,ho real*8 :: dif, dift, hp, h, den_val
real*8 :: dif,dift
c = ya
!~~~~~~> d = ya
ho = xa - x
n=ordn
m=ordn ns = 1
dif = abs(x - xa(1))
c=ya
d=ya do i = 2, 6
ho=xa-x dift = abs(x - xa(i))
if (dift < dif) then
ns=1 ns = i
dif=abs(x-xa(1)) dif = dift
do m=1,n end if
dift=abs(x-xa(m)) end do
if(dift < dif) then
ns=m y = ya(ns)
dif=dift ns = ns - 1
end if
end do do m = 1, 5
n_m = 6 - m
y=ya(ns) do i = 1, n_m
ns=ns-1 hp = ho(i)
do m=1,n-1 h = ho(i+m)
den(1:n-m)=ho(1:n-m)-ho(1+m:n) den_val = hp - h
if (any(den(1:n-m) == 0.0))then
write(*,*) 'failure in polint for point',x if (den_val == 0.0d0) then
write(*,*) 'with input points: ',xa write(*,*) 'failure in polint for point',x
stop write(*,*) 'with input points: ',xa
endif stop
den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m) end if
d(1:n-m)=ho(1+m:n)*den(1:n-m)
c(1:n-m)=ho(1:n-m)*den(1:n-m) den_val = (c(i+1) - d(i)) / den_val
if (2*ns < n-m) then
dy=c(ns+1) d(i) = h * den_val
else c(i) = hp * den_val
dy=d(ns) end do
ns=ns-1
end if if (2 * ns < n_m) then
y=y+dy dy = c(ns + 1)
end do else
dy = d(ns)
return ns = ns - 1
end if
end subroutine polint y = y + dy
!------------------------------------------------------------------------------ end do
!
! interpolation in 2 dimensions, follow yx order return
! end subroutine polint6_neville
!------------------------------------------------------------------------------
subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn) !DIR$ ATTRIBUTES FORCEINLINE :: polint6_barycentric
subroutine polint6_barycentric(xa, ya, x, y, dy)
implicit none implicit none
!~~~~~~> Input parameters: real*8, dimension(6), intent(in) :: xa, ya
integer,intent(in) :: ordn real*8, intent(in) :: x
real*8, dimension(1:ordn), intent(in) :: x1a,x2a real*8, intent(out) :: y, dy
real*8, dimension(1:ordn,1:ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2 integer :: i, j
real*8, intent(out) :: y,dy logical :: is_uniform
real*8, dimension(6) :: lambda
!~~~~~~> Other parameters: real*8 :: dx, den_i, term, num, den, step, tol
real*8, parameter :: c_uniform(6) = (/ -1.d0, 5.d0, -10.d0, 10.d0, -5.d0, 1.d0 /)
integer :: i,m
real*8, dimension(ordn) :: ymtmp do i = 1, 6
real*8, dimension(ordn) :: yntmp if (x == xa(i)) then
y = ya(i)
m=size(x1a) dy = 0.d0
return
do i=1,m end if
end do
yntmp=ya(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) step = xa(2) - xa(1)
is_uniform = (step /= 0.d0)
end do if (is_uniform) then
tol = 64.d0 * epsilon(1.d0) * max(1.d0, abs(step))
call polint(x1a,ymtmp,x1,y,dy,ordn) do i = 3, 6
if (abs((xa(i) - xa(i-1)) - step) > tol) then
return is_uniform = .false.
exit
end subroutine polin2 end if
!------------------------------------------------------------------------------ end do
! end if
! interpolation in 3 dimensions, follow zyx order
! if (is_uniform) then
!------------------------------------------------------------------------------ num = 0.d0
subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn) den = 0.d0
do i = 1, 6
implicit none term = c_uniform(i) / (x - xa(i))
num = num + term * ya(i)
!~~~~~~> Input parameters: den = den + term
integer,intent(in) :: ordn end do
real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a y = num / den
real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya dy = 0.d0
real*8, intent(in) :: x1,x2,x3 return
real*8, intent(out) :: y,dy end if
!~~~~~~> Other parameters: do i = 1, 6
den_i = 1.d0
integer :: i,j,m,n do j = 1, 6
real*8, dimension(ordn,ordn) :: yatmp if (j /= i) then
real*8, dimension(ordn) :: ymtmp dx = xa(i) - xa(j)
real*8, dimension(ordn) :: yntmp if (dx == 0.0d0) then
real*8, dimension(ordn) :: yqtmp write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa
m=size(x1a) stop
n=size(x2a) end if
den_i = den_i * dx
do i=1,m end if
do j=1,n end do
lambda(i) = 1.d0 / den_i
yqtmp=ya(i,j,:) end do
call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
num = 0.d0
end do den = 0.d0
do i = 1, 6
yntmp=yatmp(i,:) term = lambda(i) / (x - xa(i))
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) num = num + term * ya(i)
den = den + term
end do end do
call polint(x1a,ymtmp,x1,y,dy,ordn) y = num / den
dy = 0.d0
return
return
end subroutine polin3 end subroutine polint6_barycentric
!DIR$ ATTRIBUTES FORCEINLINE :: polint
subroutine polint(xa, ya, x, y, dy, ordn)
implicit none
integer, intent(in) :: ordn
real*8, dimension(ordn), intent(in) :: xa, ya
real*8, intent(in) :: x
real*8, intent(out) :: y, dy
integer :: i, m, ns, n_m
real*8, dimension(ordn) :: c, d, ho
real*8 :: dif, dift, hp, h, den_val
if (ordn == 6) then
#if POLINT6_USE_BARYCENTRIC
call polint6_barycentric(xa, ya, x, y, dy)
#else
call polint6_neville(xa, ya, x, y, dy)
#endif
return
end if
c = ya
d = ya
ho = xa - x
ns = 1
dif = abs(x - xa(1))
do i = 2, ordn
dift = abs(x - xa(i))
if (dift < dif) then
ns = i
dif = dift
end if
end do
y = ya(ns)
ns = ns - 1
do m = 1, ordn - 1
n_m = ordn - m
do i = 1, n_m
hp = ho(i)
h = ho(i+m)
den_val = hp - h
if (den_val == 0.0d0) then
write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa
stop
end if
den_val = (c(i+1) - d(i)) / den_val
d(i) = h * den_val
c(i) = hp * den_val
end do
if (2 * ns < n_m) then
dy = c(ns + 1)
else
dy = d(ns)
ns = ns - 1
end if
y = y + dy
end do
return
end subroutine polint
!------------------------------------------------------------------------------
! Compute Lagrange interpolation basis weights for one target point.
!------------------------------------------------------------------------------
!DIR$ ATTRIBUTES FORCEINLINE :: polint_lagrange_weights
subroutine polint_lagrange_weights(xa, x, w, ordn)
implicit none
integer, intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: xa
real*8, intent(in) :: x
real*8, dimension(1:ordn), intent(out) :: w
integer :: i, j
real*8 :: num, den, dx
do i = 1, ordn
num = 1.d0
den = 1.d0
do j = 1, ordn
if (j /= i) then
dx = xa(i) - xa(j)
if (dx == 0.0d0) then
write(*,*) 'failure in polint for point',x
write(*,*) 'with input points: ',xa
stop
end if
num = num * (x - xa(j))
den = den * dx
end if
end do
w(i) = num / den
end do
return
end subroutine polint_lagrange_weights
!------------------------------------------------------------------------------
!
! interpolation in 2 dimensions, follow yx order
!
!------------------------------------------------------------------------------
subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn)
implicit none
integer,intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: x1a,x2a
real*8, dimension(1:ordn,1:ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2
real*8, intent(out) :: y,dy
#ifdef POLINT_LEGACY_ORDER
integer :: i,m
real*8, dimension(ordn) :: ymtmp
real*8, dimension(ordn) :: yntmp
m=size(x1a)
do i=1,m
yntmp=ya(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: j
real*8, dimension(ordn) :: ymtmp
real*8 :: dy_temp
do j=1,ordn
call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn)
end do
call polint(x2a, ymtmp, x2, y, dy, ordn)
#endif
return
end subroutine polin2
!------------------------------------------------------------------------------
!
! interpolation in 3 dimensions, follow zyx order
!
!------------------------------------------------------------------------------
subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn)
implicit none
integer,intent(in) :: ordn
real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a
real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya
real*8, intent(in) :: x1,x2,x3
real*8, intent(out) :: y,dy
#ifdef POLINT_LEGACY_ORDER
integer :: i,j,m,n
real*8, dimension(ordn,ordn) :: yatmp
real*8, dimension(ordn) :: ymtmp
real*8, dimension(ordn) :: yntmp
real*8, dimension(ordn) :: yqtmp
m=size(x1a)
n=size(x2a)
do i=1,m
do j=1,n
yqtmp=ya(i,j,:)
call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
end do
yntmp=yatmp(i,:)
call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
end do
call polint(x1a,ymtmp,x1,y,dy,ordn)
#else
integer :: i, j, k
real*8, dimension(ordn) :: w1, w2
real*8, dimension(ordn) :: ymtmp
real*8 :: yx_sum, x_sum
call polint_lagrange_weights(x1a, x1, w1, ordn)
call polint_lagrange_weights(x2a, x2, w2, ordn)
do k = 1, ordn
yx_sum = 0.d0
do j = 1, ordn
x_sum = 0.d0
do i = 1, ordn
x_sum = x_sum + w1(i) * ya(i,j,k)
end do
yx_sum = yx_sum + w2(j) * x_sum
end do
ymtmp(k) = yx_sum
end do
call polint(x3a, ymtmp, x3, y, dy, ordn)
#endif
return
end subroutine polin3
!-------------------------------------------------------------------------------------- !--------------------------------------------------------------------------------------
! calculate L2norm ! calculate L2norm
subroutine l2normhelper(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,& subroutine l2normhelper(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
@@ -1276,7 +1476,9 @@ end subroutine d2dump
real*8 :: dX, dY, dZ real*8 :: dX, dY, dZ
integer::imin,jmin,kmin integer::imin,jmin,kmin
integer::imax,jmax,kmax integer::imax,jmax,kmax
integer::i,j,k integer::i,j,k,n_elements
real*8, dimension(:), allocatable :: f_flat
real*8, external :: DDOT
dX = X(2) - X(1) dX = X(2) - X(1)
dY = Y(2) - Y(1) dY = Y(2) - Y(1)
@@ -1300,15 +1502,91 @@ if(dabs(X(1)-xmin) < dX) imin = 1
if(dabs(Y(1)-ymin) < dY) jmin = 1 if(dabs(Y(1)-ymin) < dY) jmin = 1
if(dabs(Z(1)-zmin) < dZ) kmin = 1 if(dabs(Z(1)-zmin) < dZ) kmin = 1
f_out = sum(f(imin:imax,jmin:jmax,kmin:kmax)*f(imin:imax,jmin:jmax,kmin:kmax)) n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
f_out = f_out*dX*dY*dZ f_out = f_out*dX*dY*dZ
return return
end subroutine l2normhelper end subroutine l2normhelper
!-------------------------------------------------------------------------------------- !--------------------------------------------------------------------------------------
! calculate L2norm especially for shell Blocks subroutine l2normhelper7(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
f1,f2,f3,f4,f5,f6,f7,f_out,gw)
implicit none
!~~~~~~> Input parameters:
integer,intent(in ):: ex(1:3)
real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)),xmin,ymin,zmin,xmax,ymax,zmax
integer,intent(in)::gw
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: f1,f2,f3,f4,f5,f6,f7
real*8, intent(out) :: f_out(7)
!~~~~~~> Other variables:
real*8 :: dX, dY, dZ
integer::imin,jmin,kmin
integer::imax,jmax,kmax
integer::i,j,k
real*8 :: s1,s2,s3,s4,s5,s6,s7
dX = X(2) - X(1)
dY = Y(2) - Y(1)
dZ = Z(2) - Z(1)
imin = gw+1
jmin = gw+1
kmin = gw+1
imax = ex(1) - gw
jmax = ex(2) - gw
kmax = ex(3) - gw
if(dabs(X(ex(1))-xmax) < dX) imax = ex(1)
if(dabs(Y(ex(2))-ymax) < dY) jmax = ex(2)
if(dabs(Z(ex(3))-zmax) < dZ) kmax = ex(3)
if(dabs(X(1)-xmin) < dX) imin = 1
if(dabs(Y(1)-ymin) < dY) jmin = 1
if(dabs(Z(1)-zmin) < dZ) kmin = 1
s1 = 0.d0
s2 = 0.d0
s3 = 0.d0
s4 = 0.d0
s5 = 0.d0
s6 = 0.d0
s7 = 0.d0
do k=kmin,kmax
do j=jmin,jmax
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
do i=imin,imax
s1 = s1 + f1(i,j,k)*f1(i,j,k)
s2 = s2 + f2(i,j,k)*f2(i,j,k)
s3 = s3 + f3(i,j,k)*f3(i,j,k)
s4 = s4 + f4(i,j,k)*f4(i,j,k)
s5 = s5 + f5(i,j,k)*f5(i,j,k)
s6 = s6 + f6(i,j,k)*f6(i,j,k)
s7 = s7 + f7(i,j,k)*f7(i,j,k)
enddo
enddo
enddo
f_out(1) = s1*dX*dY*dZ
f_out(2) = s2*dX*dY*dZ
f_out(3) = s3*dX*dY*dZ
f_out(4) = s4*dX*dY*dZ
f_out(5) = s5*dX*dY*dZ
f_out(6) = s6*dX*dY*dZ
f_out(7) = s7*dX*dY*dZ
return
end subroutine l2normhelper7
!--------------------------------------------------------------------------------------
! calculate L2norm especially for shell Blocks
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,& subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
f,f_out,gw,ogw,Symmetry) f,f_out,gw,ogw,Symmetry)
@@ -1325,7 +1603,9 @@ f_out = f_out*dX*dY*dZ
real*8 :: dX, dY, dZ real*8 :: dX, dY, dZ
integer::imin,jmin,kmin integer::imin,jmin,kmin
integer::imax,jmax,kmax integer::imax,jmax,kmax
integer::i,j,k integer::i,j,k,n_elements
real*8, dimension(:), allocatable :: f_flat
real*8, external :: DDOT
real*8 :: PIo4 real*8 :: PIo4
@@ -1388,7 +1668,11 @@ if(Symmetry==2)then
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1 if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
endif endif
f_out = sum(f(imin:imax,jmin:jmax,kmin:kmax)*f(imin:imax,jmin:jmax,kmin:kmax)) n_elements = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(n_elements))
f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [n_elements])
f_out = DDOT(n_elements, f_flat, 1, f_flat, 1)
deallocate(f_flat)
f_out = f_out*dX*dY*dZ f_out = f_out*dX*dY*dZ
@@ -1415,7 +1699,9 @@ f_out = f_out*dX*dY*dZ
real*8 :: dX, dY, dZ real*8 :: dX, dY, dZ
integer::imin,jmin,kmin integer::imin,jmin,kmin
integer::imax,jmax,kmax integer::imax,jmax,kmax
integer::i,j,k integer::i,j,k
real*8, dimension(:), allocatable :: f_flat
real*8, external :: DDOT
real*8 :: PIo4 real*8 :: PIo4
@@ -1478,11 +1764,11 @@ if(Symmetry==2)then
if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1 if(dabs(ymin+gw*dY)<dY.and.Y(1)<0.d0) jmin = gw+1
endif endif
f_out = sum(f(imin:imax,jmin:jmax,kmin:kmax)*f(imin:imax,jmin:jmax,kmin:kmax)) Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
allocate(f_flat(Nout))
f_out = f_out f_flat = reshape(f(imin:imax,jmin:jmax,kmin:kmax), [Nout])
f_out = DDOT(Nout, f_flat, 1, f_flat, 1)
Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1) deallocate(f_flat)
return return
@@ -1583,9 +1869,12 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
! ^ ! ^
! f=3/8*f_1 + 3/4*f_2 - 1/8*f_3 ! f=3/8*f_1 + 3/4*f_2 - 1/8*f_3
real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0 real*8,parameter::C1=3.d0/8.d0,C2=3.d0/4.d0,C3=-1.d0/8.d0
integer :: i,j,k
fout = C1*f1+C2*f2+C3*f3
do concurrent (k=1:ext(3), j=1:ext(2), i=1:ext(1))
fout(i,j,k) = C1*f1(i,j,k)+C2*f2(i,j,k)+C3*f3(i,j,k)
end do
return return
@@ -1679,7 +1968,8 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
real*8, dimension(ORDN,ORDN,ORDN) :: ya real*8, dimension(ORDN,ORDN,ORDN) :: ya
real*8, dimension(ORDN,ORDN) :: tmp2 real*8, dimension(ORDN,ORDN) :: tmp2
real*8, dimension(ORDN) :: tmp1 real*8, dimension(ORDN) :: tmp1
real*8, dimension(3) :: SoAh real*8, dimension(3) :: SoAh
real*8, external :: DDOT
! +1 because c++ gives 0 for first point ! +1 because c++ gives 0 for first point
cxB = inds+1 cxB = inds+1
@@ -1725,10 +2015,7 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m) tmp1 = tmp1 + coef(ORDN+m)*tmp2(:,m)
enddo enddo
f_int=0 f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
do m=1,ORDN
f_int = f_int + coef(m)*tmp1(m)
enddo
return return
@@ -1757,7 +2044,8 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
integer,dimension(2) :: cxB,cxT integer,dimension(2) :: cxB,cxT
real*8, dimension(ORDN,ORDN) :: ya real*8, dimension(ORDN,ORDN) :: ya
real*8, dimension(ORDN) :: tmp1 real*8, dimension(ORDN) :: tmp1
real*8, dimension(2) :: SoAh real*8, dimension(2) :: SoAh
real*8, external :: DDOT
! +1 because c++ gives 0 for first point ! +1 because c++ gives 0 for first point
cxB = inds(1:2)+1 cxB = inds(1:2)+1
@@ -1792,10 +2080,7 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
tmp1 = tmp1 + coef(ORDN+m)*ya(:,m) tmp1 = tmp1 + coef(ORDN+m)*ya(:,m)
enddo enddo
f_int=0 f_int = DDOT(ORDN, coef(1:ORDN), 1, tmp1, 1)
do m=1,ORDN
f_int = f_int + coef(m)*tmp1(m)
enddo
return return
@@ -1821,11 +2106,12 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
!~~~~~~> Other parameters: !~~~~~~> Other parameters:
real*8, dimension(-ORDN+1:ex(1)+ORDN,-ORDN+1:ex(2)+ORDN,ex(3)) :: fh real*8, dimension(-ORDN+1:ex(1)+ORDN,-ORDN+1:ex(2)+ORDN,ex(3)) :: fh
integer :: m integer :: m
integer :: cxB,cxT integer :: cxB,cxT
real*8, dimension(ORDN) :: ya real*8, dimension(ORDN) :: ya
real*8 :: SoAh real*8 :: SoAh
integer,dimension(3) :: inds integer,dimension(3) :: inds
real*8, external :: DDOT
! +1 because c++ gives 0 for first point ! +1 because c++ gives 0 for first point
inds = indsi + 1 inds = indsi + 1
@@ -1886,10 +2172,7 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd write(*,*)"error in global_interpind1d, not recognized dumyd = ",dumyd
endif endif
f_int=0 f_int = DDOT(ORDN, coef, 1, ya, 1)
do m=1,ORDN
f_int = f_int + coef(m)*ya(m)
enddo
return return
@@ -2125,20 +2408,28 @@ Nout = (imax-imin+1)*(jmax-jmin+1)*(kmax-kmin+1)
implicit none implicit none
integer,intent(in) :: N integer,intent(in) :: N
real*8 :: gont real*8 :: gont
integer :: i integer :: i
real*8, parameter, dimension(0:20) :: fact_table = [ &
1.d0, 1.d0, 2.d0, 6.d0, 24.d0, 120.d0, 720.d0, 5040.d0, 40320.d0, &
362880.d0, 3628800.d0, 39916800.d0, 479001600.d0, 6227020800.d0, &
87178291200.d0, 1307674368000.d0, 20922789888000.d0, &
355687428096000.d0, 6402373705728000.d0, 121645100408832000.d0, &
2432902008176640000.d0 ]
! sanity check ! sanity check
if(N < 0)then if(N < 0)then
write(*,*) "ffact: error input for factorial" write(*,*) "ffact: error input for factorial"
return gont = 1.d0
endif return
endif
gont = 1.d0
do i=1,N if(N <= 20)then
gont = gont*i gont = fact_table(N)
enddo else
gont = exp(log_gamma(dble(N+1)))
endif
return return

View File

@@ -12,9 +12,10 @@
#define f_global_interpind global_interpind #define f_global_interpind global_interpind
#define f_global_interpind2d global_interpind2d #define f_global_interpind2d global_interpind2d
#define f_global_interpind1d global_interpind1d #define f_global_interpind1d global_interpind1d
#define f_l2normhelper l2normhelper #define f_l2normhelper l2normhelper
#define f_l2normhelper_sh l2normhelper_sh #define f_l2normhelper7 l2normhelper7
#define f_l2normhelper_sh_rms l2normhelper_sh_rms #define f_l2normhelper_sh l2normhelper_sh
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
#define f_average average #define f_average average
#define f_average3 average3 #define f_average3 average3
#define f_average2 average2 #define f_average2 average2
@@ -41,9 +42,10 @@
#define f_global_interpind GLOBAL_INTERPIND #define f_global_interpind GLOBAL_INTERPIND
#define f_global_interpind2d GLOBAL_INTERPIND2D #define f_global_interpind2d GLOBAL_INTERPIND2D
#define f_global_interpind1d GLOBAL_INTERPIND1D #define f_global_interpind1d GLOBAL_INTERPIND1D
#define f_l2normhelper L2NORMHELPER #define f_l2normhelper L2NORMHELPER
#define f_l2normhelper_sh L2NORMHELPER_SH #define f_l2normhelper7 L2NORMHELPER7
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS #define f_l2normhelper_sh L2NORMHELPER_SH
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
#define f_average AVERAGE #define f_average AVERAGE
#define f_average3 AVERAGE3 #define f_average3 AVERAGE3
#define f_average2 AVERAGE2 #define f_average2 AVERAGE2
@@ -70,9 +72,10 @@
#define f_global_interpind global_interpind_ #define f_global_interpind global_interpind_
#define f_global_interpind2d global_interpind2d_ #define f_global_interpind2d global_interpind2d_
#define f_global_interpind1d global_interpind1d_ #define f_global_interpind1d global_interpind1d_
#define f_l2normhelper l2normhelper_ #define f_l2normhelper l2normhelper_
#define f_l2normhelper_sh l2normhelper_sh_ #define f_l2normhelper7 l2normhelper7_
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_ #define f_l2normhelper_sh l2normhelper_sh_
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
#define f_average average_ #define f_average average_
#define f_average3 average3_ #define f_average3 average3_
#define f_average2 average2_ #define f_average2 average2_
@@ -156,21 +159,30 @@ extern "C"
int *, double *, int &, int &); int *, double *, int &, int &);
} }
extern "C" extern "C"
{ {
void f_l2normhelper(int *, double *, double *, double *, void f_l2normhelper(int *, double *, double *, double *,
double &, double &, double &, double &, double &, double &,
double &, double &, double &, double &, double &, double &,
double *, double &, int &); double *, double &, int &);
} }
extern "C" extern "C"
{ {
void f_l2normhelper_sh(int *, double *, double *, double *, void f_l2normhelper7(int *, double *, double *, double *,
double &, double &, double &, double &, double &, double &,
double &, double &, double &, double &, double &, double &,
double *, double &, int &, int &, int &); double *, double *, double *, double *,
} double *, double *, double *, double *, int &);
}
extern "C"
{
void f_l2normhelper_sh(int *, double *, double *, double *,
double &, double &, double &,
double &, double &, double &,
double *, double &, int &, int &, int &);
}
extern "C" extern "C"
{ {

View File

@@ -2,7 +2,7 @@
#ifndef MICRODEF_H #ifndef MICRODEF_H
#define MICRODEF_H #define MICRODEF_H
#include "microdef.fh" #include "macrodef.fh"
// application parameters // application parameters

View File

@@ -1,11 +1,25 @@
include makefile.inc include makefile.inc
.SUFFIXES: .o .f90 .C .for .cu ## polint(ordn=6) kernel selector:
## 1 (default): barycentric fast path
.f90.o: ## 0 : fallback to Neville path
$(f90) $(f90appflags) -c $< -o $@ POLINT6_USE_BARY ?= 1
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
ARCH_OPT = -march=x86-64-v4
CXXAPPFLAGS = -O3 $(ARCH_OPT) -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 $(ARCH_OPT) -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
TP_OPTFLAGS = -O3 $(ARCH_OPT) -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include
.SUFFIXES: .o .f90 .C .for .cu
.f90.o:
$(f90) $(f90appflags) -c $< -o $@
.C.o: .C.o:
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
@@ -13,8 +27,14 @@ include makefile.inc
.for.o: .for.o:
$(f77) -c $< -o $@ $(f77) -c $< -o $@
.cu.o: .cu.o:
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
TwoPunctures.o: TwoPunctures.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
TwoPunctureABE.o: TwoPunctureABE.C
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
# Input files # Input files
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
@@ -95,8 +115,8 @@ ABE: $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
TwoPunctureABE: $(TwoPunctureFILES) TwoPunctureABE: $(TwoPunctureFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(TwoPunctureFILES) $(LDLIBS) $(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
clean: clean:
rm *.o ABE ABEGPU TwoPunctureABE make.log -f rm *.o ABE ABEGPU TwoPunctureABE make.log -f

View File

@@ -1,19 +1,32 @@
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## Intel oneAPI version with oneMKL
filein = -I/usr/include/ -I${MKLROOT}/include
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -lmpich -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran ## Use sequential oneMKL to avoid introducing extra OpenMP behavior into ABE.
LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
CXXAPPFLAGS = -O0 -Wno-deprecated -Dfortran3 -Dnewc ## Optional Intel oneTBB allocator, kept aligned with main's build environment.
#f90appflags = -O0 -fpp USE_TBBMALLOC ?= 1
f90appflags = -O0 -x f95-cpp-input TBBMALLOC_SO ?= /home/intel/oneapi/2025.3/lib/libtbbmalloc.so
f90 = gfortran ifneq ($(wildcard $(TBBMALLOC_SO)),)
f77 = gfortran TBBMALLOC_LIBS = -Wl,--no-as-needed $(TBBMALLOC_SO) -Wl,--as-needed
CXX = g++ else
CC = gcc TBBMALLOC_LIBS = -Wl,--no-as-needed -ltbbmalloc -Wl,--as-needed
CLINKER = mpic++ endif
ifeq ($(USE_TBBMALLOC),1)
LDLIBS := $(TBBMALLOC_LIBS) $(LDLIBS)
endif
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx
Cu = nvcc Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include

12
parallel_plot_helper.py Normal file
View File

@@ -0,0 +1,12 @@
import multiprocessing
def run_plot_task(task):
func, args = task
return func(*args)
def run_plot_tasks_parallel(plot_tasks):
ctx = multiprocessing.get_context('fork')
with ctx.Pool() as pool:
pool.map(run_plot_task, plot_tasks)

View File

@@ -8,11 +8,13 @@
## ##
################################################# #################################################
import numpy ## numpy for array operations import numpy ## numpy for array operations
import scipy ## scipy for interpolation and signal processing import scipy ## scipy for interpolation and signal processing
import math import math
import matplotlib.pyplot as plt ## matplotlib for plotting import matplotlib
import os ## os for system/file operations matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt ## matplotlib for plotting
import os ## os for system/file operations
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data

View File

@@ -6,17 +6,22 @@
## Author: Xiaoqu ## Author: Xiaoqu
## Dates: 2024/10/01 --- 2025/09/14 ## Dates: 2024/10/01 --- 2025/09/14
## ##
################################################# #################################################
import numpy ## Restrict OpenMP to one thread per process so that parallel
import scipy ## subprocess plotting does not multiply BLAS thread counts.
import matplotlib.pyplot as plt import os
from matplotlib.colors import LogNorm os.environ.setdefault("OMP_NUM_THREADS", "1")
from mpl_toolkits.mplot3d import Axes3D
## import torch import numpy
import AMSS_NCKU_Input as input_data import scipy
import matplotlib
import os matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D
## import torch
import AMSS_NCKU_Input as input_data
######################################################################################### #########################################################################################
@@ -92,9 +97,9 @@ def plot_binary_data( filename, binary_outdir, figure_outdir ):
#################################################################################### ####################################################################################
# Plot a single binary dataset (2D slices and 3D surface) # Plot a single binary dataset (2D slices and 3D surface)
def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ): def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
@@ -188,7 +193,15 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
plt.savefig( os.path.join(figure_surfaceplot_outdir, figure_title + " time = " + str(time) + " surface_plot.pdf") ) # save figure plt.savefig( os.path.join(figure_surfaceplot_outdir, figure_title + " time = " + str(time) + " surface_plot.pdf") ) # save figure
plt.close() plt.close()
return return
#################################################################################### ####################################################################################
## Allow standalone subprocess execution for parallel binary-data plotting.
if __name__ == '__main__':
import sys
if len(sys.argv) != 4:
print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
sys.exit(1)
plot_binary_data(sys.argv[1], sys.argv[2], sys.argv[3])

View File

@@ -6,15 +6,20 @@
## 2024/10/01 --- 2025/09/14 ## 2024/10/01 --- 2025/09/14
## ##
################################################# #################################################
import numpy ## numpy for array operations import numpy ## numpy for array operations
import matplotlib.pyplot as plt ## matplotlib for plotting import matplotlib
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
import glob import matplotlib.pyplot as plt ## matplotlib for plotting
import os ## operating system utilities from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
import glob
import plot_binary_data import os ## operating system utilities
import AMSS_NCKU_Input as input_data
import plot_binary_data
import AMSS_NCKU_Input as input_data
import subprocess
import sys
import multiprocessing
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots # plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
@@ -50,13 +55,37 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
file_list.append(x) file_list.append(x)
print(x) print(x)
## Plot each file in the list ## Plot each file in parallel using subprocesses.
for filename in file_list: ## Each subprocess starts with BLAS thread limits in plot_binary_data.py.
print(filename) script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
plot_binary_data.plot_binary_data(filename, binary_outdir, figure_outdir) max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
print( ) running = []
print( " Binary Data Plot Has been Finished " ) failed = []
for filename in file_list:
print(filename)
proc = subprocess.Popen(
[sys.executable, script, filename, binary_outdir, figure_outdir],
)
running.append( (proc, filename) )
if len(running) >= max_workers:
p, fn = running.pop(0)
p.wait()
if p.returncode != 0:
failed.append(fn)
for p, fn in running:
p.wait()
if p.returncode != 0:
failed.append(fn)
if failed:
print( " WARNING: the following binary data plots failed:" )
for fn in failed:
print( " ", fn )
print( )
print( " Binary Data Plot Has been Finished " )
print( ) print( )
return return