From 86a683de264404f8bd26a2c924dc3fc45c5ddb64 Mon Sep 17 00:00:00 2001 From: ianchb Date: Sun, 12 Apr 2026 21:19:14 +0800 Subject: [PATCH] Replace legacy ABEGPU stack with ABE_CUDA backend --- AMSS_NCKU_Input.py | 4 +- AMSS_NCKU_Program.py | 2 +- AMSS_NCKU_source/bssn_gpu.cu | 2908 ----------- AMSS_NCKU_source/bssn_gpu.h | 73 - AMSS_NCKU_source/bssn_gpu_class.C | 7790 ----------------------------- AMSS_NCKU_source/bssn_gpu_class.h | 210 - AMSS_NCKU_source/bssn_rhs_cuda.cu | 2565 ++++++++++ AMSS_NCKU_source/bssn_rhs_cuda.h | 36 + AMSS_NCKU_source/bssn_step_gpu.C | 1942 ------- AMSS_NCKU_source/makefile | 163 +- AMSS_NCKU_source/makefile.inc | 4 + generate_macrodef.py | 2 +- makefile_and_run.py | 4 +- 13 files changed, 2707 insertions(+), 12996 deletions(-) delete mode 100644 AMSS_NCKU_source/bssn_gpu.cu delete mode 100644 AMSS_NCKU_source/bssn_gpu.h delete mode 100644 AMSS_NCKU_source/bssn_gpu_class.C delete mode 100644 AMSS_NCKU_source/bssn_gpu_class.h create mode 100644 AMSS_NCKU_source/bssn_rhs_cuda.cu create mode 100644 AMSS_NCKU_source/bssn_rhs_cuda.h delete mode 100644 AMSS_NCKU_source/bssn_step_gpu.C diff --git a/AMSS_NCKU_Input.py b/AMSS_NCKU_Input.py index fe25a50..67e7c1c 100755 --- a/AMSS_NCKU_Input.py +++ b/AMSS_NCKU_Input.py @@ -16,9 +16,9 @@ import numpy File_directory = "GW150914" ## output file directory Output_directory = "binary_output" ## binary data file directory ## The file directory name should not be too long -MPI_processes = 64 ## number of mpi processes used in the simulation +MPI_processes = 8 ## number of mpi processes used in the simulation -GPU_Calculation = "no" ## Use GPU or not +GPU_Calculation = "yes" ## Use GPU or not ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) CPU_Part = 1.0 GPU_Part = 0.0 diff --git a/AMSS_NCKU_Program.py b/AMSS_NCKU_Program.py index 2d777cd..9ba512e 100755 --- a/AMSS_NCKU_Program.py +++ b/AMSS_NCKU_Program.py @@ -258,7 +258,7 @@ print() if 
(input_data.GPU_Calculation == "no"): ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE") elif (input_data.GPU_Calculation == "yes"): - ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU") + ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE_CUDA") if not os.path.exists( ABE_file ): print( ) diff --git a/AMSS_NCKU_source/bssn_gpu.cu b/AMSS_NCKU_source/bssn_gpu.cu deleted file mode 100644 index e67ae18..0000000 --- a/AMSS_NCKU_source/bssn_gpu.cu +++ /dev/null @@ -1,2908 +0,0 @@ -// includes, system -#include -#include -#include -#include -#include -#include -#include -//#include "cutil.h" -#include -#include -using namespace std; - -//includes, bssn -#include "gpu_mem.h" -#include "bssn_gpu.h" -#ifdef RESULT_CHECK -#include -#endif - -void compare_result_gpu(int ftag1,double * datac,int data_num){ - double * data = (double*)malloc(sizeof(double)*data_num); - cudaMemcpy(data, datac, data_num * sizeof(double), cudaMemcpyDeviceToHost); - compare_result(ftag1,data,data_num); - free(data); -} - -__global__ void test_const_address(double * testd){ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - if(_t == 0) - testd[0] = F1o3; -} - -__global__ void enforce_ga(double * trA){ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - //int ps; //TOTRY: i,j,k; double value; - - while(_t < _3D_SIZE[0]) - { - M_ gxx[_t] = M_ dxx[_t] + 1; - M_ gyy[_t] = M_ dyy[_t] + 1; - M_ gzz[_t] = M_ dzz[_t] + 1; - // for M_ g; - M_ gupzz[_t] = M_ gxx[_t] * M_ gyy[_t] * M_ gzz[_t] + M_ gxy[_t] * M_ gyz[_t] * M_ gxz[_t] + M_ gxz[_t] * M_ gxy[_t] * M_ gyz[_t] - - M_ gxz[_t] * M_ gyy[_t] * M_ gxz[_t] - M_ gxy[_t] * M_ gxy[_t] * M_ gzz[_t] - M_ gxx[_t] * M_ gyz[_t] * M_ gyz[_t]; - - M_ gupzz[_t] = 1.0 / pow( M_ gupzz[_t] , F1o3 ) ; - - M_ gxx[_t] = M_ gxx[_t] * M_ gupzz[_t]; - M_ gxy[_t] = M_ gxy[_t] * M_ gupzz[_t]; - M_ gxz[_t] = M_ gxz[_t] * M_ gupzz[_t]; - M_ gyy[_t] = M_ gyy[_t] * M_ gupzz[_t]; - M_ gyz[_t] = M_ gyz[_t] * M_ gupzz[_t]; - M_ gzz[_t] = M_ gzz[_t] * M_ gupzz[_t]; - - M_ dxx[_t] = M_ 
gxx[_t] - 1; - M_ dyy[_t] = M_ gyy[_t] - 1; - M_ dzz[_t] = M_ gzz[_t] - 1; - // for A ; - - M_ gupxx[_t] = ( M_ gyy[_t] * M_ gzz[_t] - M_ gyz[_t] * M_ gyz[_t] ); - M_ gupxy[_t] = - ( M_ gxy[_t] * M_ gzz[_t] - M_ gyz[_t] * M_ gxz[_t] ); - M_ gupxz[_t] = ( M_ gxy[_t] * M_ gyz[_t] - M_ gyy[_t] * M_ gxz[_t] ); - M_ gupyy[_t] = ( M_ gxx[_t] * M_ gzz[_t] - M_ gxz[_t] * M_ gxz[_t] ); - M_ gupyz[_t] = - ( M_ gxx[_t] * M_ gyz[_t] - M_ gxy[_t] * M_ gxz[_t] ); - M_ gupzz[_t] = ( M_ gxx[_t] * M_ gyy[_t] - M_ gxy[_t] * M_ gxy[_t] ); - - trA[_t] = M_ gupxx[_t] *M_ Axx[_t] + M_ gupyy[_t] * M_ Ayy[_t] + M_ gupzz[_t] * M_ Azz[_t] - + 2 * (M_ gupxy[_t] *M_ Axy[_t] + M_ gupxz[_t] *M_ Axz[_t] + M_ gupyz[_t] * M_ Ayz[_t]); - - M_ Axx[_t] = M_ Axx[_t] - F1o3 * M_ gxx[_t] * trA[_t]; - M_ Axy[_t] = M_ Axy[_t] - F1o3 * M_ gxy[_t] * trA[_t]; - M_ Axz[_t] = M_ Axz[_t] - F1o3 * M_ gxz[_t] * trA[_t]; - M_ Ayy[_t] = M_ Ayy[_t] - F1o3 * M_ gyy[_t] * trA[_t]; - M_ Ayz[_t] = M_ Ayz[_t] - F1o3 * M_ gyz[_t] * trA[_t]; - M_ Azz[_t] = M_ Azz[_t] - F1o3 * M_ gzz[_t] * trA[_t]; - //------------------- - _t += STEP_SIZE; - } -} - -inline void sub_enforce_ga(int matrix_size){ - double * trA = M_ chin1; - enforce_ga<<>>(trA); - cudaMemset(trA,0,matrix_size * sizeof(double)); - cudaThreadSynchronize(); - - //cudaMemset(Mh_ gupxx,0,matrix_size * sizeof(double)); - //trA gxx,gyy,gzz gupxx,gupxy,gupxz,gupyy,gupyz,gupzz - -} -__device__ volatile unsigned int global_count = 0; -__global__ void test_init_matrix(){ - int tid = blockIdx.x*blockDim.x+threadIdx.x; - int curr = tid; - while(curr < _3D_SIZE[2]) - { - metac.fh[curr] = 0; - curr += STEP_SIZE; - } - curr = tid; - while(curr < _3D_SIZE[0]) - { - metac.betaxx[curr] = 0; - metac.betaxy[curr] = 0; - metac.betaxz[curr] = 0; - curr += STEP_SIZE; - } -} -__global__ void init_matrix(double * mat){ - int tid = blockIdx.x*blockDim.x+threadIdx.x; - int curr = tid; - while(curr < _3D_SIZE[0]) - { - mat[curr] = 0; - curr += STEP_SIZE; - } -} -__global__ void 
init_3_matrixs(double * mat1,double* mat2,double *mat3){ - int tid = blockIdx.x*blockDim.x+threadIdx.x; - int curr = tid; - while(curr < _3D_SIZE[0]) - { - mat1[curr] = 0; - mat2[curr] = 0; - mat3[curr] = 0; - curr += STEP_SIZE; - } -} -__global__ void init_matrix_fh(double * mat){ - int tid = blockIdx.x*blockDim.x+threadIdx.x; - int curr = tid; - while(curr < _3D_SIZE[2]) - { - mat[curr] = 0; - curr += STEP_SIZE; - } -} - - -__global__ void sub_symmetry_bd_partF(int ord, double * func, double *funcc) -{ - int curr = blockIdx.x*blockDim.x+threadIdx.x; - int ps; //TOTRY: i,j,k; double value; - - while(curr < _3D_SIZE[0]) - { - int k = curr / _2D_SIZE[0]; - ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); //= ps % ex_c[0]; - - funcc[i+ ord + (ord +j)* _1D_SIZE[ord] + (k + ord) * _2D_SIZE[ord]] = func[curr]; - - curr += STEP_SIZE; - } - -} - -#ifdef Vertex -__global__ void sub_symmetry_bd_partI(int ord, double * func, double * funcc,double S1){ - //for i - int curr = blockIdx.x*blockDim.x+threadIdx.x; - int ps; - int m; - while(curr < (ex_c[1]+ord)*(ex_c[2]+ord) ){ - m = ord * 2; - ps = curr * _1D_SIZE[ord]; - for(int i = 0;i < ord; ++i){ - funcc[ps] = funcc [ps + m] * S1; - ps ++; - m -= 2; - } - curr+= STEP_SIZE; - } - __syncthreads(); -} -__global__ void sub_symmetry_bd_partJ(int ord,double * func, double * funcc,double S2){ - //for j - int curr = blockIdx.x*blockDim.x+threadIdx.x; - int ps; - int m; - - while(curr < (ex_c[0]+ord)*(ex_c[2]+ord)) - { - m = 2 * ord; - ps = (curr/_1D_SIZE[ord])*_2D_SIZE[ord] + (curr % _1D_SIZE[ord]); - for(int i = 0;i>>(ord,func,funcc); - cudaThreadSynchronize(); - sub_symmetry_bd_partI<<>>(ord,func,funcc,SoA[0]); - cudaThreadSynchronize(); - sub_symmetry_bd_partJ<<>>(ord,func,funcc,SoA[1]); - cudaThreadSynchronize(); - sub_symmetry_bd_partK<<>>(ord,func,funcc,SoA[2]); - cudaThreadSynchronize(); -} - - -__global__ void sub_fdderivs_part1(double * f,double *fh,double 
*fxx,double *fxy,double *fxz,double *fyy,double *fyz,double *fzz) - { - int curr = blockIdx.x*blockDim.x+threadIdx.x; - int ps; //TOTRY: i,j,k; double value; - - while(curr < _3D_SIZE[0]) - { - int k = curr / _2D_SIZE[0]; - ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); - - if(k == ex_c[2]-1 || i == ex_c[0]-1 || j == ex_c[1]-1){ - curr += STEP_SIZE; - continue; - } - else - { - //xx - if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0]){ - fxx[curr] = Fdxdx*(-_FH2_(i,(j+2),(k+2))+16*_FH2_((i+1),(j+2),(k+2))-30*_FH2_((i+2),(j+2),(k+2)) - -_FH2_((i+4),(j+2),(k+2))+16*_FH2_((i+3),(j+2),(k+2)) ); - - } - else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0]){ - fxx[curr] = Sdxdx*(_FH2_((i+1),(j+2),(k+2))-2*_FH2_((i+2),(j+2),(k+2)) - +_FH2_(i+3,(j+2),(k+2)) ); - } - //zz-- - if(k+2 <= ijk_max[2] && k-2 >= ijk_min[2]){ - fzz[curr] = Fdzdz * (-_FH2_((i+2),(j+2),k) + 16 *_FH2_((i+2),(j+2),(k+1))- 30*_FH2_((i+2),(j+2),(k+2)) - -_FH2_((i+2),(j+2),(k+4))+ 16*_FH2_((i+2),(j+2),(k+3)) ); - } - else if(k+1 <= ijk_max[2] && k-1 >= ijk_min[2]){ - fzz[curr] = Sdzdz*(_FH2_((i+2),(j+2),(k+1))- 2 * _FH2_((i+2),(j+2),(k+2)) - + _FH2_((i+2),(j+2),(k+3)) ); - } - - //yy-- - if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1]){ - fyy[curr] = Fdydy*(-_FH2_((i+2),j,(k+2))+16*_FH2_((i+2),(j+1),(k+2))-30*_FH2_((i+2),(j+2),(k+2)) - -_FH2_((i+2),(j+4),(k+2))+16*_FH2_((i+2),(j+3),(k+2)) ); - } - else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1]){ - fyy[curr] = Sdydy*(_FH2_((i+2),(j+1),(k+2))-2*_FH2_((i+2),(j+2),(k+2)) - +_FH2_((i+2),(j+3),(k+2)) ); - } - - - - //xy - if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0] && j+2 <= ijk_max[1] && j-2 >= ijk_min[1]) - fxy[curr] = Fdxdy*((_FH2_(i,j,(k+2))-8*_FH2_((i+1),j,(k+2))+8*_FH2_((i+3),j,(k+2))-_FH2_((i+4),j,(k+2))) - -8 *(_FH2_(i,(j+1),(k+2))-8*_FH2_((i+1),(j+1),(k+2))+8*_FH2_((i+3),(j+1),(k+2))-_FH2_((i+4),(j+1),(k+2))) - +8 
*(_FH2_(i,(j+3),(k+2))-8*_FH2_((i+1),(j+3),(k+2))+8*_FH2_((i+3),(j+3),(k+2))-_FH2_((i+4),(j+3),(k+2))) - - (_FH2_(i,(j+4),(k+2))-8*_FH2_((i+1),(j+4),(k+2))+8*_FH2_((i+3),(j+4),(k+2))-_FH2_((i+4),(j+4),(k+2)))); - - else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0] && j+1 <= ijk_max[1] && j-1 >= ijk_min[1]) - - fxy[curr] = Sdxdy*(_FH2_((i+1),(j+1),(k+2))-_FH2_((i+3),(j+1),(k+2))-_FH2_((i+1),(j+3),(k+2))+_FH2_((i+3),(j+3),(k+2))); - //xz - if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0] && k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) - fxz[curr] = Fdxdz*( (_FH2_(i,(j+2),k)-8*_FH2_((i+1),(j+2),k)+8*_FH2_((i+3),(j+2),k)-_FH2_((i+4),(j+2),k)) - -8 *(_FH2_(i,(j+2),(k+1))-8*_FH2_((i+1),(j+2),(k+1))+8*_FH2_((i+3),(j+2),(k+1))-_FH2_((i+4),(j+2),(k+1))) - +8 *(_FH2_(i,(j+2),(k+3))-8*_FH2_((i+1),(j+2),(k+3))+8*_FH2_((i+3),(j+2),(k+3))-_FH2_((i+4),(j+2),(k+3))) - - (_FH2_(i,(j+2),(k+4))-8*_FH2_((i+1),(j+2),(k+4))+8*_FH2_((i+3),(j+2),(k+4))-_FH2_((i+4),(j+2),(k+4)))); - - else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0] && k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) - fxz[curr] = Sdxdz*(_FH2_((i+1),(j+2),(k+1))-_FH2_((i+3),(j+2),(k+1))-_FH2_((i+1),(j+2),(k+3))+_FH2_((i+3),(j+2),(k+3))); - //yz - if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1] && k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) - fyz[curr] = Fdydz*( (_FH2_((i+2),j,k)-8*_FH2_((i+2),(j+1),k)+8*_FH2_((i+2),(j+3),k)-_FH2_((i+2),(j+4),k)) - -8 *(_FH2_((i+2),j,(k+1))-8*_FH2_((i+2),(j+1),(k+1))+8*_FH2_((i+2),(j+3),(k+1))-_FH2_((i+2),(j+4),(k+1))) - +8 *(_FH2_((i+2),j,(k+3))-8*_FH2_((i+2),(j+1),(k+3))+8*_FH2_((i+2),(j+3),(k+3))-_FH2_((i+2),(j+4),(k+3))) - - (_FH2_((i+2),j,(k+4))-8*_FH2_((i+2),(j+1),(k+4))+8*_FH2_((i+2),(j+3),(k+4))-_FH2_((i+2),(j+4),(k+4)))); - - else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1] && k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) - fyz[curr] = Sdydz*(_FH2_((i+2),(j+1),(k+1))-_FH2_((i+2),(j+3),(k+1))-_FH2_((i+2),(j+1),(k+3))+_FH2_((i+2),(j+3),(k+3))); - - curr += STEP_SIZE; - } - } - - __syncthreads(); - } - -inline void 
sub_fdderivs(double * f,double *fh,double *fxx,double *fxy,double *fxz,double *fyy,double *fyz,double *fzz,double* SoA) -{ - sub_symmetry_bd(2,f,fh,SoA); - cudaMemset(fxx,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fxy,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fxz,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fyy,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fyz,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fzz,0,_3D_SIZE[0] * sizeof(double)); - cudaThreadSynchronize(); - sub_fdderivs_part1<<>>(f,fh,fxx,fxy,fxz,fyy,fyz,fzz); - cudaThreadSynchronize(); -} - -__global__ void sub_fderivs_part1(double * f,double * fh,double *fx,double *fy,double *fz ) - { - int curr = blockIdx.x*blockDim.x+threadIdx.x; - int ps; //TOTRY: i,j,k; double value; - - while(curr < _3D_SIZE[0]) - { - int k = curr / _2D_SIZE[0]; - ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); - - if(k == ex_c[2]-1 || i == ex_c[0]-1 || j == ex_c[1]-1){ - curr += STEP_SIZE; - continue; - } - - //X-- - if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0]) - fx[curr] = d12dxyz[0]*(fh[i+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] - - 8*fh[i+1+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + - 8*fh[i+3+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] - - fh[i+4+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] ); - - else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0]) - fx[curr] = d2dxyz[0]*(-fh[i+1+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + - fh[i+3+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] ); - //Y-- - if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1]) - fy[curr]=d12dxyz[1]*(fh[i+2+j*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]- - 8*fh[i+2+(j+1)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + - 8*fh[i+2+(j+3)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] - - fh[i+2+(j+4)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]); - - else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1]) - fy[curr]=d2dxyz[1]*(-fh[i+2+(j+1)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + - fh[i+2+(j+3)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]); - //Z-- - - if(k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) - fz[curr]=d12dxyz[2]*( 
fh[i+2+(j+2)*_1D_SIZE[2]+k *_2D_SIZE[2]] - - 8* fh[i+2+(j+2)*_1D_SIZE[2]+(k+1)*_2D_SIZE[2]] + - 8* fh[i+2+(j+2)*_1D_SIZE[2]+(k+3)*_2D_SIZE[2]] - - fh[i+2+(j+2)*_1D_SIZE[2]+(k+4)*_2D_SIZE[2]]); - - else if(k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) - fz[curr]=d2dxyz[2]*(-fh[i+2+(j+2)*_1D_SIZE[2]+(k+1)*_2D_SIZE[2]]+ - fh[i+2+(j+2)*_1D_SIZE[2]+(k+3)*_2D_SIZE[2]]); - - curr += STEP_SIZE; - - } - } - -inline void sub_fderivs(double * f,double * fh,double *fx,double *fy,double *fz,double * SoA) -{ - sub_symmetry_bd(2,f,fh,SoA); - - cudaMemset(fx,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fy,0,_3D_SIZE[0] * sizeof(double)); - cudaMemset(fz,0,_3D_SIZE[0] * sizeof(double)); - - cudaThreadSynchronize(); - sub_fderivs_part1<<>>(f,fh,fx,fy,fz); - cudaThreadSynchronize(); -} - -__global__ void computeRicci_part1(double * dst) -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - dst[_t] = M_ gupxx [_t]* M_ fxx [_t]+ M_ gupyy[_t]* M_ fyy[_t]+ M_ gupzz[_t]* M_ fzz[_t]+ - ( M_ gupxy[_t]* M_ fxy[_t]+ M_ gupxz[_t]* M_ fxz[_t]+ M_ gupyz[_t]* M_ fyz[_t]) * 2; - - _t += STEP_SIZE; - } -} - - inline void computeRicci(double * src,double* dst,double * SoA, Meta* meta) -{ - sub_fdderivs(src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,SoA); - cudaThreadSynchronize(); - computeRicci_part1<<>>(dst); - cudaThreadSynchronize(); - -}/*Exception*/ - -__global__ void sub_kodis_part1(double *f,double *fh,double *f_rhs) -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - int ps; //TOTRY: i,j,k; double value; - double inc_f_rhs; - while(_t < _3D_SIZE[0]) - { - int k = _t / _2D_SIZE[0]; - ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); - - if(k == ex_c[2]-1 && i == ex_c[0]-1 && j == ex_c[1]-1){ - _t += STEP_SIZE; - continue; - } - - if(i-3 >= ijk_min3[0] && i+3 <= ijk_max[0] && - j-3 >= ijk_min3[1] && j+3 <= ijk_max[1] && - k-3 >= ijk_min3[2] && k+3 <= ijk_max[2]) - { - // x direction - inc_f_rhs = 
( (_FH3_(i,(j+3),(k+3))+_FH3_((i+6),(j+3),(k+3))) - - 6*(_FH3_((i+1),(j+3),(k+3))+_FH3_((i+5),(j+3),(k+3))) + - 15*(_FH3_((i+2),(j+3),(k+3))+_FH3_((i+4),(j+3),(k+3))) - - 20* _FH3_((i+3),(j+3),(k+3)) ) /dX; - - - // y direction - - inc_f_rhs += ( (_FH3_((i+3),j,(k+3))+_FH3_((i+3),(j+6),(k+3))) - - 6*(_FH3_((i+3),(j+1),(k+3))+_FH3_((i+3),(j+5),(k+3))) + - 15*(_FH3_((i+3),(j+2),(k+3))+_FH3_((i+3),(j+4),(k+3))) - - 20* _FH3_((i+3),(j+3),(k+3)) )/dY; - - // z direction - - inc_f_rhs += ( (_FH3_((i+3),(j+3),k)+_FH3_((i+3),(j+3),(k+6))) - - 6*(_FH3_((i+3),(j+3),(k+1))+_FH3_((i+3),(j+3),(k+5))) + - 15*(_FH3_((i+3),(j+3),(k+2))+_FH3_((i+3),(j+3),(k+4))) - - 20* _FH3_((i+3),(j+3),(k+3)) )/dZ; - inc_f_rhs *= eps_c; - inc_f_rhs /= 64; - f_rhs[_t] += inc_f_rhs; //be careful the mark is "+=" not "==" ! - } - - _t += STEP_SIZE; - } -} - -inline void sub_kodis(double *f,double *fh,double *f_rhs,double *SoA) -{ - sub_symmetry_bd(3,f,fh,SoA); - cudaThreadSynchronize(); - sub_kodis_part1<<>>(f,fh,f_rhs); - cudaThreadSynchronize(); -} - -__global__ void sub_lopsided_part1(double *f,double* fh,double *f_rhs,double *Sfx,double *Sfy,double *Sfz) -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - int ps; //TOTRY: i,j,k; double value; - - while(_t < _3D_SIZE[0]) - { - int k = _t / _2D_SIZE[0]; - ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); - - if(k < ex_c[2]-1 && i < ex_c[0]-1 && j < ex_c[1]-1){ - // x direction - if(Sfx[_t] >= 0 && i+3 <= ijk_max[0] && i-1 >= ijk_min2[0]) - f_rhs[_t]=f_rhs[_t]+ - Sfx[_t]*d12dxyz[0]*(-3*_FH3_((i+2),(j+3),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+4),(j+3),(k+3)) - -6*_FH3_((i+5),(j+3),(k+3))+ _FH3_((i+6),(j+3),(k+3))); - - else if(Sfx[_t] <= 0 && i-3 >= ijk_min2[0] && i+1 <= ijk_max[0]) - f_rhs[_t]=f_rhs[_t]- - Sfx[_t]*d12dxyz[0]*(-3*_FH3_((i+4),(j+3),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+2),(j+3),(k+3)) - -6*_FH3_((i+1),(j+3),(k+3))+ _FH3_(i,(j+3),(k+3))); - - else if(i+2 
<= ijk_max[0] && i-2 >= ijk_min2[0]) - - - f_rhs[_t]=f_rhs[_t]+ - Sfx[_t]*d12dxyz[0]*(_FH3_((i+1),(j+3),(k+3))-8*_FH3_((i+2),(j+3),(k+3))+8*_FH3_((i+4),(j+3),(k+3))-_FH3_((i+5),(j+3),(k+3))); - - else if(i+1 <= ijk_max[0] && i-1 >= ijk_min2[0]) - - f_rhs[_t]=f_rhs[_t] + Sfx[_t]*d2dxyz[0]*(-_FH3_((i+2),(j+3),(k+3))+_FH3_((i+4),(j+3),(k+3))); - - - // y direction - if(Sfy[_t] >= 0 && j+3 <= ijk_max[1] && j-1 >= ijk_min2[1]) - - f_rhs[_t]=f_rhs[_t]+ - Sfy[_t]*d12dxyz[1]*(-3*_FH3_((i+3),(j+2),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+4),(k+3)) - -6*_FH3_((i+3),(j+5),(k+3))+ _FH3_((i+3),(j+6),(k+3))); - - else if(Sfy[_t] <= 0 && j-3 >= ijk_min2[1] && j+1 <= ijk_max[1]) - f_rhs[_t]=f_rhs[_t]- - Sfy[_t]*d12dxyz[1]*(-3*_FH3_((i+3),(j+4),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+2),(k+3)) - -6*_FH3_((i+3),(j+1),(k+3))+ _FH3_((i+3),j,(k+3))); - - else if(j+2 <= ijk_max[1] && j-2 >= ijk_min2[1]) - - f_rhs[_t]=f_rhs[_t]+ - Sfy[_t]*d12dxyz[1]*(_FH3_((i+3),(j+1),(k+3))-8*_FH3_((i+3),(j+2),(k+3))+8*_FH3_((i+3),(j+4),(k+3))-_FH3_((i+3),(j+5),(k+3))); - - else if(j+1 <= ijk_max[1] && j-1 >= ijk_min2[1]) - - f_rhs[_t]=f_rhs[_t] + Sfy[_t]*d2dxyz[1]*(-_FH3_((i+3),(j+2),(k+3))+_FH3_((i+3),(j+4),(k+3))); - - - // z direction - if(Sfz[_t] >= 0 && k+3 <= ijk_max[2] && k-1 >= ijk_min2[2]) - // v - // D f = ------[ - 3f - 10f + 18f - 6f + f ] - // i 12dx i-v i i+v i+2v i+3v - f_rhs[_t]=f_rhs[_t]+ - Sfz[_t]*d12dxyz[2]*(-3*_FH3_((i+3),(j+3),(k+2))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+3),(k+4)) - -6*_FH3_((i+3),(j+3),(k+5))+ _FH3_((i+3),(j+3),(k+6))); - - else if(Sfz[_t] <= 0 && k-3 >= ijk_min2[2] && k+1 <= ijk_max[2]) - f_rhs[_t]=f_rhs[_t]- - Sfz[_t]*d12dxyz[2]*(-3*_FH3_((i+3),(j+3),(k+4))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+3),(k+2)) - -6*_FH3_((i+3),(j+3),(k+1))+ _FH3_((i+3),(j+3),k)); - - else if(k+2 <= ijk_max[2] && k-2 >= ijk_min2[2]) - - f_rhs[_t]=f_rhs[_t]+ - 
Sfz[_t]*d12dxyz[2]*(_FH3_((i+3),(j+3),(k+1))-8*_FH3_((i+3),(j+3),(k+2))+8*_FH3_((i+3),(j+3),(k+4))-_FH3_((i+3),(j+3),(k+5))); - - else if(k+1 <= ijk_max[2] && k-1 >= ijk_min2[2]) - - f_rhs[_t]=f_rhs[_t]+Sfz[_t]*d2dxyz[2]*(-_FH3_((i+3),(j+3),(k+2))+_FH3_((i+3),(j+3),(k+4))); - } - //------------------- - _t += STEP_SIZE; - } -} - - -inline void sub_lopsided(double *f,double*fh,double *f_rhs,double *Sfx,double *Sfy,double *Sfz,double *SoA){ - sub_symmetry_bd(3,f,fh,SoA); - cudaThreadSynchronize(); - sub_lopsided_part1<<>>(f,fh,f_rhs,Sfx,Sfy,Sfz); - cudaThreadSynchronize(); -} - -__global__ void compute_rhs_bssn_part1() -{ - int tid = blockIdx.x*blockDim.x+threadIdx.x; - int curr = tid; - while(curr < _3D_SIZE[0]) - { - metac.alpn1[curr] = metac.Lap[curr] + 1; - metac.chin1[curr] = metac.chi[curr] + 1; - metac.gxx[curr] = metac.dxx[curr] + 1; - metac.gyy[curr] = metac.dyy[curr] + 1; - metac.gzz[curr] = metac.dzz[curr] + 1; - - curr += STEP_SIZE; - } -} - -__global__ void compute_rhs_bssn_part2() -{ - //__shared__ int judge = 1; - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - - M_ div_beta[_t] = M_ betaxx[_t] + M_ betayy[_t] + M_ betazz[_t]; - M_ chi_rhs[_t] = F2o3 *M_ chin1[_t]*( M_ alpn1[_t] * M_ trK[_t] - M_ div_beta[_t] ); //rhs[_t] for M_ chi - - M_ gxx_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axx[_t] - F2o3 * M_ gxx[_t]* M_ div_beta[_t] + - 2 *( M_ gxx[_t]* M_ betaxx[_t]+ M_ gxy[_t]* M_ betayx[_t]+ M_ gxz[_t]* M_ betazx[_t]); - M_ gyy_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Ayy[_t] - F2o3 * M_ gyy[_t]* M_ div_beta[_t] + - 2 *( M_ gxy[_t]* M_ betaxy[_t]+ M_ gyy[_t]* M_ betayy[_t]+ M_ gyz[_t]* M_ betazy[_t]); - - M_ gzz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Azz[_t] - F2o3 * M_ gzz[_t]* M_ div_beta[_t] + - 2 *( M_ gxz[_t]* M_ betaxz[_t]+ M_ gyz[_t]* M_ betayz[_t]+ M_ gzz[_t]* M_ betazz[_t]); - - M_ gxy_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axy[_t] + F1o3 * M_ gxy[_t] * M_ div_beta[_t] + - M_ gxx[_t]* M_ betaxy[_t] + M_ gxz[_t]* M_ betazy[_t]+ - M_ gyy[_t]* 
M_ betayx[_t]+ M_ gyz[_t]* M_ betazx[_t] - - M_ gxy[_t]* M_ betazz[_t]; - - M_ gyz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Ayz[_t] + F1o3 * M_ gyz[_t] * M_ div_beta[_t] + - M_ gxy[_t]* M_ betaxz[_t]+ M_ gyy[_t]* M_ betayz[_t] + - M_ gxz[_t]* M_ betaxy[_t] + M_ gzz[_t]* M_ betazy[_t] - - M_ gyz[_t]* M_ betaxx[_t]; - - M_ gxz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axz[_t] + F1o3 * M_ gxz[_t] * M_ div_beta[_t] + - M_ gxx[_t]* M_ betaxz[_t]+ M_ gxy[_t]* M_ betayz[_t] + - M_ gyz[_t]* M_ betayx[_t]+ M_ gzz[_t]* M_ betazx[_t] - - M_ gxz[_t]* M_ betayy[_t]; //rhs[_t] for gij - - // invert tilted metric - M_ gupzz[_t]= M_ gxx[_t]* M_ gyy[_t]* M_ gzz[_t]+ M_ gxy[_t]* M_ gyz[_t]* M_ gxz[_t]+ M_ gxz[_t]* M_ gxy[_t]* M_ gyz[_t]- - M_ gxz[_t]* M_ gyy[_t]* M_ gxz[_t]- M_ gxy[_t]* M_ gxy[_t]* M_ gzz[_t]- M_ gxx[_t]* M_ gyz[_t]* M_ gyz[_t]; - M_ gupxx[_t]= ( M_ gyy[_t]* M_ gzz[_t]- M_ gyz[_t]* M_ gyz[_t]) / M_ gupzz[_t]; - M_ gupxy[_t]= - ( M_ gxy[_t]* M_ gzz[_t]- M_ gyz[_t]* M_ gxz[_t]) / M_ gupzz[_t]; - M_ gupxz[_t]= ( M_ gxy[_t]* M_ gyz[_t]- M_ gyy[_t]* M_ gxz[_t]) / M_ gupzz[_t]; - M_ gupyy[_t]= ( M_ gxx[_t]* M_ gzz[_t]- M_ gxz[_t]* M_ gxz[_t]) / M_ gupzz[_t]; - M_ gupyz[_t]= - ( M_ gxx[_t]* M_ gyz[_t]- M_ gxy[_t]* M_ gxz[_t]) / M_ gupzz[_t]; - M_ gupzz[_t]= ( M_ gxx[_t]* M_ gyy[_t]- M_ gxy[_t]* M_ gxy[_t]) / M_ gupzz[_t]; - //if(threadIdx.x == 0){ - // judge = co_c; - //} - //__syncthreads(); - - if(co_c == 0) - { - // M_ Gam^i_Res = M_ Gam^i + M_ gup^ij_,j - M_ Gmx_Res[_t] = M_ Gamx[_t] - (M_ gupxx[_t]*(M_ gupxx[_t]*M_ gxxx[_t]+M_ gupxy[_t]*M_ gxyx[_t]+M_ gupxz[_t]*M_ gxzx[_t]) - +M_ gupxy[_t]*(M_ gupxx[_t]*M_ gxyx[_t]+M_ gupxy[_t]*M_ gyyx[_t]+M_ gupxz[_t]*M_ gyzx[_t]) - +M_ gupxz[_t]*(M_ gupxx[_t]*M_ gxzx[_t]+M_ gupxy[_t]*M_ gyzx[_t]+M_ gupxz[_t]*M_ gzzx[_t]) - +M_ gupxx[_t]*(M_ gupxy[_t]*M_ gxxy[_t]+M_ gupyy[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gxzy[_t]) - +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxyy[_t]+M_ gupyy[_t]*M_ gyyy[_t]+M_ gupyz[_t]*M_ gyzy[_t]) - +M_ gupxz[_t]*(M_ gupxy[_t]*M_ gxzy[_t]+M_ 
gupyy[_t]*M_ gyzy[_t]+M_ gupyz[_t]*M_ gzzy[_t]) - +M_ gupxx[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) - +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) - +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); - M_ Gmy_Res[_t] = M_ Gamy[_t] - (M_ gupxx[_t]*(M_ gupxy[_t]*M_ gxxx[_t]+M_ gupyy[_t]*M_ gxyx[_t]+M_ gupyz[_t]*M_ gxzx[_t]) - +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxyx[_t]+M_ gupyy[_t]*M_ gyyx[_t]+M_ gupyz[_t]*M_ gyzx[_t]) - +M_ gupxz[_t]*(M_ gupxy[_t]*M_ gxzx[_t]+M_ gupyy[_t]*M_ gyzx[_t]+M_ gupyz[_t]*M_ gzzx[_t]) - +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxxy[_t]+M_ gupyy[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gxzy[_t]) - +M_ gupyy[_t]*(M_ gupxy[_t]*M_ gxyy[_t]+M_ gupyy[_t]*M_ gyyy[_t]+M_ gupyz[_t]*M_ gyzy[_t]) - +M_ gupyz[_t]*(M_ gupxy[_t]*M_ gxzy[_t]+M_ gupyy[_t]*M_ gyzy[_t]+M_ gupyz[_t]*M_ gzzy[_t]) - +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) - +M_ gupyy[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) - +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); - M_ Gmz_Res[_t] = M_ Gamz[_t] - (M_ gupxx[_t]*(M_ gupxz[_t]*M_ gxxx[_t]+M_ gupyz[_t]*M_ gxyx[_t]+M_ gupzz[_t]*M_ gxzx[_t]) - +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxyx[_t]+M_ gupyz[_t]*M_ gyyx[_t]+M_ gupzz[_t]*M_ gyzx[_t]) - +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxzx[_t]+M_ gupyz[_t]*M_ gyzx[_t]+M_ gupzz[_t]*M_ gzzx[_t]) - +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxxy[_t]+M_ gupyz[_t]*M_ gxyy[_t]+M_ gupzz[_t]*M_ gxzy[_t]) - +M_ gupyy[_t]*(M_ gupxz[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gyyy[_t]+M_ gupzz[_t]*M_ gyzy[_t]) - +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxzy[_t]+M_ gupyz[_t]*M_ gyzy[_t]+M_ gupzz[_t]*M_ gzzy[_t]) - +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) - +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) - +M_ gupzz[_t]*(M_ 
gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); - }//if(co == 0) - - // second kind of connection - M_ Gamxxx[_t]=HALF*( M_ gupxx[_t]*M_ gxxx[_t]+ M_ gupxy[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupxz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); - M_ Gamyxx[_t]=HALF*( M_ gupxy[_t]*M_ gxxx[_t]+ M_ gupyy[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupyz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); - M_ Gamzxx[_t]=HALF*( M_ gupxz[_t]*M_ gxxx[_t]+ M_ gupyz[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupzz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); - - M_ Gamxyy[_t]=HALF*( M_ gupxx[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupxy[_t]*M_ gyyy[_t]+ M_ gupxz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); - M_ Gamyyy[_t]=HALF*( M_ gupxy[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupyy[_t]*M_ gyyy[_t]+ M_ gupyz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); - M_ Gamzyy[_t]=HALF*( M_ gupxz[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupyz[_t]*M_ gyyy[_t]+ M_ gupzz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); - - M_ Gamxzz[_t]=HALF*( M_ gupxx[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupxy[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupxz[_t]*M_ gzzz[_t]); - M_ Gamyzz[_t]=HALF*( M_ gupxy[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupyy[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupyz[_t]*M_ gzzz[_t]); - M_ Gamzzz[_t]=HALF*( M_ gupxz[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupyz[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupzz[_t]*M_ gzzz[_t]); - - M_ Gamxxy[_t]=HALF*( M_ gupxx[_t]*M_ gxxy[_t]+ M_ gupxy[_t]*M_ gyyx[_t]+ M_ gupxz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); - M_ Gamyxy[_t]=HALF*( M_ gupxy[_t]*M_ gxxy[_t]+ M_ gupyy[_t]*M_ gyyx[_t]+ M_ gupyz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); - M_ Gamzxy[_t]=HALF*( M_ gupxz[_t]*M_ gxxy[_t]+ M_ gupyz[_t]*M_ gyyx[_t]+ M_ gupzz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); - - M_ Gamxxz[_t]=HALF*( M_ gupxx[_t]*M_ gxxz[_t]+ M_ gupxy[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupxz[_t]*M_ gzzx[_t]); - M_ Gamyxz[_t]=HALF*( M_ gupxy[_t]*M_ gxxz[_t]+ M_ gupyy[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) 
+ M_ gupyz[_t]*M_ gzzx[_t]); - M_ Gamzxz[_t]=HALF*( M_ gupxz[_t]*M_ gxxz[_t]+ M_ gupyz[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupzz[_t]*M_ gzzx[_t]); - - M_ Gamxyz[_t]=HALF*( M_ gupxx[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupxy[_t]*M_ gyyz[_t]+ M_ gupxz[_t]*M_ gzzy[_t]); - M_ Gamyyz[_t]=HALF*( M_ gupxy[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupyy[_t]*M_ gyyz[_t]+ M_ gupyz[_t]*M_ gzzy[_t]); - M_ Gamzyz[_t]=HALF*( M_ gupxz[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupyz[_t]*M_ gyyz[_t]+ M_ gupzz[_t]*M_ gzzy[_t]); - // Raise indices of \tilde A_{ij} and store in R_ij - - M_ Rxx[_t]= M_ gupxx[_t]* M_ gupxx[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupxy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupxz[_t]* M_ Azz[_t]+ - 2*(M_ gupxx[_t]* M_ gupxy[_t]* M_ Axy[_t]+ M_ gupxx[_t]* M_ gupxz[_t]* M_ Axz[_t]+ M_ gupxy[_t]* M_ gupxz[_t]* M_ Ayz[_t]); - - M_ Ryy[_t]= M_ gupxy[_t]* M_ gupxy[_t]* M_ Axx[_t]+ M_ gupyy[_t]* M_ gupyy[_t]* M_ Ayy[_t]+ M_ gupyz[_t]* M_ gupyz[_t]* M_ Azz[_t]+ - 2*(M_ gupxy[_t]* M_ gupyy[_t]* M_ Axy[_t]+ M_ gupxy[_t]* M_ gupyz[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ gupyz[_t]* M_ Ayz[_t]); - - M_ Rzz[_t]= M_ gupxz[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupyz[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ - 2*(M_ gupxz[_t]* M_ gupyz[_t]* M_ Axy[_t]+ M_ gupxz[_t]* M_ gupzz[_t]* M_ Axz[_t]+ M_ gupyz[_t]* M_ gupzz[_t]* M_ Ayz[_t]); - - M_ Rxy[_t]= M_ gupxx[_t]* M_ gupxy[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupyy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupyz[_t]* M_ Azz[_t]+ - (M_ gupxx[_t]* M_ gupyy[_t] + M_ gupxy[_t]* M_ gupxy[_t])* M_ Axy[_t] + - (M_ gupxx[_t]* M_ gupyz[_t] + M_ gupxz[_t]* M_ gupxy[_t])* M_ Axz[_t] + - (M_ gupxy[_t]* M_ gupyz[_t] + M_ gupxz[_t]* M_ gupyy[_t])* M_ Ayz[_t]; - - M_ Rxz[_t]= M_ gupxx[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ - (M_ gupxx[_t]* M_ gupyz[_t] + M_ gupxy[_t]* M_ gupxz[_t])* M_ Axy[_t] + - (M_ gupxx[_t]* M_ gupzz[_t] + 
M_ gupxz[_t]* M_ gupxz[_t])* M_ Axz[_t] + - (M_ gupxy[_t]* M_ gupzz[_t] + M_ gupxz[_t]* M_ gupyz[_t])* M_ Ayz[_t]; - - M_ Ryz[_t]= M_ gupxy[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupyy[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupyz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ - (M_ gupxy[_t]* M_ gupyz[_t] + M_ gupyy[_t]* M_ gupxz[_t])* M_ Axy[_t] + - (M_ gupxy[_t]* M_ gupzz[_t] + M_ gupyz[_t]* M_ gupxz[_t])* M_ Axz[_t] + - (M_ gupyy[_t]* M_ gupzz[_t] + M_ gupyz[_t]* M_ gupyz[_t])* M_ Ayz[_t]; - - // Right hand side for M_ Gam^i without shift terms... - - M_ Gamx_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxx[_t]+ M_ Lapy[_t] * M_ Rxy[_t]+ M_ Lapz[_t] * M_ Rxz[_t]) + - 2 * M_ alpn1[_t] * ( - -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxx[_t]+ M_ chiy[_t] * M_ Rxy[_t]+ M_ chiz[_t] * M_ Rxz[_t]) - - M_ gupxx[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - - M_ gupxy[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - - M_ gupxz[_t]* ( F2o3 * M_ Kz[_t] + 8 * PI * M_ Sz[_t] ) + - M_ Gamxxx[_t]* M_ Rxx[_t]+ M_ Gamxyy[_t]* M_ Ryy[_t]+ M_ Gamxzz[_t]* M_ Rzz[_t] + - 2 * ( M_ Gamxxy[_t]* M_ Rxy[_t]+ M_ Gamxxz[_t]* M_ Rxz[_t]+ M_ Gamxyz[_t]* M_ Ryz[_t]) ); - - M_ Gamy_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxy[_t]+ M_ Lapy[_t] * M_ Ryy[_t]+ M_ Lapz[_t] * M_ Ryz[_t]) + - 2 * M_ alpn1[_t] * ( - -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxy[_t]+ M_ chiy[_t] * M_ Ryy[_t]+ M_ chiz[_t] * M_ Ryz[_t]) - - M_ gupxy[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - - M_ gupyy[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - - M_ gupyz[_t]* ( F2o3 * M_ Kz [_t] + 8 * PI * M_ Sz[_t] ) + - M_ Gamyxx[_t]* M_ Rxx[_t]+ M_ Gamyyy[_t]* M_ Ryy[_t]+ M_ Gamyzz[_t]* M_ Rzz[_t] + - 2 * ( M_ Gamyxy[_t]* M_ Rxy[_t]+ M_ Gamyxz[_t]* M_ Rxz[_t]+ M_ Gamyyz[_t]* M_ Ryz[_t]) ); - - M_ Gamz_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxz[_t]+ M_ Lapy[_t] * M_ Ryz[_t]+ M_ Lapz[_t] * M_ Rzz[_t]) + - 2 * M_ alpn1[_t] * ( - -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxz[_t]+ M_ chiy[_t] * M_ Ryz[_t]+ M_ chiz[_t] * M_ Rzz[_t]) - - M_ gupxz[_t]* ( F2o3 * M_ Kx[_t] + 8 * 
PI * M_ Sx[_t] ) - - M_ gupyz[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - - M_ gupzz[_t]* ( F2o3 * M_ Kz[_t] + 8 * PI * M_ Sz[_t] ) + - M_ Gamzxx[_t]* M_ Rxx[_t]+ M_ Gamzyy[_t]* M_ Ryy[_t]+ M_ Gamzzz[_t]* M_ Rzz[_t] + - 2 * ( M_ Gamzxy[_t]* M_ Rxy[_t]+ M_ Gamzxz[_t]* M_ Rxz[_t]+ M_ Gamzyz[_t]* M_ Ryz[_t]) ); - - _t += STEP_SIZE; - } -} - -__global__ void compute_rhs_bssn_part3() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - M_ fxx [_t]= M_ gxxx[_t]+ M_ gxyy[_t]+ M_ gxzz[_t]; - M_ fxy[_t]= M_ gxyx[_t]+ M_ gyyy[_t]+ M_ gyzz[_t]; - M_ fxz[_t]= M_ gxzx[_t]+ M_ gyzy[_t]+ M_ gzzz[_t]; - - M_ Gamxa[_t]= M_ gupxx [_t]* M_ Gamxxx [_t]+ M_ gupyy[_t]* M_ Gamxyy[_t]+ M_ gupzz[_t]* M_ Gamxzz[_t]+ - 2*( M_ gupxy[_t]* M_ Gamxxy[_t]+ M_ gupxz[_t]* M_ Gamxxz[_t]+ M_ gupyz[_t]* M_ Gamxyz[_t]); - M_ Gamya[_t]= M_ gupxx [_t]* M_ Gamyxx [_t]+ M_ gupyy[_t]* M_ Gamyyy[_t]+ M_ gupzz[_t]* M_ Gamyzz[_t]+ - 2*( M_ gupxy[_t]* M_ Gamyxy[_t]+ M_ gupxz[_t]* M_ Gamyxz[_t]+ M_ gupyz[_t]* M_ Gamyyz[_t]); - M_ Gamza[_t]= M_ gupxx [_t]* M_ Gamzxx [_t]+ M_ gupyy[_t]* M_ Gamzyy[_t]+ M_ gupzz[_t]* M_ Gamzzz[_t]+ - 2*( M_ gupxy[_t]* M_ Gamzxy[_t]+ M_ gupxz[_t]* M_ Gamzxz[_t]+ M_ gupyz[_t]* M_ Gamzyz[_t]); - - - - M_ Gamx_rhs[_t] = M_ Gamx_rhs[_t] + F2o3 * M_ Gamxa[_t]* M_ div_beta[_t] - - M_ Gamxa[_t]* M_ betaxx [_t]- M_ Gamya[_t]* M_ betaxy[_t]- M_ Gamza[_t]* M_ betaxz[_t] + - F1o3 * (M_ gupxx [_t]* M_ fxx [_t] + M_ gupxy[_t]* M_ fxy[_t] + M_ gupxz[_t]* M_ fxz[_t] ) + - M_ gupxx [_t]* M_ gxxx [_t] + M_ gupyy[_t]* M_ gyyx [_t] + M_ gupzz[_t]* M_ gzzx [_t] + - 2 * (M_ gupxy[_t]* M_ gxyx [_t] + M_ gupxz[_t]* M_ gxzx [_t] + M_ gupyz[_t]* M_ gyzx [_t] ); - - M_ Gamy_rhs[_t] = M_ Gamy_rhs[_t] + F2o3 * M_ Gamya[_t]* M_ div_beta[_t] - - M_ Gamxa[_t]* M_ betayx [_t]- M_ Gamya[_t]* M_ betayy[_t]- M_ Gamza[_t]* M_ betayz[_t] + - F1o3 * (M_ gupxy[_t]* M_ fxx [_t] + M_ gupyy[_t]* M_ fxy[_t] + M_ gupyz[_t]* M_ fxz[_t] ) + - M_ gupxx [_t]* M_ gxxy[_t] + M_ gupyy[_t]* M_ gyyy[_t] 
+ M_ gupzz[_t]* M_ gzzy[_t] + - 2 * (M_ gupxy[_t]* M_ gxyy[_t] + M_ gupxz[_t]* M_ gxzy[_t] + M_ gupyz[_t]* M_ gyzy[_t] ); - - M_ Gamz_rhs[_t] = M_ Gamz_rhs[_t] + F2o3 * M_ Gamza[_t]* M_ div_beta[_t] - - M_ Gamxa[_t]* M_ betazx [_t]- M_ Gamya[_t]* M_ betazy[_t]- M_ Gamza[_t]* M_ betazz[_t] + - F1o3 * (M_ gupxz[_t]* M_ fxx [_t] + M_ gupyz[_t]* M_ fxy[_t] + M_ gupzz[_t]* M_ fxz[_t] ) + - M_ gupxx [_t]* M_ gxxz[_t] + M_ gupyy[_t]* M_ gyyz[_t] + M_ gupzz[_t]* M_ gzzz[_t] + - 2 * (M_ gupxy[_t]* M_ gxyz[_t] + M_ gupxz[_t]* M_ gxzz[_t] + M_ gupyz[_t]* M_ gyzz[_t] ) ; //rhs M_ for M_ Gam^i - - //first kind of connection stored in M_ gij,k - M_ gxxx [_t]= M_ gxx [_t]* M_ Gamxxx [_t]+ M_ gxy[_t]* M_ Gamyxx [_t]+ M_ gxz[_t]* M_ Gamzxx[_t]; - M_ gxyx [_t]= M_ gxx [_t]* M_ Gamxxy[_t]+ M_ gxy[_t]* M_ Gamyxy[_t]+ M_ gxz[_t]* M_ Gamzxy[_t]; - M_ gxzx [_t]= M_ gxx [_t]* M_ Gamxxz[_t]+ M_ gxy[_t]* M_ Gamyxz[_t]+ M_ gxz[_t]* M_ Gamzxz[_t]; - M_ gyyx [_t]= M_ gxx [_t]* M_ Gamxyy[_t]+ M_ gxy[_t]* M_ Gamyyy[_t]+ M_ gxz[_t]* M_ Gamzyy[_t]; - M_ gyzx [_t]= M_ gxx [_t]* M_ Gamxyz[_t]+ M_ gxy[_t]* M_ Gamyyz[_t]+ M_ gxz[_t]* M_ Gamzyz[_t]; - M_ gzzx [_t]= M_ gxx [_t]* M_ Gamxzz[_t]+ M_ gxy[_t]* M_ Gamyzz[_t]+ M_ gxz[_t]* M_ Gamzzz[_t]; - M_ gxxy[_t]= M_ gxy[_t]* M_ Gamxxx [_t]+ M_ gyy[_t]* M_ Gamyxx [_t]+ M_ gyz[_t]* M_ Gamzxx[_t]; - M_ gxyy[_t]= M_ gxy[_t]* M_ Gamxxy[_t]+ M_ gyy[_t]* M_ Gamyxy[_t]+ M_ gyz[_t]* M_ Gamzxy[_t]; - M_ gxzy[_t]= M_ gxy[_t]* M_ Gamxxz[_t]+ M_ gyy[_t]* M_ Gamyxz[_t]+ M_ gyz[_t]* M_ Gamzxz[_t]; - M_ gyyy[_t]= M_ gxy[_t]* M_ Gamxyy[_t]+ M_ gyy[_t]* M_ Gamyyy[_t]+ M_ gyz[_t]* M_ Gamzyy[_t]; - M_ gyzy[_t]= M_ gxy[_t]* M_ Gamxyz[_t]+ M_ gyy[_t]* M_ Gamyyz[_t]+ M_ gyz[_t]* M_ Gamzyz[_t]; - M_ gzzy[_t]= M_ gxy[_t]* M_ Gamxzz[_t]+ M_ gyy[_t]* M_ Gamyzz[_t]+ M_ gyz[_t]* M_ Gamzzz[_t]; - M_ gxxz[_t]= M_ gxz[_t]* M_ Gamxxx [_t]+ M_ gyz[_t]* M_ Gamyxx [_t]+ M_ gzz[_t]* M_ Gamzxx[_t]; - M_ gxyz[_t]= M_ gxz[_t]* M_ Gamxxy[_t]+ M_ gyz[_t]* M_ Gamyxy[_t]+ M_ gzz[_t]* M_ Gamzxy[_t]; 
- M_ gxzz[_t]= M_ gxz[_t]* M_ Gamxxz[_t]+ M_ gyz[_t]* M_ Gamyxz[_t]+ M_ gzz[_t]* M_ Gamzxz[_t]; - M_ gyyz[_t]= M_ gxz[_t]* M_ Gamxyy[_t]+ M_ gyz[_t]* M_ Gamyyy[_t]+ M_ gzz[_t]* M_ Gamzyy[_t]; - M_ gyzz[_t]= M_ gxz[_t]* M_ Gamxyz[_t]+ M_ gyz[_t]* M_ Gamyyz[_t]+ M_ gzz[_t]* M_ Gamzyz[_t]; - M_ gzzz[_t]= M_ gxz[_t]* M_ Gamxzz[_t]+ M_ gyz[_t]* M_ Gamyzz[_t]+ M_ gzz[_t]* M_ Gamzzz[_t]; - - - _t += STEP_SIZE; - } -} - -__global__ void compute_rhs_bssn_part4() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - M_ Rxx [_t]= - HALF *M_ Rxx [_t] + - M_ gxx [_t]* M_ Gamxx[_t] +M_ gxy[_t]* M_ Gamyx [_t] + M_ gxz[_t]* M_ Gamzx [_t]+ - M_ Gamxa[_t]*M_ gxxx [_t]+ M_ Gamya[_t]*M_ gxyx [_t]+ M_ Gamza[_t]*M_ gxzx [_t] + - M_ gupxx [_t]*( - 2*(M_ Gamxxx [_t]*M_ gxxx [_t]+ M_ Gamyxx [_t]*M_ gxyx [_t]+ M_ Gamzxx [_t]*M_ gxzx[_t]) + - M_ Gamxxx [_t]*M_ gxxx [_t]+ M_ Gamyxx [_t]*M_ gxxy[_t]+ M_ Gamzxx [_t]*M_ gxxz[_t])+ - M_ gupxy[_t]*( - 2*(M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gyyx [_t]+ M_ Gamzxx [_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxyx [_t]+ M_ Gamzxy[_t]*M_ gxzx[_t]) + - M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxxy[_t]+ M_ Gamzxy[_t]*M_ gxxz[_t] + - M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxyz[_t])+ - M_ gupxz[_t]*( - 2*(M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gyzx [_t]+ M_ Gamzxx [_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxyx [_t]+ M_ Gamzxz[_t]*M_ gxzx[_t]) + - M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxxy[_t]+ M_ Gamzxz[_t]*M_ gxxz[_t] + - M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gxzy[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t])+ - M_ gupyy[_t]*( - 2*(M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gyyx [_t]+ M_ Gamzxy[_t]*M_ gyzx[_t]) + - M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t])+ - M_ gupyz[_t]*( - 2*(M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gyzx [_t]+ M_ Gamzxy[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ 
gxyx [_t]+ M_ Gamyxz[_t]*M_ gyyx [_t]+ M_ Gamzxz[_t]*M_ gyzx[_t]) + - M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxyz[_t] + - M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t])+ - M_ gupzz[_t]*( - 2*(M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gyzx [_t]+ M_ Gamzxz[_t]*M_ gzzx[_t]) + - M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t]); - - M_ Ryy[_t]= - HALF *M_ Ryy[_t] + - M_ gxy[_t]* M_ Gamxy[_t]+ M_ gyy[_t]* M_ Gamyy[_t] + M_ gyz[_t]* M_ Gamzy[_t] + - M_ Gamxa[_t]*M_ gxyy[_t]+ M_ Gamya[_t]*M_ gyyy[_t]+ M_ Gamza[_t]*M_ gyzy[_t] + - M_ gupxx [_t]*( - 2*(M_ Gamxxy[_t]*M_ gxxy[_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxzy[_t]) + - M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t])+ - M_ gupxy[_t]*( - 2*(M_ Gamxxy[_t]*M_ gxyy[_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyzy[_t] + - M_ Gamxyy[_t]*M_ gxxy[_t]+ M_ Gamyyy[_t]*M_ gxyy[_t]+ M_ Gamzyy[_t]*M_ gxzy[_t]) + - M_ Gamxyy[_t]*M_ gxyx [_t]+ M_ Gamyyy[_t]*M_ gxyy[_t]+ M_ Gamzyy[_t]*M_ gxyz[_t] + - M_ Gamxxy[_t]*M_ gyyx [_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyyz[_t])+ - M_ gupxz[_t]*( - 2*(M_ Gamxxy[_t]*M_ gxzy[_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gzzy[_t] + - M_ Gamxyz[_t]*M_ gxxy[_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxzy[_t]) + - M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxyz[_t] + - M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ - M_ gupyy[_t]*( - 2*(M_ Gamxyy[_t]*M_ gxyy[_t]+ M_ Gamyyy[_t]*M_ gyyy[_t]+ M_ Gamzyy[_t]*M_ gyzy[_t]) + - M_ Gamxyy[_t]*M_ gyyx [_t]+ M_ Gamyyy[_t]*M_ gyyy[_t]+ M_ Gamzyy[_t]*M_ gyyz[_t])+ - M_ gupyz[_t]*( - 2*(M_ Gamxyy[_t]*M_ gxzy[_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gzzy[_t] + - M_ Gamxyz[_t]*M_ gxyy[_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyzy[_t]) + - M_ Gamxyz[_t]*M_ gyyx [_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ 
gyyz[_t] + - M_ Gamxyy[_t]*M_ gyzx [_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t])+ - M_ gupzz[_t]*( - 2*(M_ Gamxyz[_t]*M_ gxzy[_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gzzy[_t]) + - M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t]); - - M_ Rzz[_t]= - HALF *M_ Rzz[_t] + - M_ gxz[_t]* M_ Gamxz[_t] +M_ gyz[_t]* M_ Gamyz[_t] + M_ gzz[_t]* M_ Gamzz[_t] + - M_ Gamxa[_t]*M_ gxzz[_t]+ M_ Gamya[_t]*M_ gyzz[_t]+ M_ Gamza[_t]*M_ gzzz[_t] + - M_ gupxx [_t]*( - 2*(M_ Gamxxz[_t]*M_ gxxz[_t]+ M_ Gamyxz[_t]*M_ gxyz[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t]) + - M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t])+ - M_ gupxy[_t]*( - 2*(M_ Gamxxz[_t]*M_ gxyz[_t]+ M_ Gamyxz[_t]*M_ gyyz[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + - M_ Gamxyz[_t]*M_ gxxz[_t]+ M_ Gamyyz[_t]*M_ gxyz[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t]) + - M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gxzy[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + - M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t])+ - M_ gupxz[_t]*( - 2*(M_ Gamxxz[_t]*M_ gxzz[_t]+ M_ Gamyxz[_t]*M_ gyzz[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t] + - M_ Gamxzz[_t]*M_ gxxz[_t]+ M_ Gamyzz[_t]*M_ gxyz[_t]+ M_ Gamzzz[_t]*M_ gxzz[_t]) + - M_ Gamxzz[_t]*M_ gxzx [_t]+ M_ Gamyzz[_t]*M_ gxzy[_t]+ M_ Gamzzz[_t]*M_ gxzz[_t] + - M_ Gamxxz[_t]*M_ gzzx [_t]+ M_ Gamyxz[_t]*M_ gzzy[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t])+ - M_ gupyy[_t]*( - 2*(M_ Gamxyz[_t]*M_ gxyz[_t]+ M_ Gamyyz[_t]*M_ gyyz[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t]) + - M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t])+ - M_ gupyz[_t]*( - 2*(M_ Gamxyz[_t]*M_ gxzz[_t]+ M_ Gamyyz[_t]*M_ gyzz[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t] + - M_ Gamxzz[_t]*M_ gxyz[_t]+ M_ Gamyzz[_t]*M_ gyyz[_t]+ M_ Gamzzz[_t]*M_ gyzz[_t]) + - M_ Gamxzz[_t]*M_ gyzx [_t]+ M_ Gamyzz[_t]*M_ gyzy[_t]+ M_ Gamzzz[_t]*M_ gyzz[_t] + - M_ Gamxyz[_t]*M_ gzzx [_t]+ M_ Gamyyz[_t]*M_ gzzy[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t])+ - M_ gupzz[_t]*( - 2*(M_ Gamxzz[_t]*M_ gxzz[_t]+ M_ 
Gamyzz[_t]*M_ gyzz[_t]+ M_ Gamzzz[_t]*M_ gzzz[_t]) + - M_ Gamxzz[_t]*M_ gzzx [_t]+ M_ Gamyzz[_t]*M_ gzzy[_t]+ M_ Gamzzz[_t]*M_ gzzz[_t]); - - M_ Rxy[_t]= HALF*( -M_ Rxy[_t] + - M_ gxx [_t]* M_ Gamxy[_t]+ M_ gxy[_t]* M_ Gamyy[_t]+M_ gxz[_t]* M_ Gamzy[_t] + - M_ gxy[_t]* M_ Gamxx [_t]+ M_ gyy[_t]* M_ Gamyx [_t]+M_ gyz[_t]* M_ Gamzx [_t] + - M_ Gamxa[_t]*M_ gxyx [_t]+ M_ Gamya[_t]*M_ gyyx [_t]+ M_ Gamza[_t]*M_ gyzx [_t] + - M_ Gamxa[_t]*M_ gxxy[_t]+ M_ Gamya[_t]*M_ gxyy[_t]+ M_ Gamza[_t]*M_ gxzy[_t])+ - M_ gupxx [_t]*( - M_ Gamxxx [_t]*M_ gxxy[_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxzy[_t] + - M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxyx [_t]+ M_ Gamzxy[_t]*M_ gxzx [_t] + - M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxyz[_t])+ - M_ gupxy[_t]*( - M_ Gamxxx [_t]*M_ gxyy[_t]+ M_ Gamyxx [_t]*M_ gyyy[_t]+ M_ Gamzxx [_t]*M_ gyzy[_t] + - M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gyyx [_t]+ M_ Gamzxy[_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t] + - M_ Gamxxy[_t]*M_ gxxy[_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxzy[_t] + - M_ Gamxyy[_t]*M_ gxxx [_t]+ M_ Gamyyy[_t]*M_ gxyx [_t]+ M_ Gamzyy[_t]*M_ gxzx [_t] + - M_ Gamxxx [_t]*M_ gyyx [_t]+ M_ Gamyxx [_t]*M_ gyyy[_t]+ M_ Gamzxx [_t]*M_ gyyz[_t])+ - M_ gupxz[_t]*( - M_ Gamxxx [_t]*M_ gxzy[_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gzzy[_t] + - M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gyzx [_t]+ M_ Gamzxy[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxyz[_t] + - M_ Gamxxz[_t]*M_ gxxy[_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxzy[_t] + - M_ Gamxyz[_t]*M_ gxxx [_t]+ M_ Gamyyz[_t]*M_ gxyx [_t]+ M_ Gamzyz[_t]*M_ gxzx [_t] + - M_ Gamxxx [_t]*M_ gyzx [_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t])+ - M_ gupyy[_t]*( - M_ Gamxxy[_t]*M_ gxyy[_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyzy[_t] + - M_ Gamxyy[_t]*M_ gxyx [_t]+ M_ 
Gamyyy[_t]*M_ gyyx [_t]+ M_ Gamzyy[_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gyyx [_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyyz[_t])+ - M_ gupyz[_t]*( - M_ Gamxxy[_t]*M_ gxzy[_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gzzy[_t] + - M_ Gamxyy[_t]*M_ gxzx [_t]+ M_ Gamyyy[_t]*M_ gyzx [_t]+ M_ Gamzyy[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gyyx [_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyyz[_t] + - M_ Gamxxz[_t]*M_ gxyy[_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyzy[_t] + - M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gyyx [_t]+ M_ Gamzyz[_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ - M_ gupzz[_t]*( - M_ Gamxxz[_t]*M_ gxzy[_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gzzy[_t] + - M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gyzx [_t]+ M_ Gamzyz[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t]); - - M_ Rxz[_t]= HALF*( -M_ Rxz[_t] + - M_ gxx [_t]* M_ Gamxz[_t]+ M_ gxy[_t]* M_ Gamyz[_t]+M_ gxz[_t]* M_ Gamzz[_t] + - M_ gxz[_t]* M_ Gamxx [_t]+ M_ gyz[_t]* M_ Gamyx [_t]+M_ gzz[_t]* M_ Gamzx [_t] + - M_ Gamxa[_t]*M_ gxzx [_t]+ M_ Gamya[_t]*M_ gyzx [_t]+ M_ Gamza[_t]*M_ gzzx [_t] + - M_ Gamxa[_t]*M_ gxxz[_t]+ M_ Gamya[_t]*M_ gxyz[_t]+ M_ Gamza[_t]*M_ gxzz[_t])+ - M_ gupxx [_t]*( - M_ Gamxxx [_t]*M_ gxxz[_t]+ M_ Gamyxx [_t]*M_ gxyz[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t] + - M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxyx [_t]+ M_ Gamzxz[_t]*M_ gxzx [_t] + - M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gxzy[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t])+ - M_ gupxy[_t]*( - M_ Gamxxx [_t]*M_ gxyz[_t]+ M_ Gamyxx [_t]*M_ gyyz[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t] + - M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gyyx [_t]+ M_ Gamzxz[_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + - M_ Gamxxy[_t]*M_ gxxz[_t]+ M_ Gamyxy[_t]*M_ gxyz[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + - M_ Gamxyz[_t]*M_ gxxx [_t]+ M_ Gamyyz[_t]*M_ gxyx 
[_t]+ M_ Gamzyz[_t]*M_ gxzx [_t] + - M_ Gamxxx [_t]*M_ gyzx [_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t])+ - M_ gupxz[_t]*( - M_ Gamxxx [_t]*M_ gxzz[_t]+ M_ Gamyxx [_t]*M_ gyzz[_t]+ M_ Gamzxx [_t]*M_ gzzz[_t] + - M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gyzx [_t]+ M_ Gamzxz[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t] + - M_ Gamxxz[_t]*M_ gxxz[_t]+ M_ Gamyxz[_t]*M_ gxyz[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t] + - M_ Gamxzz[_t]*M_ gxxx [_t]+ M_ Gamyzz[_t]*M_ gxyx [_t]+ M_ Gamzzz[_t]*M_ gxzx [_t] + - M_ Gamxxx [_t]*M_ gzzx [_t]+ M_ Gamyxx [_t]*M_ gzzy[_t]+ M_ Gamzxx [_t]*M_ gzzz[_t])+ - M_ gupyy[_t]*( - M_ Gamxxy[_t]*M_ gxyz[_t]+ M_ Gamyxy[_t]*M_ gyyz[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t] + - M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gyyx [_t]+ M_ Gamzyz[_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ - M_ gupyz[_t]*( - M_ Gamxxy[_t]*M_ gxzz[_t]+ M_ Gamyxy[_t]*M_ gyzz[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t] + - M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gyzx [_t]+ M_ Gamzyz[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + - M_ Gamxxz[_t]*M_ gxyz[_t]+ M_ Gamyxz[_t]*M_ gyyz[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + - M_ Gamxzz[_t]*M_ gxyx [_t]+ M_ Gamyzz[_t]*M_ gyyx [_t]+ M_ Gamzzz[_t]*M_ gyzx [_t] + - M_ Gamxxy[_t]*M_ gzzx [_t]+ M_ Gamyxy[_t]*M_ gzzy[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t])+ - M_ gupzz[_t]*( - M_ Gamxxz[_t]*M_ gxzz[_t]+ M_ Gamyxz[_t]*M_ gyzz[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t] + - M_ Gamxzz[_t]*M_ gxzx [_t]+ M_ Gamyzz[_t]*M_ gyzx [_t]+ M_ Gamzzz[_t]*M_ gzzx [_t] + - M_ Gamxxz[_t]*M_ gzzx [_t]+ M_ Gamyxz[_t]*M_ gzzy[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t]); - - M_ Ryz[_t]= HALF*( -M_ Ryz[_t] + - M_ gxy[_t]* M_ Gamxz[_t]+M_ gyy[_t]* M_ Gamyz[_t]+M_ gyz[_t]* M_ Gamzz[_t] + - M_ gxz[_t]* M_ Gamxy[_t]+M_ gyz[_t]* M_ Gamyy[_t]+M_ gzz[_t]* M_ Gamzy[_t] + - M_ Gamxa[_t]*M_ gxzy[_t]+ M_ Gamya[_t]*M_ gyzy[_t]+ M_ Gamza[_t]*M_ 
gzzy[_t] + - M_ Gamxa[_t]*M_ gxyz[_t]+ M_ Gamya[_t]*M_ gyyz[_t]+ M_ Gamza[_t]*M_ gyzz[_t])+ - M_ gupxx [_t]*( - M_ Gamxxy[_t]*M_ gxxz[_t]+ M_ Gamyxy[_t]*M_ gxyz[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + - M_ Gamxxz[_t]*M_ gxxy[_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxzy[_t] + - M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t])+ - M_ gupxy[_t]*( - M_ Gamxxy[_t]*M_ gxyz[_t]+ M_ Gamyxy[_t]*M_ gyyz[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t] + - M_ Gamxxz[_t]*M_ gxyy[_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyzy[_t] + - M_ Gamxyy[_t]*M_ gxzx [_t]+ M_ Gamyyy[_t]*M_ gxzy[_t]+ M_ Gamzyy[_t]*M_ gxzz[_t] + - M_ Gamxyy[_t]*M_ gxxz[_t]+ M_ Gamyyy[_t]*M_ gxyz[_t]+ M_ Gamzyy[_t]*M_ gxzz[_t] + - M_ Gamxyz[_t]*M_ gxxy[_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxzy[_t] + - M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ - M_ gupxz[_t]*( - M_ Gamxxy[_t]*M_ gxzz[_t]+ M_ Gamyxy[_t]*M_ gyzz[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t] + - M_ Gamxxz[_t]*M_ gxzy[_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gzzy[_t] + - M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gxzy[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + - M_ Gamxyz[_t]*M_ gxxz[_t]+ M_ Gamyyz[_t]*M_ gxyz[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + - M_ Gamxzz[_t]*M_ gxxy[_t]+ M_ Gamyzz[_t]*M_ gxyy[_t]+ M_ Gamzzz[_t]*M_ gxzy[_t] + - M_ Gamxxy[_t]*M_ gzzx [_t]+ M_ Gamyxy[_t]*M_ gzzy[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t])+ - M_ gupyy[_t]*( - M_ Gamxyy[_t]*M_ gxyz[_t]+ M_ Gamyyy[_t]*M_ gyyz[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t] + - M_ Gamxyz[_t]*M_ gxyy[_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyzy[_t] + - M_ Gamxyy[_t]*M_ gyzx [_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t])+ - M_ gupyz[_t]*( - M_ Gamxyy[_t]*M_ gxzz[_t]+ M_ Gamyyy[_t]*M_ gyzz[_t]+ M_ Gamzyy[_t]*M_ gzzz[_t] + - M_ Gamxyz[_t]*M_ gxzy[_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gzzy[_t] + - M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t] + - M_ Gamxyz[_t]*M_ gxyz[_t]+ M_ Gamyyz[_t]*M_ 
gyyz[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t] + - M_ Gamxzz[_t]*M_ gxyy[_t]+ M_ Gamyzz[_t]*M_ gyyy[_t]+ M_ Gamzzz[_t]*M_ gyzy[_t] + - M_ Gamxyy[_t]*M_ gzzx [_t]+ M_ Gamyyy[_t]*M_ gzzy[_t]+ M_ Gamzyy[_t]*M_ gzzz[_t])+ - M_ gupzz[_t]*( - M_ Gamxyz[_t]*M_ gxzz[_t]+ M_ Gamyyz[_t]*M_ gyzz[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t] + - M_ Gamxzz[_t]*M_ gxzy[_t]+ M_ Gamyzz[_t]*M_ gyzy[_t]+ M_ Gamzzz[_t]*M_ gzzy[_t] + - M_ Gamxyz[_t]*M_ gzzx [_t]+ M_ Gamyyz[_t]*M_ gzzy[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t]); - - _t += STEP_SIZE; - } -} -__global__ void compute_rhs_bssn_part5() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - M_ fxx [_t]=M_ fxx [_t]- M_ Gamxxx [_t]* M_ chix [_t]- M_ Gamyxx [_t]* M_ chiy[_t]- M_ Gamzxx [_t]* M_ chiz[_t]; - M_ fxy[_t]=M_ fxy[_t]- M_ Gamxxy[_t]* M_ chix [_t]- M_ Gamyxy[_t]* M_ chiy[_t]- M_ Gamzxy[_t]* M_ chiz[_t]; - M_ fxz[_t]=M_ fxz[_t]- M_ Gamxxz[_t]* M_ chix [_t]- M_ Gamyxz[_t]* M_ chiy[_t]- M_ Gamzxz[_t]* M_ chiz[_t]; - M_ fyy[_t]=M_ fyy[_t]- M_ Gamxyy[_t]* M_ chix [_t]- M_ Gamyyy[_t]* M_ chiy[_t]- M_ Gamzyy[_t]* M_ chiz[_t]; - M_ fyz[_t]=M_ fyz[_t]- M_ Gamxyz[_t]* M_ chix [_t]- M_ Gamyyz[_t]* M_ chiy[_t]- M_ Gamzyz[_t]* M_ chiz[_t]; - M_ fzz[_t]=M_ fzz[_t]- M_ Gamxzz[_t]* M_ chix [_t]- M_ Gamyzz[_t]* M_ chiy[_t]- M_ Gamzzz[_t]* M_ chiz[_t]; - // M_ Store D^l D_l M_ chi - 3/(2*M_ chi) D^l M_ chi D_l M_ chi inM_ f[_t] - - M_ f[_t] = M_ gupxx [_t]* (M_ fxx [_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chix [_t]) + - M_ gupyy[_t]* (M_ fyy[_t]- F3o2/M_ chin1[_t] * M_ chiy[_t]* M_ chiy[_t]) + - M_ gupzz[_t]* (M_ fzz[_t]- F3o2/M_ chin1[_t] * M_ chiz[_t]* M_ chiz[_t]) + - 2 *M_ gupxy[_t]* (M_ fxy[_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chiy[_t]) + - 2 *M_ gupxz[_t]* (M_ fxz[_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chiz[_t]) + - 2 *M_ gupyz[_t]* (M_ fyz[_t]- F3o2/M_ chin1[_t] * M_ chiy[_t]* M_ chiz[_t]); - // M_ Add M_ chi part toM_ Ricci tensor: - - M_ Rxx [_t]=M_ Rxx [_t]+ (M_ fxx [_t]- M_ chix[_t]*M_ chix[_t]/M_ chin1[_t]/2 +M_ gxx [_t]*M_ 
f[_t])/M_ chin1[_t]/2; - M_ Ryy[_t]=M_ Ryy[_t]+ (M_ fyy[_t]- M_ chiy[_t]*M_ chiy[_t]/M_ chin1[_t]/2 +M_ gyy[_t]*M_ f[_t])/M_ chin1[_t]/2; - M_ Rzz[_t]=M_ Rzz[_t]+ (M_ fzz[_t]- M_ chiz[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gzz[_t]*M_ f[_t])/M_ chin1[_t]/2; - M_ Rxy[_t]=M_ Rxy[_t]+ (M_ fxy[_t]- M_ chix[_t]*M_ chiy[_t]/M_ chin1[_t]/2 +M_ gxy[_t]*M_ f[_t])/M_ chin1[_t]/2; - M_ Rxz[_t]=M_ Rxz[_t]+ (M_ fxz[_t]- M_ chix[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gxz[_t]*M_ f[_t])/M_ chin1[_t]/2; - M_ Ryz[_t]=M_ Ryz[_t]+ (M_ fyz[_t]- M_ chiy[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gyz[_t]*M_ f[_t])/M_ chin1[_t]/2; - - - _t += STEP_SIZE; - } -} - -__global__ void compute_rhs_bssn_part6() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - M_ gxxx [_t]= (M_ gupxx [_t]* M_ chix [_t]+M_ gupxy[_t]* M_ chiy[_t]+M_ gupxz[_t]* M_ chiz[_t])/M_ chin1[_t]; - M_ gxxy[_t]= (M_ gupxy[_t]* M_ chix [_t]+M_ gupyy[_t]* M_ chiy[_t]+M_ gupyz[_t]* M_ chiz[_t])/M_ chin1[_t]; - M_ gxxz[_t]= (M_ gupxz[_t]* M_ chix [_t]+M_ gupyz[_t]* M_ chiy[_t]+M_ gupzz[_t]* M_ chiz[_t])/M_ chin1[_t]; - // nowM_ get physical second kind of connection - M_ Gamxxx [_t]= M_ Gamxxx [_t]- ( (M_ chix [_t]+ M_ chix[_t])/M_ chin1[_t] -M_ gxx [_t]*M_ gxxx [_t])*HALF; - M_ Gamyxx [_t]= M_ Gamyxx [_t]- ( -M_ gxx [_t]*M_ gxxy[_t])*HALF; - M_ Gamzxx [_t]= M_ Gamzxx [_t]- ( -M_ gxx [_t]*M_ gxxz[_t])*HALF; - M_ Gamxyy[_t]= M_ Gamxyy[_t]- ( -M_ gyy[_t]*M_ gxxx [_t])*HALF; - M_ Gamyyy[_t]= M_ Gamyyy[_t]- ( (M_ chiy[_t]+ M_ chiy[_t])/M_ chin1[_t] -M_ gyy[_t]*M_ gxxy[_t])*HALF; - M_ Gamzyy[_t]= M_ Gamzyy[_t]- ( -M_ gyy[_t]*M_ gxxz[_t])*HALF; - M_ Gamxzz[_t]= M_ Gamxzz[_t]- ( -M_ gzz[_t]*M_ gxxx [_t])*HALF; - M_ Gamyzz[_t]= M_ Gamyzz[_t]- ( -M_ gzz[_t]*M_ gxxy[_t])*HALF; - M_ Gamzzz[_t]= M_ Gamzzz[_t]- ( (M_ chiz[_t]+ M_ chiz[_t])/M_ chin1[_t] -M_ gzz[_t]*M_ gxxz[_t])*HALF; - M_ Gamxxy[_t]= M_ Gamxxy[_t]- ( M_ chiy[_t] /M_ chin1[_t] -M_ gxy[_t]*M_ gxxx [_t])*HALF; - M_ Gamyxy[_t]= M_ Gamyxy[_t]- ( M_ chix [_t]/M_ 
chin1[_t] -M_ gxy[_t]*M_ gxxy[_t])*HALF; - M_ Gamzxy[_t]= M_ Gamzxy[_t]- ( -M_ gxy[_t]*M_ gxxz[_t])*HALF; - M_ Gamxxz[_t]= M_ Gamxxz[_t]- ( M_ chiz[_t] /M_ chin1[_t] -M_ gxz[_t]*M_ gxxx [_t])*HALF; - M_ Gamyxz[_t]= M_ Gamyxz[_t]- ( -M_ gxz[_t]*M_ gxxy[_t])*HALF; - M_ Gamzxz[_t]= M_ Gamzxz[_t]- ( M_ chix [_t]/M_ chin1[_t] -M_ gxz[_t]*M_ gxxz[_t])*HALF; - M_ Gamxyz[_t]= M_ Gamxyz[_t]- ( -M_ gyz[_t]*M_ gxxx [_t])*HALF; - M_ Gamyyz[_t]= M_ Gamyyz[_t]- ( M_ chiz[_t] /M_ chin1[_t] -M_ gyz[_t]*M_ gxxy[_t])*HALF; - M_ Gamzyz[_t]= M_ Gamzyz[_t]- ( M_ chiy[_t]/M_ chin1[_t] -M_ gyz[_t]*M_ gxxz[_t])*HALF; - - M_ fxx [_t]=M_ fxx [_t]- M_ Gamxxx[_t]*M_ Lapx [_t]- M_ Gamyxx[_t]*M_ Lapy[_t]- M_ Gamzxx[_t]*M_ Lapz[_t]; - M_ fyy[_t]=M_ fyy[_t]- M_ Gamxyy[_t]*M_ Lapx [_t]- M_ Gamyyy[_t]*M_ Lapy[_t]- M_ Gamzyy[_t]*M_ Lapz[_t]; - M_ fzz[_t]=M_ fzz[_t]- M_ Gamxzz[_t]*M_ Lapx [_t]- M_ Gamyzz[_t]*M_ Lapy[_t]- M_ Gamzzz[_t]*M_ Lapz[_t]; - M_ fxy[_t]=M_ fxy[_t]- M_ Gamxxy[_t]*M_ Lapx [_t]- M_ Gamyxy[_t]*M_ Lapy[_t]- M_ Gamzxy[_t]*M_ Lapz[_t]; - M_ fxz[_t]=M_ fxz[_t]- M_ Gamxxz[_t]*M_ Lapx [_t]- M_ Gamyxz[_t]*M_ Lapy[_t]- M_ Gamzxz[_t]*M_ Lapz[_t]; - M_ fyz[_t]=M_ fyz[_t]- M_ Gamxyz[_t]*M_ Lapx [_t]- M_ Gamyyz[_t]*M_ Lapy[_t]- M_ Gamzyz[_t]*M_ Lapz[_t]; - - // store D^i D_i Lap in M_ trK_rhs[_t] upto M_ chi - M_ trK_rhs[_t] = M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t]+ - 2* (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]); - // M_ Add lapse and M_ S_ij parts toM_ Ricci tensor: - - //follow bam code - M_ S[_t] = M_ chin1[_t] * ( M_ gupxx[_t] * M_ Sxx[_t] + M_ gupyy[_t] * M_ Syy[_t] + M_ gupzz[_t] * M_ Szz[_t] + - - 2 * ( M_ gupxy[_t] * M_ Sxy[_t] + M_ gupxz[_t] * M_ Sxz[_t] + M_ gupyz[_t] * M_ Syz[_t] ) ); - - -M_ f[_t] = F2o3 * M_ trK[_t] * M_ trK[_t] -( - - M_ gupxx[_t] * ( - - M_ gupxx[_t] * M_ Axx[_t] * M_ Axx[_t] + M_ gupyy[_t] * M_ Axy[_t] * M_ Axy[_t] + M_ gupzz[_t] * M_ Axz[_t] * M_ Axz[_t] + - - 2 * (M_ gupxy[_t] * M_ Axx[_t] * 
M_ Axy[_t] + M_ gupxz[_t] * M_ Axx[_t] * M_ Axz[_t] + M_ gupyz[_t] * M_ Axy[_t] * M_ Axz[_t]) ) + - - M_ gupyy[_t] * ( - - M_ gupxx[_t] * M_ Axy[_t] * M_ Axy[_t] + M_ gupyy[_t] * M_ Ayy[_t] * M_ Ayy[_t] + M_ gupzz[_t] * M_ Ayz[_t] * M_ Ayz[_t] + - - 2 * (M_ gupxy[_t] * M_ Axy[_t] * M_ Ayy[_t] + M_ gupxz[_t] * M_ Axy[_t] * M_ Ayz[_t] + M_ gupyz[_t] * M_ Ayy[_t] * M_ Ayz[_t]) ) + - - M_ gupzz[_t] * ( - - M_ gupxx[_t] * M_ Axz[_t] * M_ Axz[_t] + M_ gupyy[_t] * M_ Ayz[_t] * M_ Ayz[_t] + M_ gupzz[_t] * M_ Azz[_t] * M_ Azz[_t] + - - 2 * (M_ gupxy[_t] * M_ Axz[_t] * M_ Ayz[_t] + M_ gupxz[_t] * M_ Axz[_t] * M_ Azz[_t] + M_ gupyz[_t] * M_ Ayz[_t] * M_ Azz[_t]) ) + - - 2 * ( - - M_ gupxy[_t] * ( - - M_ gupxx[_t] * M_ Axx[_t] * M_ Axy[_t] + M_ gupyy[_t] * M_ Axy[_t] * M_ Ayy[_t] + M_ gupzz[_t] * M_ Axz[_t] * M_ Ayz[_t] + - - M_ gupxy[_t] * (M_ Axx[_t] * M_ Ayy[_t] + M_ Axy[_t] * M_ Axy[_t]) + - - M_ gupxz[_t] * (M_ Axx[_t] * M_ Ayz[_t] + M_ Axz[_t] * M_ Axy[_t]) + - - M_ gupyz[_t] * (M_ Axy[_t] * M_ Ayz[_t] + M_ Axz[_t] * M_ Ayy[_t]) ) + - - M_ gupxz[_t] * ( - - M_ gupxx[_t] * M_ Axx[_t] * M_ Axz[_t] + M_ gupyy[_t] * M_ Axy[_t] * M_ Ayz[_t] + M_ gupzz[_t] * M_ Axz[_t] * M_ Azz[_t] + - - M_ gupxy[_t] * (M_ Axx[_t] * M_ Ayz[_t] + M_ Axy[_t] * M_ Axz[_t]) + - - M_ gupxz[_t] * (M_ Axx[_t] * M_ Azz[_t] + M_ Axz[_t] * M_ Axz[_t]) + - - M_ gupyz[_t] * (M_ Axy[_t] * M_ Azz[_t] + M_ Axz[_t] * M_ Ayz[_t]) ) + - - M_ gupyz[_t] * ( - - M_ gupxx[_t] * M_ Axy[_t] * M_ Axz[_t] + M_ gupyy[_t] * M_ Ayy[_t] * M_ Ayz[_t] + M_ gupzz[_t] * M_ Ayz[_t] * M_ Azz[_t] + - - M_ gupxy[_t] * (M_ Axy[_t] * M_ Ayz[_t] + M_ Ayy[_t] * M_ Axz[_t]) + - - M_ gupxz[_t] * (M_ Axy[_t] * M_ Azz[_t] + M_ Ayz[_t] * M_ Axz[_t]) + - - M_ gupyz[_t] * (M_ Ayy[_t] * M_ Azz[_t] + M_ Ayz[_t] * M_ Ayz[_t]) ) )) -16 * PI * M_ rho[_t] + 8 * PI * M_ S[_t]; - - - M_ f[_t] = - F1o3 *( M_ gupxx[_t] * M_ fxx[_t] + M_ gupyy[_t] * M_ fyy[_t] + M_ gupzz[_t] * M_ fzz[_t] + - - 2* ( M_ gupxy[_t] * M_ fxy[_t] + M_ gupxz[_t] * M_ fxz[_t] 
+ M_ gupyz[_t] * M_ fyz[_t] ) + M_ alpn1[_t] / M_ chin1[_t] * M_ f[_t]); - - - - M_ fxx[_t] = M_ alpn1[_t] * (M_ Rxx[_t] - 8 * PI * M_ Sxx[_t]) - M_ fxx[_t]; - - M_ fxy[_t] = M_ alpn1[_t] * (M_ Rxy[_t] - 8 * PI * M_ Sxy[_t]) - M_ fxy[_t]; - - M_ fxz[_t] = M_ alpn1[_t] * (M_ Rxz[_t] - 8 * PI * M_ Sxz[_t]) - M_ fxz[_t]; - - M_ fyy[_t] = M_ alpn1[_t] * (M_ Ryy[_t] - 8 * PI * M_ Syy[_t]) - M_ fyy[_t]; - - M_ fyz[_t] = M_ alpn1[_t] * (M_ Ryz[_t] - 8 * PI * M_ Syz[_t]) - M_ fyz[_t]; - - M_ fzz[_t] = M_ alpn1[_t] * (M_ Rzz[_t] - 8 * PI * M_ Szz[_t]) - M_ fzz[_t]; - /* - M_ fxx [_t]= M_ alpn1[_t]* (M_ Rxx [_t]- 8 * PI * M_ Sxx[_t]) -M_ fxx[_t]; - M_ fxy[_t]= M_ alpn1[_t]* (M_ Rxy[_t]- 8 * PI * M_ Sxy[_t]) -M_ fxy[_t]; - M_ fxz[_t]= M_ alpn1[_t]* (M_ Rxz[_t]- 8 * PI * M_ Sxz[_t]) -M_ fxz[_t]; - M_ fyy[_t]= M_ alpn1[_t]* (M_ Ryy[_t]- 8 * PI * M_ Syy[_t]) -M_ fyy[_t]; - M_ fyz[_t]= M_ alpn1[_t]* (M_ Ryz[_t]- 8 * PI * M_ Syz[_t]) -M_ fyz[_t]; - M_ fzz[_t]= M_ alpn1[_t]* (M_ Rzz[_t]- 8 * PI * M_ Szz[_t]) -M_ fzz[_t]; - - // Compute trace-free part (note: M_ chi^-1 and M_ chi cancel//): - - M_ f[_t] = F1o3 *( M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t]+ - 2* (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]) ); - */ - M_ Axx_rhs[_t] =M_ fxx [_t]-M_ gxx [_t]*M_ f[_t]; - M_ Ayy_rhs[_t] =M_ fyy[_t]-M_ gyy[_t]*M_ f[_t]; - M_ Azz_rhs[_t] =M_ fzz[_t]-M_ gzz[_t]*M_ f[_t]; - M_ Axy_rhs[_t] =M_ fxy[_t]-M_ gxy[_t]*M_ f[_t]; - M_ Axz_rhs[_t] =M_ fxz[_t]-M_ gxz[_t]*M_ f[_t]; - M_ Ayz_rhs[_t] =M_ fyz[_t]-M_ gyz[_t]*M_ f[_t]; - - // Now: store M_ A_il M_ A^l_j intoM_ fij: - - M_ fxx [_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axx [_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Axy[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Axz[_t]+ - 2 * (M_ gupxy[_t]* M_ Axx [_t]* M_ Axy[_t]+M_ gupxz[_t]* M_ Axx [_t]* M_ Axz[_t]+M_ gupyz[_t]* M_ Axy[_t]* M_ Axz[_t]); - - M_ fyy[_t]= M_ gupxx [_t]* M_ Axy[_t]* M_ Axy[_t]+M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayy[_t]+M_ gupzz[_t]* M_ Ayz[_t]* M_ 
Ayz[_t]+ - 2 * (M_ gupxy[_t]* M_ Axy[_t]* M_ Ayy[_t]+M_ gupxz[_t]* M_ Axy[_t]* M_ Ayz[_t]+M_ gupyz[_t]* M_ Ayy[_t]* M_ Ayz[_t]); - - M_ fzz[_t]= M_ gupxx [_t]* M_ Axz[_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Ayz[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Azz[_t]* M_ Azz[_t]+ - 2 * (M_ gupxy[_t]* M_ Axz[_t]* M_ Ayz[_t]+M_ gupxz[_t]* M_ Axz[_t]* M_ Azz[_t]+M_ gupyz[_t]* M_ Ayz[_t]* M_ Azz[_t]); - - M_ fxy[_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axy[_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Ayy[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Ayz[_t]+ - M_ gupxy[_t]*(M_ Axx [_t]* M_ Ayy[_t]+ M_ Axy[_t]* M_ Axy[_t]) + - M_ gupxz[_t]*(M_ Axx [_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Axy[_t]) + - M_ gupyz[_t]*(M_ Axy[_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Ayy[_t]); - M_ fxz[_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Azz[_t]+ - M_ gupxy[_t]*(M_ Axx [_t]* M_ Ayz[_t]+ M_ Axy[_t]* M_ Axz[_t]) + - M_ gupxz[_t]*(M_ Axx [_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Axz[_t]) + - M_ gupyz[_t]*(M_ Axy[_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Ayz[_t]); - M_ fyz[_t]= M_ gupxx [_t]* M_ Axy[_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Ayz[_t]* M_ Azz[_t]+ - M_ gupxy[_t]*(M_ Axy[_t]* M_ Ayz[_t]+ M_ Ayy[_t]* M_ Axz[_t]) + - M_ gupxz[_t]*(M_ Axy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Axz[_t]) + - M_ gupyz[_t]*(M_ Ayy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Ayz[_t]); - - M_ f[_t] = M_ chin1[_t]; - // store D^i D_i Lap in M_ trK_rhs[_t] - M_ trK_rhs[_t] =M_ f[_t]*M_ trK_rhs[_t]; - - M_ Axx_rhs[_t] = M_ f[_t] * M_ Axx_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Axx [_t]- 2 *M_ fxx[_t]) + - 2 * ( M_ Axx [_t]* M_ betaxx [_t]+ M_ Axy[_t]* M_ betayx [_t]+ M_ Axz[_t]* M_ betazx [_t])- - F2o3 * M_ Axx [_t]* M_ div_beta[_t]; - - M_ Ayy_rhs[_t] = M_ f[_t] * M_ Ayy_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Ayy[_t]- 2 *M_ fyy[_t]) + - 2 * ( M_ Axy[_t]* M_ betaxy[_t]+ M_ Ayy[_t]* M_ betayy[_t]+ M_ Ayz[_t]* M_ betazy[_t])- - F2o3 * M_ Ayy[_t]* M_ div_beta[_t]; - - M_ Azz_rhs[_t] = M_ f[_t] * M_ Azz_rhs[_t]+ M_ 
alpn1[_t]* (M_ trK[_t]* M_ Azz[_t]- 2 *M_ fzz[_t]) + - 2 * ( M_ Axz[_t]* M_ betaxz[_t]+ M_ Ayz[_t]* M_ betayz[_t]+ M_ Azz[_t]* M_ betazz[_t])- - F2o3 * M_ Azz[_t]* M_ div_beta[_t]; - - M_ Axy_rhs[_t] = M_ f[_t] * M_ Axy_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Axy[_t] - 2 *M_ fxy[_t])+ - M_ Axx [_t]* M_ betaxy[_t] + M_ Axz[_t]* M_ betazy[_t] + - M_ Ayy[_t]* M_ betayx [_t]+ M_ Ayz[_t]* M_ betazx [_t] + - F1o3 * M_ Axy[_t]* M_ div_beta[_t] - M_ Axy[_t]* M_ betazz[_t]; - - M_ Ayz_rhs[_t] = M_ f[_t] * M_ Ayz_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Ayz[_t] - 2 *M_ fyz[_t])+ - M_ Axy[_t]* M_ betaxz[_t]+ M_ Ayy[_t]* M_ betayz[_t] + - M_ Axz[_t]* M_ betaxy[_t] + M_ Azz[_t]* M_ betazy[_t] + - F1o3 * M_ Ayz[_t]* M_ div_beta[_t] - M_ Ayz[_t]* M_ betaxx[_t]; - - M_ Axz_rhs[_t] = M_ f[_t] * M_ Axz_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Axz[_t] - 2 *M_ fxz[_t])+ - M_ Axx [_t]* M_ betaxz[_t]+ M_ Axy[_t]* M_ betayz[_t] + - M_ Ayz[_t]* M_ betayx [_t]+ M_ Azz[_t]* M_ betazx [_t] + - F1o3 * M_ Axz[_t]* M_ div_beta[_t] - M_ Axz[_t]* M_ betayy[_t] ; //rhsM_ for M_ Aij - - // Compute trace of M_ S_ij - - M_ S[_t] = M_ f[_t] * (M_ gupxx [_t]* M_ Sxx [_t]+M_ gupyy[_t]* M_ Syy[_t]+M_ gupzz[_t]* M_ Szz[_t]+ - 2 * (M_ gupxy[_t]* M_ Sxy[_t]+M_ gupxz[_t]* M_ Sxz[_t]+M_ gupyz[_t]* M_ Syz[_t]) ); - - M_ trK_rhs[_t] = - M_ trK_rhs[_t] + M_ alpn1[_t]*( F1o3 * M_ trK[_t]* M_ trK[_t] + - M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t] + - 2 * (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]) + - 4 * PI * ( M_ rho[_t] + M_ S[_t] )) ; //rhsM_ for M_ trK[_t] - - ////////M_ gauge variable part - - M_ Lap_rhs[_t] = -2*M_ alpn1[_t] * M_ trK[_t]; - -#if (GAUGE == 0) - M_ betax_rhs[_t] =0.75*M_ dtSfx[_t]; - M_ betay_rhs[_t] =0.75*M_ dtSfy[_t]; - M_ betaz_rhs[_t] =0.75*M_ dtSfz[_t]; - - M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] -2*M_ dtSfx[_t]; - M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] -2*M_ dtSfy[_t]; - M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] -2*M_ dtSfz[_t]; - -#elif (GAUGE == 
1) - M_ betax_rhs[_t] =M_ Gamx[_t] - 2 * M_ betax[_t] ; - - M_ betay_rhs[_t] =M_ Gamy[_t] - 2 * M_ betay[_t] ; - - M_ betaz_rhs[_t] =M_ Gamz[_t] - 2 * M_ betaz[_t] ; - - M_ dtSfx_rhs[_t] = 0; - M_ dtSfy_rhs[_t] = 0; - M_ dtSfz_rhs[_t] = 0; - -#elif (GAUGE == 2 || GAUGE == 3) - - M_ betax_rhs[_t] = 0.75* M_ dtSfx[_t]; - - M_ betay_rhs[_t] = 0.75* M_ dtSfy[_t]; - - M_ betaz_rhs[_t] = 0.75* M_ dtSfz[_t]; - -#elif (GAUGE == 6) - if(BHN==2) - { - int k = _t / _2D_SIZE[0]; - int ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); - - r1 = ( pow2((Porg[0]-X[i]))+ pow2((Porg[1]-Y[j]))+ pow2((Porg[2]-Z[k])) ) / - - ( pow2((Porg[0]-Porg[3]))+ pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); - - - r2 = ( pow2((Porg[3]-X[i])) + pow2((Porg[4]-Y[j])) + pow2((Porg[5]-Z[k])) )/ - - ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); - - - reta[i+ j*_1D_SIZE[0]+ k*_2D_SIZE[0] ] = A + C1/(1 + 12 * r1) + C2/(1 + 12 *r2); - }//BHN == 2 - - M_ betax_rhs[_t] = 0.75*M_ dtSfx[_t]; - - M_ betay_rhs[_t] = 0.75*M_ dtSfy[_t]; - - M_ betaz_rhs[_t] = 0.75*M_ dtSfz[_t]; - - - - M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t] * M_ dtSfx[_t]; - - M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t] * M_ dtSfy[_t]; - - M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t] * M_ dtSfz[_t]; - -#elif (GAUGE == 7) - if(BHN==2){ - int k = _t / _2D_SIZE[0]; - int ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; - int j = ps / ex_c[0]; - int i = ps - (j * ex_c[0]); - - r1 = ( pow2((Porg[0]-X[i])) + pow2((Porg[1]-Y[j])) + pow2((Porg[2]-Z[k])) )/ - - ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); - - - r2 = ( pow2((Porg[3]-X[i])) + pow2((Porg[4]-Y[j])) + pow2((Porg[5]-Z[k])) )/ - - ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); - - - M_ reta[_t][i+ j*_1D_SIZE[0]+ k*_2D_SIZE[0] ] = A + C1* exp(-12 *r1) + C2*exp(- 12*r2); - }//BHN ==2 - - M_ 
betax_rhs[_t] = 0.75*M_ dtSfx[_t]; - - M_ betay_rhs[_t] = 0.75*M_ dtSfy[_t]; - - M_ betaz_rhs[_t] = 0.75*M_ dtSfz[_t]; - - - - M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]*M_ dtSfx[_t]; - - M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]*M_ dtSfy[_t]; - - M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]*M_ dtSfz[_t]; - -#endif //if (GAUGE == ?) - - _t += STEP_SIZE; - } -} - -__global__ void compute_rhs_bssn_part6_gauge() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { -#if (GAUGE == 2) - M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + - - 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); - - - M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1[_t])/ pow2( ( 1-sqrt(M_ chin1[_t]) ) ); - - - M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]* M_ dtSfx[_t]; - - M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]* M_ dtSfy[_t]; - - M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]* M_ dtSfz[_t]; - -#elif (GAUGE == 3) - M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] - + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + - - 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + - M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + - M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); - - - M_ reta[_t] = 1.13/2 * sqrt( M_ reta[_t]/ M_ chin1[_t])/ pow2((1-M_ chin1[_t])); - - M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]* M_ dtSfx[_t]; - - M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]* M_ dtSfy[_t]; - - M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]* M_ dtSfz[_t]; - -#elif (GAUGE == 4) - M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * - M_ dtSfy_rhs[_t] + M_ gupzz[_t] * 
M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + - - 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * - M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); - - - M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1[_t])/ pow( (1-sqrt(M_ chin1[_t]))); - - - M_ betax_rhs[_t] = 0.75* M_ Gamx[_t] - M_ reta[_t]*M_ betax[_t]; - - M_ betay_rhs[_t] = 0.75* M_ Gamy[_t] - M_ reta[_t]*M_ betay[_t]; - - M_ betaz_rhs[_t] = 0.75* M_ Gamz[_t] - M_ reta[_t]*M_ betaz[_t]; - -#elif (GAUGE == 5) - M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + - - 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); - - - M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1)/ pow( (1-M_ chin1[_t]) ); - - M_ betax_rhs[_t] = 0.75* M_ Gamx[_t] - M_ reta[_t]*M_ betax[_t]; - - M_ betay_rhs[_t] = 0.75* M_ Gamy[_t] - M_ reta[_t]*M_ betay[_t]; - - M_ betaz_rhs[_t] = 0.75* M_ Gamz[_t] - M_ reta[_t]*M_ betaz[_t]; - - - - M_ dtSfx_rhs[_t] = 0; - - M_ dtSfy_rhs[_t] = 0; - - M_ dtSfz_rhs[_t] = 0; -#endif - _t += STEP_SIZE; - } -} -__global__ void compute_rhs_bssn_part7() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - M_ ham_Res[_t] = M_ gupxx [_t]* M_ Rxx [_t]+ M_ gupyy[_t]* M_ Ryy[_t]+ M_ gupzz[_t]* M_ Rzz[_t]+ - 2* ( M_ gupxy[_t]* M_ Rxy[_t]+ M_ gupxz[_t]* M_ Rxz[_t]+ M_ gupyz[_t]* M_ Ryz[_t]); - - M_ ham_Res[_t] = M_ chin1[_t]*M_ ham_Res[_t] + F2o3 * M_ trK[_t] * M_ trK[_t] -( - M_ gupxx [_t]* ( - M_ gupxx [_t]* M_ Axx [_t]* M_ Axx [_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Axy[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Axz[_t]+ - 2 * (M_ gupxy[_t]* M_ Axx [_t]* M_ Axy[_t]+ M_ gupxz[_t]* M_ Axx [_t]* M_ Axz[_t]+ M_ gupyz[_t]* M_ Axy[_t]* M_ Axz[_t]) ) + - M_ gupyy[_t]* ( - M_ gupxx [_t]* M_ Axy[_t]* M_ 
Axy[_t]+ M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ - 2 * (M_ gupxy[_t]* M_ Axy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ Axy[_t]* M_ Ayz[_t]+ M_ gupyz[_t]* M_ Ayy[_t]* M_ Ayz[_t]) ) + - M_ gupzz[_t]* ( - M_ gupxx [_t]* M_ Axz[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Azz[_t]* M_ Azz[_t]+ - 2 * (M_ gupxy[_t]* M_ Axz[_t]* M_ Ayz[_t]+ M_ gupxz[_t]* M_ Axz[_t]* M_ Azz[_t]+ M_ gupyz[_t]* M_ Ayz[_t]* M_ Azz[_t]) ) + - 2 * ( - M_ gupxy[_t]* ( - M_ gupxx [_t]* M_ Axx [_t]* M_ Axy[_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Ayz[_t]+ - M_ gupxy[_t]* (M_ Axx [_t]* M_ Ayy[_t]+ M_ Axy[_t]* M_ Axy[_t]) + - M_ gupxz[_t]* (M_ Axx [_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Axy[_t]) + - M_ gupyz[_t]* (M_ Axy[_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Ayy[_t]) ) + - M_ gupxz[_t]* ( - M_ gupxx [_t]* M_ Axx [_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Azz[_t]+ - M_ gupxy[_t]* (M_ Axx [_t]* M_ Ayz[_t]+ M_ Axy[_t]* M_ Axz[_t]) + - M_ gupxz[_t]* (M_ Axx [_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Axz[_t]) + - M_ gupyz[_t]* (M_ Axy[_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Ayz[_t]) ) + - M_ gupyz[_t]* ( - M_ gupxx [_t]* M_ Axy[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Ayz[_t]* M_ Azz[_t]+ - M_ gupxy[_t]* (M_ Axy[_t]* M_ Ayz[_t]+ M_ Ayy[_t]* M_ Axz[_t]) + - M_ gupxz[_t]* (M_ Axy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Axz[_t]) + - M_ gupyz[_t]* (M_ Ayy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Ayz[_t]) ) ))- 16 * PI * M_ rho[_t]; - - _t += STEP_SIZE; - } -} -__global__ void compute_rhs_bssn_part8() -{ - int _t = blockIdx.x*blockDim.x+threadIdx.x; - while(_t < _3D_SIZE[0]) - { - M_ gxxx [_t]= M_ gxxx [_t]- ( M_ Gamxxx [_t]* M_ Axx [_t]+ M_ Gamyxx [_t]* M_ Axy[_t]+ M_ Gamzxx [_t]* M_ Axz[_t] - + M_ Gamxxx [_t]* M_ Axx [_t]+ M_ Gamyxx [_t]* M_ Axy[_t]+ M_ Gamzxx [_t]* M_ Axz[_t]) - M_ chix[_t]*M_ Axx[_t]/M_ chin1[_t]; - - M_ gxyx [_t]= M_ gxyx [_t]- ( M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ 
Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t] - + M_ Gamxxx [_t]* M_ Axy[_t]+ M_ Gamyxx [_t]* M_ Ayy[_t]+ M_ Gamzxx [_t]* M_ Ayz[_t]) - M_ chix[_t]*M_ Axy[_t]/M_ chin1[_t]; - - M_ gxzx [_t]= M_ gxzx [_t]- ( M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t] - + M_ Gamxxx [_t]* M_ Axz[_t]+ M_ Gamyxx [_t]* M_ Ayz[_t]+ M_ Gamzxx [_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Axz[_t]/M_ chin1[_t]; - - M_ gyyx [_t]= M_ gyyx [_t]- ( M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t] - + M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t]) - M_ chix[_t]*M_ Ayy[_t]/M_ chin1[_t]; - - M_ gyzx [_t]= M_ gyzx [_t]- ( M_ Gamxxz[_t]* M_ Axy[_t]+ M_ Gamyxz[_t]* M_ Ayy[_t]+ M_ Gamzxz[_t]* M_ Ayz[_t] - + M_ Gamxxy[_t]* M_ Axz[_t]+ M_ Gamyxy[_t]* M_ Ayz[_t]+ M_ Gamzxy[_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Ayz[_t]/M_ chin1[_t]; - - M_ gzzx [_t]= M_ gzzx [_t]- ( M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t] - + M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Azz[_t]/M_ chin1[_t]; - - M_ gxxy[_t]= M_ gxxy[_t]- ( M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t] - + M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t]) - M_ chiy[_t]*M_ Axx[_t]/M_ chin1[_t]; - - M_ gxyy[_t]= M_ gxyy[_t]- ( M_ Gamxyy[_t]* M_ Axx [_t]+ M_ Gamyyy[_t]* M_ Axy[_t]+ M_ Gamzyy[_t]* M_ Axz[_t] - + M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t]) - M_ chiy[_t]*M_ Axy[_t]/M_ chin1[_t]; - - M_ gxzy[_t]= M_ gxzy[_t]- ( M_ Gamxyz[_t]* M_ Axx [_t]+ M_ Gamyyz[_t]* M_ Axy[_t]+ M_ Gamzyz[_t]* M_ Axz[_t] - + M_ Gamxxy[_t]* M_ Axz[_t]+ M_ Gamyxy[_t]* M_ Ayz[_t]+ M_ Gamzxy[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Axz[_t]/M_ chin1[_t]; - - M_ gyyy[_t]= M_ gyyy[_t]- ( M_ Gamxyy[_t]* M_ Axy[_t]+ M_ Gamyyy[_t]* M_ Ayy[_t]+ M_ Gamzyy[_t]* M_ Ayz[_t] - + M_ Gamxyy[_t]* M_ Axy[_t]+ M_ Gamyyy[_t]* M_ Ayy[_t]+ 
M_ Gamzyy[_t]* M_ Ayz[_t]) - M_ chiy[_t]*M_ Ayy[_t]/M_ chin1[_t]; - - M_ gyzy[_t]= M_ gyzy[_t]- ( M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t] - + M_ Gamxyy[_t]* M_ Axz[_t]+ M_ Gamyyy[_t]* M_ Ayz[_t]+ M_ Gamzyy[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Ayz[_t]/M_ chin1[_t]; - - M_ gzzy[_t]= M_ gzzy[_t]- ( M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t] - + M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Azz[_t]/M_ chin1[_t]; - - M_ gxxz[_t]= M_ gxxz[_t]- ( M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t] - + M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t]) - M_ chiz[_t]*M_ Axx[_t]/M_ chin1[_t]; - - M_ gxyz[_t]= M_ gxyz[_t]- ( M_ Gamxyz[_t]* M_ Axx [_t]+ M_ Gamyyz[_t]* M_ Axy[_t]+ M_ Gamzyz[_t]* M_ Axz[_t] - + M_ Gamxxz[_t]* M_ Axy[_t]+ M_ Gamyxz[_t]* M_ Ayy[_t]+ M_ Gamzxz[_t]* M_ Ayz[_t]) - M_ chiz[_t]*M_ Axy[_t]/M_ chin1[_t]; - - M_ gxzz[_t]= M_ gxzz[_t]- ( M_ Gamxzz[_t]* M_ Axx [_t]+ M_ Gamyzz[_t]* M_ Axy[_t]+ M_ Gamzzz[_t]* M_ Axz[_t] - + M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Axz[_t]/M_ chin1[_t]; - - M_ gyyz[_t]= M_ gyyz[_t]- ( M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t] - + M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t]) - M_ chiz[_t]*M_ Ayy[_t]/M_ chin1[_t]; - - M_ gyzz[_t]= M_ gyzz[_t]- ( M_ Gamxzz[_t]* M_ Axy[_t]+ M_ Gamyzz[_t]* M_ Ayy[_t]+ M_ Gamzzz[_t]* M_ Ayz[_t] - + M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Ayz[_t]/M_ chin1[_t]; - - M_ gzzz[_t]= M_ gzzz[_t]- ( M_ Gamxzz[_t]* M_ Axz[_t]+ M_ Gamyzz[_t]* M_ Ayz[_t]+ M_ Gamzzz[_t]* M_ Azz[_t] - + M_ Gamxzz[_t]* M_ Axz[_t]+ M_ Gamyzz[_t]* M_ Ayz[_t]+ M_ Gamzzz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Azz[_t]/M_ chin1[_t]; - - M_ movx_Res[_t] = M_ gupxx[_t]*M_ 
gxxx [_t]+ M_ gupyy[_t]*M_ gxyy[_t]+ M_ gupzz[_t]*M_ gxzz[_t] - +M_ gupxy[_t]*M_ gxyx [_t]+ M_ gupxz[_t]*M_ gxzx [_t]+ M_ gupyz[_t]*M_ gxzy[_t] - +M_ gupxy[_t]*M_ gxxy[_t]+ M_ gupxz[_t]*M_ gxxz[_t]+ M_ gupyz[_t]*M_ gxyz[_t]; - M_ movy_Res[_t] = M_ gupxx[_t]*M_ gxyx [_t]+ M_ gupyy[_t]*M_ gyyy[_t]+ M_ gupzz[_t]*M_ gyzz[_t] - +M_ gupxy[_t]*M_ gyyx [_t]+ M_ gupxz[_t]*M_ gyzx [_t]+ M_ gupyz[_t]*M_ gyzy[_t] - +M_ gupxy[_t]*M_ gxyy[_t]+ M_ gupxz[_t]*M_ gxyz[_t]+ M_ gupyz[_t]*M_ gyyz[_t]; - - M_ movz_Res[_t] = M_ gupxx[_t]*M_ gxzx [_t]+ M_ gupyy[_t]*M_ gyzy[_t]+ M_ gupzz[_t]*M_ gzzz[_t] - +M_ gupxy[_t]*M_ gyzx [_t]+ M_ gupxz[_t]*M_ gzzx [_t]+ M_ gupyz[_t]*M_ gzzy[_t] - +M_ gupxy[_t]*M_ gxzy[_t]+ M_ gupxz[_t]*M_ gxzz[_t]+ M_ gupyz[_t]*M_ gyzz[_t]; - - M_ movx_Res[_t] = M_ movx_Res[_t] - F2o3*M_ Kx [_t]- 8*PI*M_ Sx[_t]; - M_ movy_Res[_t] = M_ movy_Res[_t] - F2o3*M_ Ky[_t]- 8*PI*M_ Sy[_t]; - M_ movz_Res[_t] = M_ movz_Res[_t] - F2o3*M_ Kz[_t]- 8*PI*M_ Sz[_t]; - - _t += STEP_SIZE; - } -} - - - -__global__ void device_test(double * result, double * Xt){ - /*result[0] = MAXSIZE; - result[1] = STEP; - result[2] = ex_c[0]; - result[3] = ex_c[1]; - result[4] = ex_c[2]; - result[5] = Xt[0]; - result[6] = Xt[1]; - result[7] = metac.X[0]; - result[8] = metac.X[1]; */ - - result[0] = metac.gzz[0]; - result[1] = metac.gzz[1]; - result[2] = metac.gzz[2]; - result[3] = metac.gyy[0]; - result[4] = metac.gyy[1]; - result[5] = metac.gyy[2]; - result[6] = _3D_SIZE[0]; - result[7] = STEP_SIZE; - result[8] = blockDim.x * gridDim.x; -} - -void destroy_meta(Meta *meta) -{ - /* - if(Mh_ X) CUDA_SAFE_CALL(cudaFree(Mh_ X)); - if(Mh_ Y) CUDA_SAFE_CALL(cudaFree(Mh_ Y)); - if(Mh_ Z) CUDA_SAFE_CALL(cudaFree(Mh_ Z)); - if(Mh_ chi) CUDA_SAFE_CALL(cudaFree(Mh_ chi)); - if(Mh_ dxx) CUDA_SAFE_CALL(cudaFree(Mh_ dxx)); - if(Mh_ dyy) CUDA_SAFE_CALL(cudaFree(Mh_ dyy)); - if(Mh_ dzz) CUDA_SAFE_CALL(cudaFree(Mh_ dzz)); - if(Mh_ trK) CUDA_SAFE_CALL(cudaFree(Mh_ trK)); - if(Mh_ gxy) CUDA_SAFE_CALL(cudaFree(Mh_ gxy)); 
- if(Mh_ gxz) CUDA_SAFE_CALL(cudaFree(Mh_ gxz)); - if(Mh_ gyz) CUDA_SAFE_CALL(cudaFree(Mh_ gyz)); - if(Mh_ Axx) CUDA_SAFE_CALL(cudaFree(Mh_ Axx)); - if(Mh_ Axy) CUDA_SAFE_CALL(cudaFree(Mh_ Axy)); - if(Mh_ Axz) CUDA_SAFE_CALL(cudaFree(Mh_ Axz)); - if(Mh_ Ayz) CUDA_SAFE_CALL(cudaFree(Mh_ Ayz)); - if(Mh_ Ayy) CUDA_SAFE_CALL(cudaFree(Mh_ Ayy)); - if(Mh_ Azz) CUDA_SAFE_CALL(cudaFree(Mh_ Azz)); - if(Mh_ Gamx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamx)); - if(Mh_ Gamy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamy)); - if(Mh_ Gamz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamz)); - if(Mh_ Lap) CUDA_SAFE_CALL(cudaFree(Mh_ Lap)); - if(Mh_ betax) CUDA_SAFE_CALL(cudaFree(Mh_ betax)); - if(Mh_ betay) CUDA_SAFE_CALL(cudaFree(Mh_ betay)); - if(Mh_ betaz) CUDA_SAFE_CALL(cudaFree(Mh_ betaz)); - if(Mh_ dtSfx) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfx)); - if(Mh_ dtSfy) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfy)); - if(Mh_ dtSfz) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfz)); - if(Mh_ chi_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ chi_rhs)); - if(Mh_ trK_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ trK_rhs)); - if(Mh_ gxy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gxy_rhs)); - if(Mh_ gxz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gxz_rhs)); - if(Mh_ gyz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gyz_rhs)); - if(Mh_ Axx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Axx_rhs)); - if(Mh_ Axy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Axy_rhs)); - if(Mh_ Axz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Axz_rhs)); - if(Mh_ Ayz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Ayz_rhs)); - if(Mh_ Ayy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Ayy_rhs)); - if(Mh_ Azz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Azz_rhs)); - if(Mh_ Gamx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Gamx_rhs)); - if(Mh_ Gamy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Gamy_rhs)); - if(Mh_ Gamz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Gamz_rhs)); - if(Mh_ Lap_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Lap_rhs)); - if(Mh_ betax_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ betax_rhs)); - if(Mh_ betay_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ betay_rhs)); - if(Mh_ betaz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ betaz_rhs)); - if(Mh_ dtSfx_rhs) 
CUDA_SAFE_CALL(cudaFree(Mh_ dtSfx_rhs)); - if(Mh_ dtSfy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfy_rhs)); - if(Mh_ dtSfz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfz_rhs)); - if(Mh_ rho) CUDA_SAFE_CALL(cudaFree(Mh_ rho)); - if(Mh_ Sx) CUDA_SAFE_CALL(cudaFree(Mh_ Sx)); - if(Mh_ Sy) CUDA_SAFE_CALL(cudaFree(Mh_ Sy)); - if(Mh_ Sz) CUDA_SAFE_CALL(cudaFree(Mh_ Sz)); - if(Mh_ Sxx) CUDA_SAFE_CALL(cudaFree(Mh_ Sxx)); - if(Mh_ Sxy) CUDA_SAFE_CALL(cudaFree(Mh_ Sxy)); - if(Mh_ Sxz) CUDA_SAFE_CALL(cudaFree(Mh_ Sxz)); - if(Mh_ Syz) CUDA_SAFE_CALL(cudaFree(Mh_ Syz)); - if(Mh_ Syy) CUDA_SAFE_CALL(cudaFree(Mh_ Syy)); - if(Mh_ Szz) CUDA_SAFE_CALL(cudaFree(Mh_ Szz)); - if(Mh_ Gamxxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxxx)); - if(Mh_ Gamxxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxxy)); - if(Mh_ Gamxxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxxz)); - if(Mh_ Gamxyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxyy)); - if(Mh_ Gamxyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxyz)); - if(Mh_ Gamxzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxzz)); - if(Mh_ Gamyxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyxx)); - if(Mh_ Gamyxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyxy)); - if(Mh_ Gamyxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyxz)); - if(Mh_ Gamyyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyyy)); - if(Mh_ Gamyyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyyz)); - if(Mh_ Gamyzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyzz)); - if(Mh_ Gamzxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzxx)); - if(Mh_ Gamzxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzxy)); - if(Mh_ Gamzxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzxz)); - if(Mh_ Gamzyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzyz)); - if(Mh_ Gamzyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzyy)); - if(Mh_ Gamzzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzzz)); - if(Mh_ Rxx) CUDA_SAFE_CALL(cudaFree(Mh_ Rxx)); - if(Mh_ Rxy) CUDA_SAFE_CALL(cudaFree(Mh_ Rxy)); - if(Mh_ Rxz) CUDA_SAFE_CALL(cudaFree(Mh_ Rxz)); - if(Mh_ Ryy) CUDA_SAFE_CALL(cudaFree(Mh_ Ryy)); - if(Mh_ Ryz) CUDA_SAFE_CALL(cudaFree(Mh_ Ryz)); - if(Mh_ Rzz) CUDA_SAFE_CALL(cudaFree(Mh_ Rzz)); - if(Mh_ ham_Res) CUDA_SAFE_CALL(cudaFree(Mh_ ham_Res)); - if(Mh_ 
movx_Res) CUDA_SAFE_CALL(cudaFree(Mh_ movx_Res)); - if(Mh_ movy_Res) CUDA_SAFE_CALL(cudaFree(Mh_ movy_Res)); - if(Mh_ movz_Res) CUDA_SAFE_CALL(cudaFree(Mh_ movz_Res)); - if(Mh_ Gmx_Res) CUDA_SAFE_CALL(cudaFree(Mh_ Gmx_Res)); - if(Mh_ Gmy_Res) CUDA_SAFE_CALL(cudaFree(Mh_ Gmy_Res)); - if(Mh_ Gmz_Res) CUDA_SAFE_CALL(cudaFree(Mh_ Gmz_Res)); - if(Mh_ gxx) CUDA_SAFE_CALL(cudaFree(Mh_ gxx)); - if(Mh_ gyy) CUDA_SAFE_CALL(cudaFree(Mh_ gyy)); - if(Mh_ gzz) CUDA_SAFE_CALL(cudaFree(Mh_ gzz)); - if(Mh_ chix) CUDA_SAFE_CALL(cudaFree(Mh_ chix)); - if(Mh_ chiy) CUDA_SAFE_CALL(cudaFree(Mh_ chiy)); - if(Mh_ chiz) CUDA_SAFE_CALL(cudaFree(Mh_ chiz)); - if(Mh_ gxxx) CUDA_SAFE_CALL(cudaFree(Mh_ gxxx)); - if(Mh_ gxyx) CUDA_SAFE_CALL(cudaFree(Mh_ gxyx)); - if(Mh_ gxzx) CUDA_SAFE_CALL(cudaFree(Mh_ gxzx)); - if(Mh_ gyyx) CUDA_SAFE_CALL(cudaFree(Mh_ gyyx)); - if(Mh_ gyzx) CUDA_SAFE_CALL(cudaFree(Mh_ gyzx)); - if(Mh_ gzzx) CUDA_SAFE_CALL(cudaFree(Mh_ gzzx)); - if(Mh_ gxxy) CUDA_SAFE_CALL(cudaFree(Mh_ gxxy)); - if(Mh_ gxyy) CUDA_SAFE_CALL(cudaFree(Mh_ gxyy)); - if(Mh_ gxzy) CUDA_SAFE_CALL(cudaFree(Mh_ gxzy)); - if(Mh_ gyyy) CUDA_SAFE_CALL(cudaFree(Mh_ gyyy)); - if(Mh_ gyzy) CUDA_SAFE_CALL(cudaFree(Mh_ gyzy)); - if(Mh_ gzzy) CUDA_SAFE_CALL(cudaFree(Mh_ gzzy)); - if(Mh_ gxxz) CUDA_SAFE_CALL(cudaFree(Mh_ gxxz)); - if(Mh_ gxyz) CUDA_SAFE_CALL(cudaFree(Mh_ gxyz)); - if(Mh_ gxzz) CUDA_SAFE_CALL(cudaFree(Mh_ gxzz)); - if(Mh_ gyyz) CUDA_SAFE_CALL(cudaFree(Mh_ gyyz)); - if(Mh_ gyzz) CUDA_SAFE_CALL(cudaFree(Mh_ gyzz)); - if(Mh_ gzzz) CUDA_SAFE_CALL(cudaFree(Mh_ gzzz)); - if(Mh_ Lapx) CUDA_SAFE_CALL(cudaFree(Mh_ Lapx)); - if(Mh_ Lapy) CUDA_SAFE_CALL(cudaFree(Mh_ Lapy)); - if(Mh_ Lapz) CUDA_SAFE_CALL(cudaFree(Mh_ Lapz)); - if(Mh_ betaxx) CUDA_SAFE_CALL(cudaFree(Mh_ betaxx)); - if(Mh_ betaxy) CUDA_SAFE_CALL(cudaFree(Mh_ betaxy)); - if(Mh_ betaxz) CUDA_SAFE_CALL(cudaFree(Mh_ betaxz)); - if(Mh_ betayy) CUDA_SAFE_CALL(cudaFree(Mh_ betayy)); - if(Mh_ betayz) CUDA_SAFE_CALL(cudaFree(Mh_ betayz)); - if(Mh_ 
betazz) CUDA_SAFE_CALL(cudaFree(Mh_ betazz)); - if(Mh_ betayx) CUDA_SAFE_CALL(cudaFree(Mh_ betayx)); - if(Mh_ betazy) CUDA_SAFE_CALL(cudaFree(Mh_ betazy)); - if(Mh_ betazx) CUDA_SAFE_CALL(cudaFree(Mh_ betazx)); - if(Mh_ Kx) CUDA_SAFE_CALL(cudaFree(Mh_ Kx)); - if(Mh_ Ky) CUDA_SAFE_CALL(cudaFree(Mh_ Ky)); - if(Mh_ Kz) CUDA_SAFE_CALL(cudaFree(Mh_ Kz)); - if(Mh_ Gamxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxx)); - if(Mh_ Gamxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxy)); - if(Mh_ Gamxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxz)); - if(Mh_ Gamyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyy)); - if(Mh_ Gamyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyz)); - if(Mh_ Gamzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzz)); - if(Mh_ Gamyx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyx)); - if(Mh_ Gamzy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzy)); - if(Mh_ Gamzx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzx)); - if(Mh_ div_beta) CUDA_SAFE_CALL(cudaFree(Mh_ div_beta)); - if(Mh_ S) CUDA_SAFE_CALL(cudaFree(Mh_ S)); - if(Mh_ f) CUDA_SAFE_CALL(cudaFree(Mh_ f)); - if(Mh_ fxx) CUDA_SAFE_CALL(cudaFree(Mh_ fxx)); - if(Mh_ fxy) CUDA_SAFE_CALL(cudaFree(Mh_ fxy)); - if(Mh_ fxz) CUDA_SAFE_CALL(cudaFree(Mh_ fxz)); - if(Mh_ fyy) CUDA_SAFE_CALL(cudaFree(Mh_ fyy)); - if(Mh_ fyz) CUDA_SAFE_CALL(cudaFree(Mh_ fyz)); - if(Mh_ fzz) CUDA_SAFE_CALL(cudaFree(Mh_ fzz)); - if(Mh_ gupxx) CUDA_SAFE_CALL(cudaFree(Mh_ gupxx)); - if(Mh_ gupxy) CUDA_SAFE_CALL(cudaFree(Mh_ gupxy)); - if(Mh_ gupxz) CUDA_SAFE_CALL(cudaFree(Mh_ gupxz)); - if(Mh_ gupyy) CUDA_SAFE_CALL(cudaFree(Mh_ gupyy)); - if(Mh_ gupyz) CUDA_SAFE_CALL(cudaFree(Mh_ gupyz)); - if(Mh_ gupzz) CUDA_SAFE_CALL(cudaFree(Mh_ gupzz)); - if(Mh_ Gamxa) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxa)); - if(Mh_ Gamya) CUDA_SAFE_CALL(cudaFree(Mh_ Gamya)); - if(Mh_ Gamza) CUDA_SAFE_CALL(cudaFree(Mh_ Gamza)); - if(Mh_ alpn1) CUDA_SAFE_CALL(cudaFree(Mh_ alpn1)); - if(Mh_ chin1) CUDA_SAFE_CALL(cudaFree(Mh_ chin1)); - if(Mh_ fh) CUDA_SAFE_CALL(cudaFree(Mh_ fh)); - if(Mh_ fh2) CUDA_SAFE_CALL(cudaFree(Mh_ fh2)); - if(Mh_ gxx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ 
gxx_rhs)); - if(Mh_ gyy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gyy_rhs)); - if(Mh_ gzz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gzz_rhs)); - */ - - if(Mh_ X) cudaFree(Mh_ X); - if(Mh_ Y) cudaFree(Mh_ Y); - if(Mh_ Z) cudaFree(Mh_ Z); - if(Mh_ chi) cudaFree(Mh_ chi); - if(Mh_ dxx) cudaFree(Mh_ dxx); - if(Mh_ dyy) cudaFree(Mh_ dyy); - if(Mh_ dzz) cudaFree(Mh_ dzz); - if(Mh_ trK) cudaFree(Mh_ trK); - if(Mh_ gxy) cudaFree(Mh_ gxy); - if(Mh_ gxz) cudaFree(Mh_ gxz); - if(Mh_ gyz) cudaFree(Mh_ gyz); - if(Mh_ Axx) cudaFree(Mh_ Axx); - if(Mh_ Axy) cudaFree(Mh_ Axy); - if(Mh_ Axz) cudaFree(Mh_ Axz); - if(Mh_ Ayz) cudaFree(Mh_ Ayz); - if(Mh_ Ayy) cudaFree(Mh_ Ayy); - if(Mh_ Azz) cudaFree(Mh_ Azz); - if(Mh_ Gamx) cudaFree(Mh_ Gamx); - if(Mh_ Gamy) cudaFree(Mh_ Gamy); - if(Mh_ Gamz) cudaFree(Mh_ Gamz); - if(Mh_ Lap) cudaFree(Mh_ Lap); - if(Mh_ betax) cudaFree(Mh_ betax); - if(Mh_ betay) cudaFree(Mh_ betay); - if(Mh_ betaz) cudaFree(Mh_ betaz); - if(Mh_ dtSfx) cudaFree(Mh_ dtSfx); - if(Mh_ dtSfy) cudaFree(Mh_ dtSfy); - if(Mh_ dtSfz) cudaFree(Mh_ dtSfz); - if(Mh_ chi_rhs) cudaFree(Mh_ chi_rhs); - if(Mh_ trK_rhs) cudaFree(Mh_ trK_rhs); - if(Mh_ gxy_rhs) cudaFree(Mh_ gxy_rhs); - if(Mh_ gxz_rhs) cudaFree(Mh_ gxz_rhs); - if(Mh_ gyz_rhs) cudaFree(Mh_ gyz_rhs); - if(Mh_ Axx_rhs) cudaFree(Mh_ Axx_rhs); - if(Mh_ Axy_rhs) cudaFree(Mh_ Axy_rhs); - if(Mh_ Axz_rhs) cudaFree(Mh_ Axz_rhs); - if(Mh_ Ayz_rhs) cudaFree(Mh_ Ayz_rhs); - if(Mh_ Ayy_rhs) cudaFree(Mh_ Ayy_rhs); - if(Mh_ Azz_rhs) cudaFree(Mh_ Azz_rhs); - if(Mh_ Gamx_rhs) cudaFree(Mh_ Gamx_rhs); - if(Mh_ Gamy_rhs) cudaFree(Mh_ Gamy_rhs); - if(Mh_ Gamz_rhs) cudaFree(Mh_ Gamz_rhs); - if(Mh_ Lap_rhs) cudaFree(Mh_ Lap_rhs); - if(Mh_ betax_rhs) cudaFree(Mh_ betax_rhs); - if(Mh_ betay_rhs) cudaFree(Mh_ betay_rhs); - if(Mh_ betaz_rhs) cudaFree(Mh_ betaz_rhs); - if(Mh_ dtSfx_rhs) cudaFree(Mh_ dtSfx_rhs); - if(Mh_ dtSfy_rhs) cudaFree(Mh_ dtSfy_rhs); - if(Mh_ dtSfz_rhs) cudaFree(Mh_ dtSfz_rhs); - if(Mh_ rho) cudaFree(Mh_ rho); - if(Mh_ Sx) cudaFree(Mh_ Sx); - 
if(Mh_ Sy) cudaFree(Mh_ Sy); - if(Mh_ Sz) cudaFree(Mh_ Sz); - if(Mh_ Sxx) cudaFree(Mh_ Sxx); - if(Mh_ Sxy) cudaFree(Mh_ Sxy); - if(Mh_ Sxz) cudaFree(Mh_ Sxz); - if(Mh_ Syz) cudaFree(Mh_ Syz); - if(Mh_ Syy) cudaFree(Mh_ Syy); - if(Mh_ Szz) cudaFree(Mh_ Szz); - if(Mh_ Gamxxx) cudaFree(Mh_ Gamxxx); - if(Mh_ Gamxxy) cudaFree(Mh_ Gamxxy); - if(Mh_ Gamxxz) cudaFree(Mh_ Gamxxz); - if(Mh_ Gamxyy) cudaFree(Mh_ Gamxyy); - if(Mh_ Gamxyz) cudaFree(Mh_ Gamxyz); - if(Mh_ Gamxzz) cudaFree(Mh_ Gamxzz); - if(Mh_ Gamyxx) cudaFree(Mh_ Gamyxx); - if(Mh_ Gamyxy) cudaFree(Mh_ Gamyxy); - if(Mh_ Gamyxz) cudaFree(Mh_ Gamyxz); - if(Mh_ Gamyyy) cudaFree(Mh_ Gamyyy); - if(Mh_ Gamyyz) cudaFree(Mh_ Gamyyz); - if(Mh_ Gamyzz) cudaFree(Mh_ Gamyzz); - if(Mh_ Gamzxx) cudaFree(Mh_ Gamzxx); - if(Mh_ Gamzxy) cudaFree(Mh_ Gamzxy); - if(Mh_ Gamzxz) cudaFree(Mh_ Gamzxz); - if(Mh_ Gamzyz) cudaFree(Mh_ Gamzyz); - if(Mh_ Gamzyy) cudaFree(Mh_ Gamzyy); - if(Mh_ Gamzzz) cudaFree(Mh_ Gamzzz); - if(Mh_ Rxx) cudaFree(Mh_ Rxx); - if(Mh_ Rxy) cudaFree(Mh_ Rxy); - if(Mh_ Rxz) cudaFree(Mh_ Rxz); - if(Mh_ Ryy) cudaFree(Mh_ Ryy); - if(Mh_ Ryz) cudaFree(Mh_ Ryz); - if(Mh_ Rzz) cudaFree(Mh_ Rzz); - if(Mh_ ham_Res) cudaFree(Mh_ ham_Res); - if(Mh_ movx_Res) cudaFree(Mh_ movx_Res); - if(Mh_ movy_Res) cudaFree(Mh_ movy_Res); - if(Mh_ movz_Res) cudaFree(Mh_ movz_Res); - if(Mh_ Gmx_Res) cudaFree(Mh_ Gmx_Res); - if(Mh_ Gmy_Res) cudaFree(Mh_ Gmy_Res); - if(Mh_ Gmz_Res) cudaFree(Mh_ Gmz_Res); - if(Mh_ gxx) cudaFree(Mh_ gxx); - if(Mh_ gyy) cudaFree(Mh_ gyy); - if(Mh_ gzz) cudaFree(Mh_ gzz); - if(Mh_ chix) cudaFree(Mh_ chix); - if(Mh_ chiy) cudaFree(Mh_ chiy); - if(Mh_ chiz) cudaFree(Mh_ chiz); - if(Mh_ gxxx) cudaFree(Mh_ gxxx); - if(Mh_ gxyx) cudaFree(Mh_ gxyx); - if(Mh_ gxzx) cudaFree(Mh_ gxzx); - if(Mh_ gyyx) cudaFree(Mh_ gyyx); - if(Mh_ gyzx) cudaFree(Mh_ gyzx); - if(Mh_ gzzx) cudaFree(Mh_ gzzx); - if(Mh_ gxxy) cudaFree(Mh_ gxxy); - if(Mh_ gxyy) cudaFree(Mh_ gxyy); - if(Mh_ gxzy) cudaFree(Mh_ gxzy); - if(Mh_ gyyy) cudaFree(Mh_ 
gyyy); - if(Mh_ gyzy) cudaFree(Mh_ gyzy); - if(Mh_ gzzy) cudaFree(Mh_ gzzy); - if(Mh_ gxxz) cudaFree(Mh_ gxxz); - if(Mh_ gxyz) cudaFree(Mh_ gxyz); - if(Mh_ gxzz) cudaFree(Mh_ gxzz); - if(Mh_ gyyz) cudaFree(Mh_ gyyz); - if(Mh_ gyzz) cudaFree(Mh_ gyzz); - if(Mh_ gzzz) cudaFree(Mh_ gzzz); - if(Mh_ Lapx) cudaFree(Mh_ Lapx); - if(Mh_ Lapy) cudaFree(Mh_ Lapy); - if(Mh_ Lapz) cudaFree(Mh_ Lapz); - if(Mh_ betaxx) cudaFree(Mh_ betaxx); - if(Mh_ betaxy) cudaFree(Mh_ betaxy); - if(Mh_ betaxz) cudaFree(Mh_ betaxz); - if(Mh_ betayy) cudaFree(Mh_ betayy); - if(Mh_ betayz) cudaFree(Mh_ betayz); - if(Mh_ betazz) cudaFree(Mh_ betazz); - if(Mh_ betayx) cudaFree(Mh_ betayx); - if(Mh_ betazy) cudaFree(Mh_ betazy); - if(Mh_ betazx) cudaFree(Mh_ betazx); - if(Mh_ Kx) cudaFree(Mh_ Kx); - if(Mh_ Ky) cudaFree(Mh_ Ky); - if(Mh_ Kz) cudaFree(Mh_ Kz); - if(Mh_ Gamxx) cudaFree(Mh_ Gamxx); - if(Mh_ Gamxy) cudaFree(Mh_ Gamxy); - if(Mh_ Gamxz) cudaFree(Mh_ Gamxz); - if(Mh_ Gamyy) cudaFree(Mh_ Gamyy); - if(Mh_ Gamyz) cudaFree(Mh_ Gamyz); - if(Mh_ Gamzz) cudaFree(Mh_ Gamzz); - if(Mh_ Gamyx) cudaFree(Mh_ Gamyx); - if(Mh_ Gamzy) cudaFree(Mh_ Gamzy); - if(Mh_ Gamzx) cudaFree(Mh_ Gamzx); - if(Mh_ div_beta) cudaFree(Mh_ div_beta); - if(Mh_ S) cudaFree(Mh_ S); - if(Mh_ f) cudaFree(Mh_ f); - if(Mh_ fxx) cudaFree(Mh_ fxx); - if(Mh_ fxy) cudaFree(Mh_ fxy); - if(Mh_ fxz) cudaFree(Mh_ fxz); - if(Mh_ fyy) cudaFree(Mh_ fyy); - if(Mh_ fyz) cudaFree(Mh_ fyz); - if(Mh_ fzz) cudaFree(Mh_ fzz); - if(Mh_ gupxx) cudaFree(Mh_ gupxx); - if(Mh_ gupxy) cudaFree(Mh_ gupxy); - if(Mh_ gupxz) cudaFree(Mh_ gupxz); - if(Mh_ gupyy) cudaFree(Mh_ gupyy); - if(Mh_ gupyz) cudaFree(Mh_ gupyz); - if(Mh_ gupzz) cudaFree(Mh_ gupzz); - if(Mh_ Gamxa) cudaFree(Mh_ Gamxa); - if(Mh_ Gamya) cudaFree(Mh_ Gamya); - if(Mh_ Gamza) cudaFree(Mh_ Gamza); - if(Mh_ alpn1) cudaFree(Mh_ alpn1); - if(Mh_ chin1) cudaFree(Mh_ chin1); - if(Mh_ fh) cudaFree(Mh_ fh); - if(Mh_ fh2) cudaFree(Mh_ fh2); - if(Mh_ gxx_rhs) cudaFree(Mh_ gxx_rhs); - if(Mh_ gyy_rhs) 
cudaFree(Mh_ gyy_rhs); - if(Mh_ gzz_rhs) cudaFree(Mh_ gzz_rhs); - -#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) - // if(Mh_ reta) CUDA_SAFE_CALL(cudaFree(Mh_ reta)); - if(Mh_ reta) cudaFree(Mh_ reta); - -#endif - - //if(Mh_ other_int) cudaFree(Mh_ other_int); - //if(Mh_ other_double) cudaFree(Mh_ other_double); - //cout<<"Address of meta:"<<&meta< 1 && abs[0] < dXh) {ijkmin_h[0] = -2; ijkmin2_h[0] = -3;} - if(Symmetry > 1 && abs[1] < dYh) {ijkmin_h[1] = -2; ijkmin2_h[1] = -3;} - if(Symmetry > 0 && abs[2] < dZh) {ijkmin_h[2] = -2; ijkmin2_h[2] = -3;} - - if(Symmetry > 2 && abs[0] < dXh) {ijkmin3_h[0] = -3;} - if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;} - if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;} - - cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int)); - cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int)); - cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int)); - cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int)); - - double d12dxyz_h[3] = {1.0,1.0,1.0}; - double d2dxyz_h[3] = {1.0,1.0,1.0}; - d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12; - d12dxyz_h[0] /= dXh; d12dxyz_h[1] /= dYh; d12dxyz_h[2] /= dZh; - d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2; - d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh; - - cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double)); - cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double)); - -//3.3--------for fdderivs------------ - double Sdxdxh = 1.0 /( dXh * dXh ); - double Sdydyh = 1.0 /( dYh * dYh ); - double Sdzdzh = 1.0 /( dZh * dZh ); - double Fdxdxh = 1.0 / 12.0 /( dXh * dXh ); - double Fdydyh = 1.0 / 12.0 /( dYh * dYh ); - double Fdzdzh = 1.0 / 12.0 /( dZh * dZh ); - double Sdxdyh = 1.0/4.0 /( dXh * dYh ); - double Sdxdzh = 1.0/4.0 /( dXh * dZh ); - double Sdydzh = 1.0/4.0 /( dYh * dZh ); - double Fdxdyh = 1.0/144.0 /( dXh * dYh ); - double Fdxdzh = 1.0/144.0 /( dXh * dZh ); - double Fdydzh = 1.0/144.0 /( dYh * dZh ); - 
cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double)); - cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double)); - cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double)); - cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double)); - cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double)); - cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double)); - cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double)); - cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double)); - cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double)); - cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double)); - cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double)); - cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double)); - -//3.4---------for lopsided--------------------------- - - -#ifdef TIMING1 - cudaThreadSynchronize(); - gettimeofday(&tv2, NULL); - cout<<"TIME USED"<>>(ctest_d); - cudaMemcpy(ctest, ctest_d, sizeof(double), cudaMemcpyDeviceToHost); - cout<<"My rank is: "<>>(); - cudaThreadSynchronize(); - - sub_fderivs(Mh_ betax,Mh_ fh,Mh_ betaxx,Mh_ betaxy,Mh_ betaxz,ass); - sub_fderivs(Mh_ betay,Mh_ fh,Mh_ betayx,Mh_ betayy,Mh_ betayz,sas); - sub_fderivs(Mh_ betaz,Mh_ fh,Mh_ betazx,Mh_ betazy,Mh_ betazz,ssa); - sub_fderivs(Mh_ chi,Mh_ fh,Mh_ chix,Mh_ chiy,Mh_ chiz, sss); - sub_fderivs(Mh_ Lap,Mh_ fh,Mh_ Lapx,Mh_ Lapy,Mh_ Lapz, sss); - sub_fderivs(Mh_ trK,Mh_ fh,Mh_ Kx,Mh_ Ky,Mh_ Kz, sss); - sub_fderivs(Mh_ dxx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz, sss); - sub_fderivs(Mh_ dyy,Mh_ fh,Mh_ gyyx,Mh_ gyyy,Mh_ gyyz, sss); - sub_fderivs(Mh_ dzz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz, sss); - sub_fderivs(Mh_ gxy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz, aas); - sub_fderivs(Mh_ gxz,Mh_ fh,Mh_ gxzx,Mh_ gxzy,Mh_ gxzz, asa); - sub_fderivs(Mh_ gyz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz, saa); - - compute_rhs_bssn_part2<<>>(); - cudaThreadSynchronize(); - - sub_fdderivs(Mh_ betax,Mh_ fh,Mh_ gxxx,Mh_ gxyx,Mh_ gxzx,Mh_ gyyx,Mh_ gyzx,Mh_ gzzx,ass); - sub_fdderivs(Mh_ betay,Mh_ fh,Mh_ gxxy,Mh_ gxyy,Mh_ gxzy,Mh_ gyyy,Mh_ gyzy,Mh_ gzzy,sas); - sub_fdderivs(Mh_ betaz,Mh_ fh,Mh_ gxxz,Mh_ gxyz,Mh_ gxzz,Mh_ 
gyyz,Mh_ gyzz,Mh_ gzzz,ssa); - sub_fderivs( Mh_ Gamx, Mh_ fh,Mh_ Gamxx, Mh_ Gamxy, Mh_ Gamxz,ass); - sub_fderivs( Mh_ Gamy, Mh_ fh,Mh_ Gamyx, Mh_ Gamyy, Mh_ Gamyz,sas); - sub_fderivs( Mh_ Gamz, Mh_ fh,Mh_ Gamzx, Mh_ Gamzy, Mh_ Gamzz,ssa); - - compute_rhs_bssn_part3<<>>(); - cudaThreadSynchronize(); - - computeRicci(Mh_ dxx,Mh_ Rxx,sss, meta); - computeRicci(Mh_ dyy,Mh_ Ryy,sss, meta); - computeRicci(Mh_ dzz,Mh_ Rzz,sss, meta); - computeRicci(Mh_ gxy,Mh_ Rxy,aas, meta); - computeRicci(Mh_ gxz,Mh_ Rxz,asa, meta); - computeRicci(Mh_ gyz,Mh_ Ryz,saa, meta); - - cudaThreadSynchronize(); - - compute_rhs_bssn_part4<<>>(); - cudaThreadSynchronize(); - - sub_fdderivs(Mh_ chi,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); - - compute_rhs_bssn_part5<<>>(); - cudaThreadSynchronize(); - - sub_fdderivs(Mh_ Lap,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); - - compute_rhs_bssn_part6<<>>(); - cudaThreadSynchronize(); - -#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5) - sub_fderivs(Mh_ chi,Mh_ fh, Mh_ dtSfx_rhs, Mh_ dtSfy_rhs, Mh_ dtSfz_rhs,sss); - compute_rhs_bssn_part6_gauge<<>>(); -#endif - - sub_lopsided(Mh_ gxx,Mh_ fh2,Mh_ gxx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ gxy,Mh_ fh2,Mh_ gxy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,aas); - sub_lopsided(Mh_ gxz,Mh_ fh2,Mh_ gxz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,asa); - sub_lopsided(Mh_ gyy,Mh_ fh2,Mh_ gyy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ gyz,Mh_ fh2,Mh_ gyz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,saa); - sub_lopsided(Mh_ gzz,Mh_ fh2,Mh_ gzz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ Axx,Mh_ fh2,Mh_ Axx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ Axy,Mh_ fh2,Mh_ Axy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,aas); - sub_lopsided(Mh_ Axz,Mh_ fh2,Mh_ Axz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,asa); - sub_lopsided(Mh_ Ayy,Mh_ fh2,Mh_ Ayy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ Ayz,Mh_ fh2,Mh_ Ayz_rhs,Mh_ betax,Mh_ betay,Mh_ 
betaz,saa); - sub_lopsided(Mh_ Azz,Mh_ fh2,Mh_ Azz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ chi,Mh_ fh2,Mh_ chi_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ trK,Mh_ fh2,Mh_ trK_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - sub_lopsided(Mh_ Gamx,Mh_ fh2,Mh_ Gamx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ass); - sub_lopsided(Mh_ Gamy,Mh_ fh2,Mh_ Gamy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sas); - sub_lopsided(Mh_ Gamz,Mh_ fh2,Mh_ Gamz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ssa); - sub_lopsided(Mh_ Lap,Mh_ fh2,Mh_ Lap_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); - -#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) - - sub_lopsided(Mh_ betax,Mh_ fh2,Mh_ betax_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ass); - sub_lopsided(Mh_ betay,Mh_ fh2,Mh_ betay_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sas); - sub_lopsided(Mh_ betaz,Mh_ fh2,Mh_ betaz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ssa); - -#endif -#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) - sub_lopsided(Mh_ dtSfx,Mh_ fh2,Mh_ dtSfx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ass); - sub_lopsided(Mh_ dtSfy,Mh_ fh2,Mh_ dtSfy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sas); - sub_lopsided(Mh_ dtSfz,Mh_ fh2,Mh_ dtSfz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ssa); -#endif - if(eps > 0){ - sub_kodis(Mh_ chi,Mh_ fh2, Mh_ chi_rhs,sss); - sub_kodis(Mh_ trK,Mh_ fh2, Mh_ trK_rhs,sss); - sub_kodis(Mh_ dxx,Mh_ fh2, Mh_ gxx_rhs,sss); - sub_kodis(Mh_ gxy,Mh_ fh2, Mh_ gxy_rhs,aas); - sub_kodis(Mh_ gxz,Mh_ fh2, Mh_ gxz_rhs,asa); - sub_kodis(Mh_ dyy,Mh_ fh2, Mh_ gyy_rhs,sss); - sub_kodis(Mh_ gyz,Mh_ fh2, Mh_ gyz_rhs,saa); - sub_kodis(Mh_ dzz,Mh_ fh2, Mh_ gzz_rhs,sss); - sub_kodis(Mh_ Axx,Mh_ fh2, Mh_ Axx_rhs,sss); - sub_kodis(Mh_ Axy,Mh_ fh2, Mh_ Axy_rhs,aas); - sub_kodis(Mh_ Axz,Mh_ fh2, Mh_ Axz_rhs,asa); - sub_kodis(Mh_ Ayy,Mh_ fh2, Mh_ Ayy_rhs,sss); - sub_kodis(Mh_ Ayz,Mh_ fh2, Mh_ Ayz_rhs,saa); - sub_kodis(Mh_ Azz,Mh_ fh2, Mh_ Azz_rhs,sss); - sub_kodis(Mh_ Gamx,Mh_ fh2, Mh_ Gamx_rhs,ass); - sub_kodis(Mh_ Gamy,Mh_ 
fh2, Mh_ Gamy_rhs,sas); - sub_kodis(Mh_ Gamz,Mh_ fh2, Mh_ Gamz_rhs,ssa); - - sub_kodis(Mh_ Lap,Mh_ fh2, Mh_ Lap_rhs,sss); - sub_kodis(Mh_ betax,Mh_ fh2, Mh_ betax_rhs,ass); - sub_kodis(Mh_ betay,Mh_ fh2, Mh_ betay_rhs,sas); - sub_kodis(Mh_ betaz,Mh_ fh2, Mh_ betaz_rhs,ssa); - -#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) - sub_kodis(Mh_ dtSfx,Mh_ fh2, Mh_ dtSfx_rhs,ass); - sub_kodis(Mh_ dtSfy,Mh_ fh2, Mh_ dtSfy_rhs,sas); - sub_kodis(Mh_ dtSfz,Mh_ fh2, Mh_ dtSfz_rhs,ssa); -#endif - - } - - if(co == 0){ - compute_rhs_bssn_part7<<>>(); - cudaThreadSynchronize(); - - sub_fderivs(Mh_ Axx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz,sss); - sub_fderivs(Mh_ Axy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz,aas); - sub_fderivs(Mh_ Axz,Mh_ fh,Mh_ gxzx,Mh_ gxzy,Mh_ gxzz,asa); - sub_fderivs(Mh_ Ayy,Mh_ fh,Mh_ gyyx,Mh_ gyyy,Mh_ gyyz,sss); - sub_fderivs(Mh_ Ayz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz,saa); - sub_fderivs(Mh_ Azz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz,sss); - compute_rhs_bssn_part8<<>>(); - cudaThreadSynchronize(); - } - -#if (ABV == 1) - cout<<"TODO: bssn_gpu.cu::2373 (ABV == 1)"< -#define Ms_ metassc. 
-#define Msh_ metass-> - -// #define TIMING - -#define RHS_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double 
*Gmz_Res, int &Symmetry, int &Lev, double &eps, int &sst, int &co - -/** main function */ -int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T, - double *X, double *Y, double *Z, - - double *chi, double *trK, - - double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, - - double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, - - double *Gamx, double *Gamy, double *Gamz, - - double *Lap, double *betax, double *betay, double *betaz, - - double *dtSfx, double *dtSfy, double *dtSfz, - - double *chi_rhs, double *trK_rhs, - - double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, - - double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, - - double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, - - double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, - - double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, - - double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, - double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, - - double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, - - double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, - - double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, - - double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, - - double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, - double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, - int &Symmetry, int &Lev, double &eps, int &co); - -int gpu_rhs_ss(RHS_SS_PARA); - -/** Init GPU side data in GPUMeta. 
*/ -// void init_fluid_meta_gpu(GPUMeta *gpu_meta); - -#endif diff --git a/AMSS_NCKU_source/bssn_gpu_class.C b/AMSS_NCKU_source/bssn_gpu_class.C deleted file mode 100644 index f6d5170..0000000 --- a/AMSS_NCKU_source/bssn_gpu_class.C +++ /dev/null @@ -1,7790 +0,0 @@ - -#ifdef newc -#include -#include -using namespace std; -#else -#include -#endif - -#include "macrodef.h" -#include "misc.h" -#include -#include "Ansorg.h" -#include "fmisc.h" -#include "Parallel.h" -#include "bssn_gpu_class.h" -#include "bssn_rhs.h" -#include "initial_puncture.h" -#include "enforce_algebra.h" -#include "rungekutta4_rout.h" -#include "sommerfeld_rout.h" -#include "getnp4.h" -#include "shellfunctions.h" - -#ifdef With_AHF -#include "derivatives.h" -#include "myglobal.h" -#endif - -#include "perf.h" -#include "derivatives.h" -#include "ricci_gamma.h" - -// include GPU files -#include "bssn_gpu.h" - -//================================================================================================ - -// Define bssn_gpu_class - -//================================================================================================ - -bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, - double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, - int Symmetryi, int checkruni, char *checkfilenamei, - double numepssi, double numepsbi, double numepshi, - int a_levi, int maxli, int decni, double maxrexi, double drexi) - : Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei), - DumpTime(DumpTimei), d2DumpTime(d2DumpTimei), CheckTime(CheckTimei), AnasTime(AnasTimei), - Symmetry(Symmetryi), checkrun(checkruni), numepss(numepssi), numepsb(numepsbi), numepsh(numepshi), -#ifdef With_AHF - xc(0), yc(0), zc(0), xr(0), yr(0), zr(0), trigger(0), dTT(0), dumpid(0), -#endif - a_lev(a_levi), maxl(maxli), decn(decni), maxrex(maxrexi), drex(drexi), - CheckPoint(0) -{ - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - - // setup 
Monitors - { - stringstream a_stream; - a_stream.setf(ios::left); - a_stream << "# Error log information"; - ErrorMonitor = new monitor("Error.log", myrank, a_stream.str()); - ErrorMonitor->print_message("Warning: we always assume intput parameter in cell center style."); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time"; - char str[50]; - for (int pl = 2; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - { - sprintf(str, "R%02dm%03d", pl, pm); - a_stream << setw(16) << str; - sprintf(str, "I%02dm%03d", pl, pm); - a_stream << setw(16) << str; - } - Psi4Monitor = new monitor("bssn_psi4.dat", myrank, a_stream.str()); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time"; - BHMonitor = new monitor("bssn_BH.dat", myrank, a_stream.str()); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time ADMmass ADMPx ADMPy ADMPz ADMSx ADMSy ADMSz"; - MAPMonitor = new monitor("bssn_ADMQs.dat", myrank, a_stream.str()); - - a_stream.clear(); - a_stream.str(""); - a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz"; - ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str()); - } - // setup sphere integration engine - Waveshell = new surface_integral(Symmetry); - - trfls = 0; - chitiny = 0; - // read parameter from file - { - char filename[50]; - strcpy(filename, "input.par"); - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << 
filename - << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "chitiny") - chitiny = atof(sval.c_str()); - else if (sgrp == "BSSN" && skey == "time refinement start from level") - trfls = atoi(sval.c_str()); -#ifdef With_AHF - else if (sgrp == "AHF" && skey == "AHfindevery") - AHfindevery = atoi(sval.c_str()); - else if (sgrp == "AHF" && skey == "AHdumptime") - AHdumptime = atof(sval.c_str()); -#endif - } - inf.close(); - } - if (myrank == 0) - { - // echo information of lower bound of chi - cout << "chitiny = " << chitiny << endl; - cout << "time refinement start from level #" << trfls << endl; -#ifdef With_AHF - cout << "parameters for AHF:" << endl; - cout << "AHfindevery = " << AHfindevery << endl; - cout << "AHdumptime = " << AHdumptime << endl; -#endif - } - - chitiny = chitiny - 1; // because we have subtracted one from chi - - strcpy(checkfilename, checkfilenamei); - - ngfs = 0; - phio = new var("phio", ngfs++, 1, 1, 1); - trKo = new var("trKo", ngfs++, 1, 1, 1); - gxxo = new var("gxxo", ngfs++, 1, 1, 1); - gxyo = new var("gxyo", ngfs++, -1, -1, 1); - gxzo = new var("gxzo", ngfs++, -1, 1, -1); - gyyo = new var("gyyo", ngfs++, 1, 1, 1); - gyzo = new var("gyzo", ngfs++, 1, -1, -1); - gzzo = new var("gzzo", ngfs++, 1, 1, 1); - Axxo = new var("Axxo", ngfs++, 1, 1, 1); - Axyo = new var("Axyo", ngfs++, -1, -1, 1); - Axzo = new var("Axzo", ngfs++, -1, 1, -1); - Ayyo = new var("Ayyo", ngfs++, 1, 1, 1); - Ayzo = new var("Ayzo", ngfs++, 1, -1, -1); - Azzo = new var("Azzo", ngfs++, 1, 1, 1); - Gmxo = new var("Gmxo", ngfs++, -1, 1, 1); - Gmyo = new var("Gmyo", ngfs++, 1, -1, 1); - Gmzo = new var("Gmzo", ngfs++, 1, 1, -1); - Lapo = new var("Lapo", ngfs++, 1, 1, 1); - Sfxo = new var("Sfxo", ngfs++, -1, 1, 1); - Sfyo = new var("Sfyo", ngfs++, 1, -1, 1); - Sfzo = new var("Sfzo", ngfs++, 1, 1, -1); - dtSfxo = new var("dtSfxo", ngfs++, -1, 1, 1); - dtSfyo = new var("dtSfyo", ngfs++, 1, -1, 
1); - dtSfzo = new var("dtSfzo", ngfs++, 1, 1, -1); - - phi0 = new var("phi0", ngfs++, 1, 1, 1); - trK0 = new var("trK0", ngfs++, 1, 1, 1); - gxx0 = new var("gxx0", ngfs++, 1, 1, 1); - gxy0 = new var("gxy0", ngfs++, -1, -1, 1); - gxz0 = new var("gxz0", ngfs++, -1, 1, -1); - gyy0 = new var("gyy0", ngfs++, 1, 1, 1); - gyz0 = new var("gyz0", ngfs++, 1, -1, -1); - gzz0 = new var("gzz0", ngfs++, 1, 1, 1); - Axx0 = new var("Axx0", ngfs++, 1, 1, 1); - Axy0 = new var("Axy0", ngfs++, -1, -1, 1); - Axz0 = new var("Axz0", ngfs++, -1, 1, -1); - Ayy0 = new var("Ayy0", ngfs++, 1, 1, 1); - Ayz0 = new var("Ayz0", ngfs++, 1, -1, -1); - Azz0 = new var("Azz0", ngfs++, 1, 1, 1); - Gmx0 = new var("Gmx0", ngfs++, -1, 1, 1); - Gmy0 = new var("Gmy0", ngfs++, 1, -1, 1); - Gmz0 = new var("Gmz0", ngfs++, 1, 1, -1); - Lap0 = new var("Lap0", ngfs++, 1, 1, 1); - Sfx0 = new var("Sfx0", ngfs++, -1, 1, 1); - Sfy0 = new var("Sfy0", ngfs++, 1, -1, 1); - Sfz0 = new var("Sfz0", ngfs++, 1, 1, -1); - dtSfx0 = new var("dtSfx0", ngfs++, -1, 1, 1); - dtSfy0 = new var("dtSfy0", ngfs++, 1, -1, 1); - dtSfz0 = new var("dtSfz0", ngfs++, 1, 1, -1); - - phi = new var("phi", ngfs++, 1, 1, 1); - trK = new var("trK", ngfs++, 1, 1, 1); - gxx = new var("gxx", ngfs++, 1, 1, 1); - gxy = new var("gxy", ngfs++, -1, -1, 1); - gxz = new var("gxz", ngfs++, -1, 1, -1); - gyy = new var("gyy", ngfs++, 1, 1, 1); - gyz = new var("gyz", ngfs++, 1, -1, -1); - gzz = new var("gzz", ngfs++, 1, 1, 1); - Axx = new var("Axx", ngfs++, 1, 1, 1); - Axy = new var("Axy", ngfs++, -1, -1, 1); - Axz = new var("Axz", ngfs++, -1, 1, -1); - Ayy = new var("Ayy", ngfs++, 1, 1, 1); - Ayz = new var("Ayz", ngfs++, 1, -1, -1); - Azz = new var("Azz", ngfs++, 1, 1, 1); - Gmx = new var("Gmx", ngfs++, -1, 1, 1); - Gmy = new var("Gmy", ngfs++, 1, -1, 1); - Gmz = new var("Gmz", ngfs++, 1, 1, -1); - Lap = new var("Lap", ngfs++, 1, 1, 1); - Sfx = new var("Sfx", ngfs++, -1, 1, 1); - Sfy = new var("Sfy", ngfs++, 1, -1, 1); - Sfz = new var("Sfz", ngfs++, 1, 1, -1); 
- dtSfx = new var("dtSfx", ngfs++, -1, 1, 1); - dtSfy = new var("dtSfy", ngfs++, 1, -1, 1); - dtSfz = new var("dtSfz", ngfs++, 1, 1, -1); - - phi1 = new var("phi1", ngfs++, 1, 1, 1); - trK1 = new var("trK1", ngfs++, 1, 1, 1); - gxx1 = new var("gxx1", ngfs++, 1, 1, 1); - gxy1 = new var("gxy1", ngfs++, -1, -1, 1); - gxz1 = new var("gxz1", ngfs++, -1, 1, -1); - gyy1 = new var("gyy1", ngfs++, 1, 1, 1); - gyz1 = new var("gyz1", ngfs++, 1, -1, -1); - gzz1 = new var("gzz1", ngfs++, 1, 1, 1); - Axx1 = new var("Axx1", ngfs++, 1, 1, 1); - Axy1 = new var("Axy1", ngfs++, -1, -1, 1); - Axz1 = new var("Axz1", ngfs++, -1, 1, -1); - Ayy1 = new var("Ayy1", ngfs++, 1, 1, 1); - Ayz1 = new var("Ayz1", ngfs++, 1, -1, -1); - Azz1 = new var("Azz1", ngfs++, 1, 1, 1); - Gmx1 = new var("Gmx1", ngfs++, -1, 1, 1); - Gmy1 = new var("Gmy1", ngfs++, 1, -1, 1); - Gmz1 = new var("Gmz1", ngfs++, 1, 1, -1); - Lap1 = new var("Lap1", ngfs++, 1, 1, 1); - Sfx1 = new var("Sfx1", ngfs++, -1, 1, 1); - Sfy1 = new var("Sfy1", ngfs++, 1, -1, 1); - Sfz1 = new var("Sfz1", ngfs++, 1, 1, -1); - dtSfx1 = new var("dtSfx1", ngfs++, -1, 1, 1); - dtSfy1 = new var("dtSfy1", ngfs++, 1, -1, 1); - dtSfz1 = new var("dtSfz1", ngfs++, 1, 1, -1); - - phi_rhs = new var("phi_rhs", ngfs++, 1, 1, 1); - trK_rhs = new var("trK_rhs", ngfs++, 1, 1, 1); - gxx_rhs = new var("gxx_rhs", ngfs++, 1, 1, 1); - gxy_rhs = new var("gxy_rhs", ngfs++, -1, -1, 1); - gxz_rhs = new var("gxz_rhs", ngfs++, -1, 1, -1); - gyy_rhs = new var("gyy_rhs", ngfs++, 1, 1, 1); - gyz_rhs = new var("gyz_rhs", ngfs++, 1, -1, -1); - gzz_rhs = new var("gzz_rhs", ngfs++, 1, 1, 1); - Axx_rhs = new var("Axx_rhs", ngfs++, 1, 1, 1); - Axy_rhs = new var("Axy_rhs", ngfs++, -1, -1, 1); - Axz_rhs = new var("Axz_rhs", ngfs++, -1, 1, -1); - Ayy_rhs = new var("Ayy_rhs", ngfs++, 1, 1, 1); - Ayz_rhs = new var("Ayz_rhs", ngfs++, 1, -1, -1); - Azz_rhs = new var("Azz_rhs", ngfs++, 1, 1, 1); - Gmx_rhs = new var("Gmx_rhs", ngfs++, -1, 1, 1); - Gmy_rhs = new var("Gmy_rhs", ngfs++, 1, 
-1, 1); - Gmz_rhs = new var("Gmz_rhs", ngfs++, 1, 1, -1); - Lap_rhs = new var("Lap_rhs", ngfs++, 1, 1, 1); - Sfx_rhs = new var("Sfx_rhs", ngfs++, -1, 1, 1); - Sfy_rhs = new var("Sfy_rhs", ngfs++, 1, -1, 1); - Sfz_rhs = new var("Sfz_rhs", ngfs++, 1, 1, -1); - dtSfx_rhs = new var("dtSfx_rhs", ngfs++, -1, 1, 1); - dtSfy_rhs = new var("dtSfy_rhs", ngfs++, 1, -1, 1); - dtSfz_rhs = new var("dtSfz_rhs", ngfs++, 1, 1, -1); - - rho = new var("rho", ngfs++, 1, 1, 1); - Sx = new var("Sx", ngfs++, -1, 1, 1); - Sy = new var("Sy", ngfs++, 1, -1, 1); - Sz = new var("Sz", ngfs++, 1, 1, -1); - Sxx = new var("Sxx", ngfs++, 1, 1, 1); - Sxy = new var("Sxy", ngfs++, -1, -1, 1); - Sxz = new var("Sxz", ngfs++, -1, 1, -1); - Syy = new var("Syy", ngfs++, 1, 1, 1); - Syz = new var("Syz", ngfs++, 1, -1, -1); - Szz = new var("Szz", ngfs++, 1, 1, 1); - - Gamxxx = new var("Gamxxx", ngfs++, -1, 1, 1); - Gamxxy = new var("Gamxxy", ngfs++, 1, -1, 1); - Gamxxz = new var("Gamxxz", ngfs++, 1, 1, -1); - Gamxyy = new var("Gamxyy", ngfs++, -1, 1, 1); - Gamxyz = new var("Gamxyz", ngfs++, -1, -1, -1); - Gamxzz = new var("Gamxzz", ngfs++, -1, 1, 1); - Gamyxx = new var("Gamyxx", ngfs++, 1, -1, 1); - Gamyxy = new var("Gamyxy", ngfs++, -1, 1, 1); - Gamyxz = new var("Gamyxz", ngfs++, -1, -1, -1); - Gamyyy = new var("Gamyyy", ngfs++, 1, -1, 1); - Gamyyz = new var("Gamyyz", ngfs++, 1, 1, -1); - Gamyzz = new var("Gamyzz", ngfs++, 1, -1, 1); - Gamzxx = new var("Gamzxx", ngfs++, 1, 1, -1); - Gamzxy = new var("Gamzxy", ngfs++, -1, -1, -1); - Gamzxz = new var("Gamzxz", ngfs++, -1, 1, 1); - Gamzyy = new var("Gamzyy", ngfs++, 1, 1, -1); - Gamzyz = new var("Gamzyz", ngfs++, 1, -1, 1); - Gamzzz = new var("Gamzzz", ngfs++, 1, 1, -1); - - Rxx = new var("Rxx", ngfs++, 1, 1, 1); - Rxy = new var("Rxy", ngfs++, -1, -1, 1); - Rxz = new var("Rxz", ngfs++, -1, 1, -1); - Ryy = new var("Ryy", ngfs++, 1, 1, 1); - Ryz = new var("Ryz", ngfs++, 1, -1, -1); - Rzz = new var("Rzz", ngfs++, 1, 1, 1); - - // refer to PRD, 77, 024027 (2008) 
- Rpsi4 = new var("Rpsi4", ngfs++, 1, 1, 1); - Ipsi4 = new var("Ipsi4", ngfs++, -1, -1, -1); - t1Rpsi4 = new var("t1Rpsi4", ngfs++, 1, 1, 1); - t1Ipsi4 = new var("t1Ipsi4", ngfs++, -1, -1, -1); - t2Rpsi4 = new var("t2Rpsi4", ngfs++, 1, 1, 1); - t2Ipsi4 = new var("t2Ipsi4", ngfs++, -1, -1, -1); - - // constraint violation monitor variables - Cons_Ham = new var("Cons_Ham", ngfs++, 1, 1, 1); - Cons_Px = new var("Cons_Px", ngfs++, -1, 1, 1); - Cons_Py = new var("Cons_Py", ngfs++, 1, -1, 1); - Cons_Pz = new var("Cons_Pz", ngfs++, 1, 1, -1); - Cons_Gx = new var("Cons_Gx", ngfs++, -1, 1, 1); - Cons_Gy = new var("Cons_Gy", ngfs++, 1, -1, 1); - Cons_Gz = new var("Cons_Gz", ngfs++, 1, 1, -1); - -#ifdef Point_Psi4 - phix = new var("phix", ngfs++, -1, 1, 1); - phiy = new var("phiy", ngfs++, 1, -1, 1); - phiz = new var("phiz", ngfs++, 1, 1, -1); - trKx = new var("trKx", ngfs++, -1, 1, 1); - trKy = new var("trKy", ngfs++, 1, -1, 1); - trKz = new var("trKz", ngfs++, 1, 1, -1); - Axxx = new var("Axxx", ngfs++, -1, 1, 1); - Axxy = new var("Axxy", ngfs++, 1, -1, 1); - Axxz = new var("Axxz", ngfs++, 1, 1, -1); - Axyx = new var("Axyx", ngfs++, 1, -1, 1); - Axyy = new var("Axyy", ngfs++, -1, 1, 1); - Axyz = new var("Axyz", ngfs++, -1, -1, -1); - Axzx = new var("Axzx", ngfs++, 1, 1, -1); - Axzy = new var("Axzy", ngfs++, -1, -1, -1); - Axzz = new var("Axzz", ngfs++, -1, 1, 1); - Ayyx = new var("Ayyx", ngfs++, -1, 1, 1); - Ayyy = new var("Ayyy", ngfs++, 1, -1, 1); - Ayyz = new var("Ayyz", ngfs++, 1, 1, -1); - Ayzx = new var("Ayzx", ngfs++, -1, -1, -1); - Ayzy = new var("Ayzy", ngfs++, 1, 1, -1); - Ayzz = new var("Ayzz", ngfs++, 1, -1, 1); - Azzx = new var("Azzx", ngfs++, -1, 1, 1); - Azzy = new var("Azzy", ngfs++, 1, -1, 1); - Azzz = new var("Azzz", ngfs++, 1, 1, -1); -#endif - - // specific properspeed for 1+log slice - { - const double vl = sqrt(2); - trKo->setpropspeed(vl); - trK0->setpropspeed(vl); - trK->setpropspeed(vl); - trK1->setpropspeed(vl); - trK_rhs->setpropspeed(vl); - - 
phio->setpropspeed(vl); - phi0->setpropspeed(vl); - phi->setpropspeed(vl); - phi1->setpropspeed(vl); - phi_rhs->setpropspeed(vl); - - Lapo->setpropspeed(vl); - Lap0->setpropspeed(vl); - Lap->setpropspeed(vl); - Lap1->setpropspeed(vl); - Lap_rhs->setpropspeed(vl); - } - - OldStateList = new MyList(phio); - OldStateList->insert(trKo); - OldStateList->insert(gxxo); - OldStateList->insert(gxyo); - OldStateList->insert(gxzo); - OldStateList->insert(gyyo); - OldStateList->insert(gyzo); - OldStateList->insert(gzzo); - OldStateList->insert(Axxo); - OldStateList->insert(Axyo); - OldStateList->insert(Axzo); - OldStateList->insert(Ayyo); - OldStateList->insert(Ayzo); - OldStateList->insert(Azzo); - OldStateList->insert(Gmxo); - OldStateList->insert(Gmyo); - OldStateList->insert(Gmzo); - OldStateList->insert(Lapo); - OldStateList->insert(Sfxo); - OldStateList->insert(Sfyo); - OldStateList->insert(Sfzo); - OldStateList->insert(dtSfxo); - OldStateList->insert(dtSfyo); - OldStateList->insert(dtSfzo); - - StateList = new MyList(phi0); - StateList->insert(trK0); - StateList->insert(gxx0); - StateList->insert(gxy0); - StateList->insert(gxz0); - StateList->insert(gyy0); - StateList->insert(gyz0); - StateList->insert(gzz0); - StateList->insert(Axx0); - StateList->insert(Axy0); - StateList->insert(Axz0); - StateList->insert(Ayy0); - StateList->insert(Ayz0); - StateList->insert(Azz0); - StateList->insert(Gmx0); - StateList->insert(Gmy0); - StateList->insert(Gmz0); - StateList->insert(Lap0); - StateList->insert(Sfx0); - StateList->insert(Sfy0); - StateList->insert(Sfz0); - StateList->insert(dtSfx0); - StateList->insert(dtSfy0); - StateList->insert(dtSfz0); - - RHSList = new MyList(phi_rhs); - RHSList->insert(trK_rhs); - RHSList->insert(gxx_rhs); - RHSList->insert(gxy_rhs); - RHSList->insert(gxz_rhs); - RHSList->insert(gyy_rhs); - RHSList->insert(gyz_rhs); - RHSList->insert(gzz_rhs); - RHSList->insert(Axx_rhs); - RHSList->insert(Axy_rhs); - RHSList->insert(Axz_rhs); - 
RHSList->insert(Ayy_rhs); - RHSList->insert(Ayz_rhs); - RHSList->insert(Azz_rhs); - RHSList->insert(Gmx_rhs); - RHSList->insert(Gmy_rhs); - RHSList->insert(Gmz_rhs); - RHSList->insert(Lap_rhs); - RHSList->insert(Sfx_rhs); - RHSList->insert(Sfy_rhs); - RHSList->insert(Sfz_rhs); - RHSList->insert(dtSfx_rhs); - RHSList->insert(dtSfy_rhs); - RHSList->insert(dtSfz_rhs); - - SynchList_pre = new MyList(phi); - SynchList_pre->insert(trK); - SynchList_pre->insert(gxx); - SynchList_pre->insert(gxy); - SynchList_pre->insert(gxz); - SynchList_pre->insert(gyy); - SynchList_pre->insert(gyz); - SynchList_pre->insert(gzz); - SynchList_pre->insert(Axx); - SynchList_pre->insert(Axy); - SynchList_pre->insert(Axz); - SynchList_pre->insert(Ayy); - SynchList_pre->insert(Ayz); - SynchList_pre->insert(Azz); - SynchList_pre->insert(Gmx); - SynchList_pre->insert(Gmy); - SynchList_pre->insert(Gmz); - SynchList_pre->insert(Lap); - SynchList_pre->insert(Sfx); - SynchList_pre->insert(Sfy); - SynchList_pre->insert(Sfz); - SynchList_pre->insert(dtSfx); - SynchList_pre->insert(dtSfy); - SynchList_pre->insert(dtSfz); - - SynchList_cor = new MyList(phi1); - SynchList_cor->insert(trK1); - SynchList_cor->insert(gxx1); - SynchList_cor->insert(gxy1); - SynchList_cor->insert(gxz1); - SynchList_cor->insert(gyy1); - SynchList_cor->insert(gyz1); - SynchList_cor->insert(gzz1); - SynchList_cor->insert(Axx1); - SynchList_cor->insert(Axy1); - SynchList_cor->insert(Axz1); - SynchList_cor->insert(Ayy1); - SynchList_cor->insert(Ayz1); - SynchList_cor->insert(Azz1); - SynchList_cor->insert(Gmx1); - SynchList_cor->insert(Gmy1); - SynchList_cor->insert(Gmz1); - SynchList_cor->insert(Lap1); - SynchList_cor->insert(Sfx1); - SynchList_cor->insert(Sfy1); - SynchList_cor->insert(Sfz1); - SynchList_cor->insert(dtSfx1); - SynchList_cor->insert(dtSfy1); - SynchList_cor->insert(dtSfz1); - - DumpList = new MyList(phi0); - DumpList->insert(trK0); - DumpList->insert(gxx0); - DumpList->insert(gxy0); - DumpList->insert(gxz0); - 
DumpList->insert(gyy0); - DumpList->insert(gyz0); - DumpList->insert(gzz0); - // DumpList->insert(Axx0); - // DumpList->insert(Axy0); - // DumpList->insert(Axz0); - // DumpList->insert(Ayy0); - // DumpList->insert(Ayz0); - // DumpList->insert(Azz0); - // DumpList->insert(Gmx0); - // DumpList->insert(Gmy0); - // DumpList->insert(Gmz0); - DumpList->insert(Lap0); - // DumpList->insert(Sfx0); - // DumpList->insert(Sfy0); - // DumpList->insert(Sfz0); - // DumpList->insert(dtSfx0); - // DumpList->insert(dtSfy0); - // DumpList->insert(dtSfz0); - DumpList->insert(Rpsi4); - DumpList->insert(Ipsi4); - DumpList->insert(Cons_Ham); - DumpList->insert(Cons_Px); - DumpList->insert(Cons_Py); - DumpList->insert(Cons_Pz); - // DumpList->insert(Cons_Gx); - // DumpList->insert(Cons_Gy); - // DumpList->insert(Cons_Gz); - - ConstraintList = new MyList(Cons_Ham); - ConstraintList->insert(Cons_Px); - ConstraintList->insert(Cons_Py); - ConstraintList->insert(Cons_Pz); - ConstraintList->insert(Cons_Gx); - ConstraintList->insert(Cons_Gy); - ConstraintList->insert(Cons_Gz); -#ifdef With_AHF - // setup kinds of var list - // List for AparentHorizonFinderDirect - // special attension is payed to symmetry type - // gij gij,x gij,y gij,z - AHList = new MyList(gxx0); - AHList->insert(Gamxxx); - AHList->insert(Gamyxx); - AHList->insert(Gamzxx); - AHList->insert(gxy0); - AHList->insert(Gamxxy); - AHList->insert(Gamyxy); - AHList->insert(Gamzxy); - AHList->insert(gxz0); - AHList->insert(Gamxxz); - AHList->insert(Gamyxz); - AHList->insert(Gamzxz); - AHList->insert(gyy0); - AHList->insert(Gamxyy); - AHList->insert(Gamyyy); - AHList->insert(Gamzyy); - AHList->insert(gyz0); - AHList->insert(Gamxyz); - AHList->insert(Gamyyz); - AHList->insert(Gamzyz); - AHList->insert(gzz0); - AHList->insert(Gamxzz); - AHList->insert(Gamyzz); - AHList->insert(Gamzzz); - // phi phi,x phi,y phi,z - AHList->insert(phi0); - AHList->insert(dtSfx_rhs); - AHList->insert(dtSfy_rhs); - AHList->insert(dtSfz_rhs); - // Aij - 
AHList->insert(Axx0); - AHList->insert(Axy0); - AHList->insert(Axz0); - AHList->insert(Ayy0); - AHList->insert(Ayz0); - AHList->insert(Azz0); - // trK - AHList->insert(trK0); - // gij,x gij,y gij,z - AHDList = new MyList(Gamxxx); - AHDList->insert(Gamyxx); - AHDList->insert(Gamzxx); - AHDList->insert(Gamxxy); - AHDList->insert(Gamyxy); - AHDList->insert(Gamzxy); - AHDList->insert(Gamxxz); - AHDList->insert(Gamyxz); - AHDList->insert(Gamzxz); - AHDList->insert(Gamxyy); - AHDList->insert(Gamyyy); - AHDList->insert(Gamzyy); - AHDList->insert(Gamxyz); - AHDList->insert(Gamyyz); - AHDList->insert(Gamzyz); - AHDList->insert(Gamxzz); - AHDList->insert(Gamyzz); - AHDList->insert(Gamzzz); - // phi,x phi,y phi,z - AHDList->insert(dtSfx_rhs); - AHDList->insert(dtSfy_rhs); - AHDList->insert(dtSfz_rhs); - - GaugeList = new MyList(Lap0); - GaugeList->insert(Sfx0); - GaugeList->insert(Sfy0); - GaugeList->insert(Sfz0); -#endif - - CheckPoint = new checkpoint(checkrun, checkfilename, myrank); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function initializes the class - -//================================================================================================ - -void bssn_class::Initialize() -{ - if (myrank == 0) - cout << "you have setted " << ngfs << " grid functions." 
<< endl; - - CheckPoint->addvariablelist(StateList); - CheckPoint->addvariablelist(OldStateList); - - GH = new cgh(0, ngfs, Symmetry, "input.par", checkrun, ErrorMonitor); - if (checkrun) - CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry); - else - GH->compose_cgh(nprocs); - -#ifdef WithShell - SH = new ShellPatch(0, ngfs, "input.par", Symmetry, myrank, ErrorMonitor); - SH->matchcheck(GH->PatL[0]); - SH->compose_sh(nprocs); - // SH->compose_shr(nprocs); //sh is faster than shr - SH->setupcordtrans(); - SH->Dump_xyz(0, 0, 1); - SH->setupintintstuff(nprocs, GH->PatL[0], Symmetry); - - if (checkrun) - CheckPoint->readcheck_sh(SH, myrank); -#else - SH = 0; -#endif - - double h = GH->PatL[0]->data->blb->data->getdX(0); - for (int i = 1; i < dim; i++) - h = Mymin(h, GH->PatL[0]->data->blb->data->getdX(i)); - dT = Courant * h; - - if (checkrun) - { - CheckPoint->read_Black_Hole_position(BH_num_input, BH_num, Porg0, Pmom, Spin, Mass, Porgbr, Porg, Porg1, Porg_rhs); - } - else - { - PhysTime = StartTime; - Setup_Black_Hole_position(); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// Destructor: free allocated variables - -//================================================================================================ - -bssn_class::~bssn_class() -{ -#ifdef With_AHF - AHList->clearList(); - AHDList->clearList(); - GaugeList->clearList(); - if (lastahdumpid) - delete[] lastahdumpid; - if (findeveryl) - delete[] findeveryl; - - if (xc) - { - delete[] xc; - delete[] yc; - delete[] zc; - delete[] xr; - delete[] yr; - delete[] zr; - delete[] trigger; - delete[] dumpid; - delete[] dTT; - } - - AHFinderDirect::AHFinderDirect_cleanup(); -#endif - - StateList->clearList(); - RHSList->clearList(); - OldStateList->clearList(); - SynchList_pre->clearList(); - SynchList_cor->clearList(); - 
DumpList->clearList(); - ConstraintList->clearList(); - - delete phio; - delete trKo; - delete gxxo; - delete gxyo; - delete gxzo; - delete gyyo; - delete gyzo; - delete gzzo; - delete Axxo; - delete Axyo; - delete Axzo; - delete Ayyo; - delete Ayzo; - delete Azzo; - delete Gmxo; - delete Gmyo; - delete Gmzo; - delete Lapo; - delete Sfxo; - delete Sfyo; - delete Sfzo; - delete dtSfxo; - delete dtSfyo; - delete dtSfzo; - - delete phi0; - delete trK0; - delete gxx0; - delete gxy0; - delete gxz0; - delete gyy0; - delete gyz0; - delete gzz0; - delete Axx0; - delete Axy0; - delete Axz0; - delete Ayy0; - delete Ayz0; - delete Azz0; - delete Gmx0; - delete Gmy0; - delete Gmz0; - delete Lap0; - delete Sfx0; - delete Sfy0; - delete Sfz0; - delete dtSfx0; - delete dtSfy0; - delete dtSfz0; - - delete phi; - delete trK; - delete gxx; - delete gxy; - delete gxz; - delete gyy; - delete gyz; - delete gzz; - delete Axx; - delete Axy; - delete Axz; - delete Ayy; - delete Ayz; - delete Azz; - delete Gmx; - delete Gmy; - delete Gmz; - delete Lap; - delete Sfx; - delete Sfy; - delete Sfz; - delete dtSfx; - delete dtSfy; - delete dtSfz; - - delete phi1; - delete trK1; - delete gxx1; - delete gxy1; - delete gxz1; - delete gyy1; - delete gyz1; - delete gzz1; - delete Axx1; - delete Axy1; - delete Axz1; - delete Ayy1; - delete Ayz1; - delete Azz1; - delete Gmx1; - delete Gmy1; - delete Gmz1; - delete Lap1; - delete Sfx1; - delete Sfy1; - delete Sfz1; - delete dtSfx1; - delete dtSfy1; - delete dtSfz1; - - delete phi_rhs; - delete trK_rhs; - delete gxx_rhs; - delete gxy_rhs; - delete gxz_rhs; - delete gyy_rhs; - delete gyz_rhs; - delete gzz_rhs; - delete Axx_rhs; - delete Axy_rhs; - delete Axz_rhs; - delete Ayy_rhs; - delete Ayz_rhs; - delete Azz_rhs; - delete Gmx_rhs; - delete Gmy_rhs; - delete Gmz_rhs; - delete Lap_rhs; - delete Sfx_rhs; - delete Sfy_rhs; - delete Sfz_rhs; - delete dtSfx_rhs; - delete dtSfy_rhs; - delete dtSfz_rhs; - - delete rho; - delete Sx; - delete Sy; - delete Sz; - 
delete Sxx; - delete Sxy; - delete Sxz; - delete Syy; - delete Syz; - delete Szz; - - delete Gamxxx; - delete Gamxxy; - delete Gamxxz; - delete Gamxyy; - delete Gamxyz; - delete Gamxzz; - delete Gamyxx; - delete Gamyxy; - delete Gamyxz; - delete Gamyyy; - delete Gamyyz; - delete Gamyzz; - delete Gamzxx; - delete Gamzxy; - delete Gamzxz; - delete Gamzyy; - delete Gamzyz; - delete Gamzzz; - - delete Rxx; - delete Rxy; - delete Rxz; - delete Ryy; - delete Ryz; - delete Rzz; - - delete Rpsi4; - delete Ipsi4; - delete t1Rpsi4; - delete t1Ipsi4; - delete t2Rpsi4; - delete t2Ipsi4; - - delete Cons_Ham; - delete Cons_Px; - delete Cons_Py; - delete Cons_Pz; - delete Cons_Gx; - delete Cons_Gy; - delete Cons_Gz; - -#ifdef Point_Psi4 - delete phix; - delete phiy; - delete phiz; - delete trKx; - delete trKy; - delete trKz; - delete Axxx; - delete Axxy; - delete Axxz; - delete Axyx; - delete Axyy; - delete Axyz; - delete Axzx; - delete Axzy; - delete Axzz; - delete Ayyx; - delete Ayyy; - delete Ayyz; - delete Ayzx; - delete Ayzy; - delete Ayzz; - delete Azzx; - delete Azzy; - delete Azzz; -#endif - - delete GH; -#ifdef WithShell - delete SH; -#endif - - for (int i = 0; i < BH_num; i++) - { - delete[] Porg0[i]; - delete[] Porgbr[i]; - delete[] Porg[i]; - delete[] Porg1[i]; - delete[] Porg_rhs[i]; - } - - delete[] Porg0; - delete[] Porgbr; - delete[] Porg; - delete[] Porg1; - delete[] Porg_rhs; - - delete[] Mass; - delete[] Spin; - delete[] Pmom; - - delete ErrorMonitor; - delete Psi4Monitor; - delete BHMonitor; - delete MAPMonitor; - delete ConVMonitor; - delete Waveshell; - - delete CheckPoint; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes initial data using Lousto's analytic formulas - 
-//================================================================================================ - -void bssn_class::Setup_Initial_Data_Lousto() -{ - if (!checkrun) - { - if (myrank == 0) - cout << "Setup initial data with Lousto's analytical formula." << endl; - char filename[50]; - strcpy(filename, "input.par"); - int BH_NM; - double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_NM = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - Porg_here = new double[3 * BH_NM]; - Pmom_here = new double[3 * BH_NM]; - Spin_here = new double[3 * BH_NM]; - Mass_here = new double[BH_NM]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if 
(ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass_here[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom_here[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - // Use Lousto's analytic formulas to compute initial data - f_get_lousto_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = 
BL->next; - } - Pp = Pp->next; - } - } - // dump read_in initial data - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_nbhs_sh(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - // dump read_in initial data - SH->Dump_Data(StateList, 0, PhysTime, dT); -#endif - - delete[] Porg_here; - delete[] Mass_here; - delete[] Pmom_here; - delete[] Spin_here; - // SH->Synch(GH->PatL[0],StateList,Symmetry); - // exit(0); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes initial data using analytic formulas by Prof. 
Cao - -//================================================================================================ - -void bssn_class::Setup_Initial_Data_Cao() -{ - if (!checkrun) - { - if (myrank == 0) - cout << "Setup initial data with Cao's analytical formula." << endl; - char filename[50]; - strcpy(filename, "input.par"); - int BH_NM; - double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_NM = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - Porg_here = new double[3 * BH_NM]; - Pmom_here = new double[3 * BH_NM]; - Spin_here = new double[3 * BH_NM]; - Mass_here = new double[BH_NM]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if 
(ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - Mass_here[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom_here[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - // Use Prof. 
Cao's analytic formulas to compute initial data - f_get_initial_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } - // dump read_in initial data - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT); -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_nbhs_sh(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - // dump 
read_in initial data - SH->Dump_Data(StateList, 0, PhysTime, dT); -#endif - - delete[] Porg_here; - delete[] Mass_here; - delete[] Pmom_here; - delete[] Spin_here; - // SH->Synch(GH->PatL[0],StateList,Symmetry); - // exit(0); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes Kerr-Schild initial data analytically - -//================================================================================================ - -void bssn_class::Setup_KerrSchild() -{ - if (!checkrun) - { - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_kerrschild(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]); - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - } -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - int lev = 0, fngfs = Pp->data->fngfs; - - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_get_initial_kerrschild_ss(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + 
ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn]); - /* - f_fderivs_shc(cg->shape, - cg->fgfs[phi0->sgfn], - cg->fgfs[Sfx_rhs->sgfn],cg->fgfs[Sfy_rhs->sgfn],cg->fgfs[Sfz_rhs->sgfn], - cg->X[0],cg->X[1],cg->X[2], - phi0->SoA[0],phi0->SoA[1],phi0->SoA[2], - Symmetry,lev,Pp->data->sst, - cg->fgfs[fngfs+ShellPatch::drhodx], - cg->fgfs[fngfs+ShellPatch::drhody], - cg->fgfs[fngfs+ShellPatch::drhodz], - cg->fgfs[fngfs+ShellPatch::dsigmadx], - cg->fgfs[fngfs+ShellPatch::dsigmady], - cg->fgfs[fngfs+ShellPatch::dsigmadz], - cg->fgfs[fngfs+ShellPatch::dRdx], - cg->fgfs[fngfs+ShellPatch::dRdy], - cg->fgfs[fngfs+ShellPatch::dRdz]); - f_fdderivs_shc(cg->shape,cg->fgfs[phi0->sgfn], - cg->fgfs[Axx_rhs->sgfn],cg->fgfs[Axy_rhs->sgfn],cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn],cg->fgfs[Ayz_rhs->sgfn],cg->fgfs[Azz_rhs->sgfn], - cg->X[0],cg->X[1],cg->X[2], - phi0->SoA[0],phi0->SoA[1],phi0->SoA[2], - Symmetry,lev,Pp->data->sst, - cg->fgfs[fngfs+ShellPatch::drhodx], - cg->fgfs[fngfs+ShellPatch::drhody], - cg->fgfs[fngfs+ShellPatch::drhodz], - cg->fgfs[fngfs+ShellPatch::dsigmadx], - cg->fgfs[fngfs+ShellPatch::dsigmady], - cg->fgfs[fngfs+ShellPatch::dsigmadz], - cg->fgfs[fngfs+ShellPatch::dRdx], - cg->fgfs[fngfs+ShellPatch::dRdy], - cg->fgfs[fngfs+ShellPatch::dRdz], - cg->fgfs[fngfs+ShellPatch::drhodxx], - cg->fgfs[fngfs+ShellPatch::drhodxy], - cg->fgfs[fngfs+ShellPatch::drhodxz], - cg->fgfs[fngfs+ShellPatch::drhodyy], - cg->fgfs[fngfs+ShellPatch::drhodyz], - 
cg->fgfs[fngfs+ShellPatch::drhodzz], - cg->fgfs[fngfs+ShellPatch::dsigmadxx], - cg->fgfs[fngfs+ShellPatch::dsigmadxy], - cg->fgfs[fngfs+ShellPatch::dsigmadxz], - cg->fgfs[fngfs+ShellPatch::dsigmadyy], - cg->fgfs[fngfs+ShellPatch::dsigmadyz], - cg->fgfs[fngfs+ShellPatch::dsigmadzz], - cg->fgfs[fngfs+ShellPatch::dRdxx], - cg->fgfs[fngfs+ShellPatch::dRdxy], - cg->fgfs[fngfs+ShellPatch::dRdxz], - cg->fgfs[fngfs+ShellPatch::dRdyy], - cg->fgfs[fngfs+ShellPatch::dRdyz], - cg->fgfs[fngfs+ShellPatch::dRdzz]); - */ - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } -#endif - - // dump read_in initial data - // SH->Synch(GH->PatL[0],StateList,Symmetry); - // for(int lev=0;levlevels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); - // SH->Dump_Data(StateList,0,PhysTime,dT); - // exit(0); - - /* - { - MyList * DG_List=new MyList(Sfx_rhs); - DG_List->insert(Sfy_rhs); DG_List->insert(Sfz_rhs); - DG_List->insert(Axx_rhs); DG_List->insert(Axy_rhs); DG_List->insert(Axz_rhs); - DG_List->insert(Ayy_rhs); DG_List->insert(Ayz_rhs); DG_List->insert(Azz_rhs); - SH->Synch(DG_List,Symmetry); - SH->Dump_Data(DG_List,0,PhysTime,dT); - DG_List->clearList(); - exit(0); - } - */ - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function reads initial data produced by Pablo Galaviz's Olliptic program - -//================================================================================================ - -//|---------------------------------------------------------------------------- -// read ASCII file with the style of Pablo -//|---------------------------------------------------------------------------- -bool bssn_class::read_Pablo_file(int *ext, double *datain, char *filename) -{ - int nx = ext[0], ny = ext[1], nz = ext[2]; - int i, j, k; - double x, y, z; - 
//|--->open in put file - ifstream infile; - infile.open(filename); - if (!infile) - { - cout << "bssn_class: read_Pablo_file can't open " << filename << " for input." << endl; - return false; - } - for (k = 0; k < nz; k++) - for (j = 0; j < ny; j++) - for (i = 0; i < nx; i++) - { - infile >> x >> y >> z >> datain[i + j * nx + k * nx * ny]; - } - - infile.close(); - - return true; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function writes initial data for Pablo Galaviz's Olliptic program - -//================================================================================================ - -//|---------------------------------------------------------------------------- -// write ASCII file with the style of Pablo -//|---------------------------------------------------------------------------- -void bssn_class::write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - char *filename) -{ - int nx = ext[0], ny = ext[1], nz = ext[2]; - int i, j, k; - double *X, *Y, *Z; - X = new double[nx]; - Y = new double[ny]; - Z = new double[nz]; - double dX, dY, dZ; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - dX = (xmax - xmin) / (nx - 1); - for (i = 0; i < nx; i++) - X[i] = xmin + i * dX; - dY = (ymax - ymin) / (ny - 1); - for (j = 0; j < ny; j++) - Y[j] = ymin + j * dY; - dZ = (zmax - zmin) / (nz - 1); - for (k = 0; k < nz; k++) - Z[k] = zmin + k * dZ; -#else -#ifdef Cell - dX = (xmax - xmin) / nx; - for (i = 0; i < nx; i++) - X[i] = xmin + (i + 0.5) * dX; - dY = (ymax - ymin) / ny; - for (j = 0; j < ny; j++) - Y[j] = ymin + (j + 0.5) * dY; - dZ = (zmax - zmin) / nz; - for (k = 0; k < nz; k++) - Z[k] = zmin + (k + 0.5) * dZ; -#else -#error Not define Vertex nor Cell -#endif -#endif - //|--->open out put file - 
ofstream outfile; - outfile.open(filename); - if (!outfile) - { - cout << "bssn=_class: write_Pablo_file can't open " << filename << " for output." << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - outfile.setf(ios::scientific, ios::floatfield); - outfile.precision(16); - for (k = 0; k < nz; k++) - for (j = 0; j < ny; j++) - for (i = 0; i < nx; i++) - { - outfile << X[i] << " " << Y[j] << " " << Z[k] << " " - << 0 << endl; - } - outfile.close(); - - delete[] X; - delete[] Y; - delete[] Z; -} - -//================================================================================================ - - - - -//================================================================================================ - -// This member function reads TwoPuncture initial data produced by the Ansorg solver - -//================================================================================================ - -// Read initial data solved by Ansorg, PRD 70, 064011 (2004) - -void bssn_class::Read_Ansorg() -{ - if (!checkrun) - { - if (myrank == 0) - cout << "Read initial data from Ansorg's solver," - << " please be sure the input parameters for black holes are puncture parameters!!" 
<< endl; - char filename[50]; - strcpy(filename, "input.par"); - int BH_NM; - double *Porg_here, *Pmom_here, *Spin_here, *Mass_here; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_NM = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - - Porg_here = new double[3 * BH_NM]; - Pmom_here = new double[3 * BH_NM]; - Spin_here = new double[3 * BH_NM]; - Mass_here = new double[BH_NM]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_NM) - { - if (skey == "Mass") - 
Mass_here[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin_here[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom_here[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom_here[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom_here[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - - int order = 6; - Ansorg read_ansorg("Ansorg.psid", order); - // set initial data - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - for (int k = 0; k < cg->shape[2]; k++) - for (int j = 0; j < cg->shape[1]; j++) - for (int i = 0; i < cg->shape[0]; i++) - cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] = - read_ansorg.ps_u_at_xyz(cg->X[0][i], cg->X[1][j], cg->X[2][k]); - - f_get_ansorg_nbhs(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); - } - if (BL == Pp->data->ble) - break; - 
BL = BL->next; - } - Pp = Pp->next; - } - } -#ifdef WithShell - // ShellPatch part - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - for (int k = 0; k < cg->shape[2]; k++) - for (int j = 0; j < cg->shape[1]; j++) - for (int i = 0; i < cg->shape[0]; i++) - cg->fgfs[phi0->sgfn][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]] = - read_ansorg.ps_u_at_xyz(cg->fgfs[Pp->data->fngfs + ShellPatch::gx][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz][i + j * cg->shape[0] + k * cg->shape[0] * cg->shape[1]]); - - f_get_ansorg_nbhs_ss(cg->shape, - cg->fgfs[Pp->data->fngfs + ShellPatch::gx], - cg->fgfs[Pp->data->fngfs + ShellPatch::gy], - cg->fgfs[Pp->data->fngfs + ShellPatch::gz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - Mass_here, Porg_here, Pmom_here, Spin_here, BH_NM); -#if 0 -// for check fderivs_sh - f_fderivs_sh(cg->shape,cg->fgfs[Ayz0->sgfn], - cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn], - cg->X[0],cg->X[1],cg->X[2], - Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2], - Symmetry,Pp->data->sst,Pp->data->sst); -#endif -#if 0 -// for check fderivs_shc - int fngfs = Pp->data->fngfs; - f_fderivs_shc(cg->shape,cg->fgfs[Ayz0->sgfn], - cg->fgfs[Sfx0->sgfn],cg->fgfs[Sfy0->sgfn],cg->fgfs[Sfz0->sgfn], - 
cg->X[0],cg->X[1],cg->X[2], - Ayz0->SoA[0],Ayz0->SoA[1],Ayz0->SoA[2], - Symmetry,Pp->data->sst,Pp->data->sst, - cg->fgfs[fngfs+ShellPatch::drhodx], - cg->fgfs[fngfs+ShellPatch::drhody], - cg->fgfs[fngfs+ShellPatch::drhodz], - cg->fgfs[fngfs+ShellPatch::dsigmadx], - cg->fgfs[fngfs+ShellPatch::dsigmady], - cg->fgfs[fngfs+ShellPatch::dsigmadz], - cg->fgfs[fngfs+ShellPatch::dRdx], - cg->fgfs[fngfs+ShellPatch::dRdy], - cg->fgfs[fngfs+ShellPatch::dRdz]); -#endif - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } -#endif - - delete[] Porg_here; - delete[] Mass_here; - delete[] Pmom_here; - delete[] Spin_here; - - Compute_Constraint(); - // dump read_in initial data - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT); -#ifdef WithShell - SH->Dump_Data(DumpList, 0, PhysTime, dT); -#endif - // if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets up the time evolution for the entire process - -//================================================================================================ - -void bssn_class::Evolve(int Steps) -{ - - clock_t prev_clock, curr_clock; - double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; - LastAnas = 0; -#if 0 -//initial checkpoint for special uasge - { - CheckPoint->write_Black_Hole_position(BH_num_input,BH_num,Porg0,Porgbr,Mass); - CheckPoint->writecheck_cgh(PhysTime,GH); -#ifdef WithShell - CheckPoint->writecheck_sh(PhysTime,SH); -#endif - CheckPoint->write_bssn(LastDump,Last2dDump,LastAnas); - misc::tillherecheck("complete initialization preparation"); // we need synchronization here - if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); - } -#endif - - double beg_time; - beg_time = MPI_Wtime(); -// added by yangquan -#ifdef 
USE_GPU -#ifdef USE_GPU_DIVIDE - // new code considering different partition for cpu and gpu - { - MyList *Pp = GH->PatL[0]; - bool fg = true; - while (fg && Pp) - { - MyList *BP = Pp->data->blb; - while (fg && BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - use_gpu = cg->cgpu; - fg = false; - break; - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - } -#else - // old yangquan code - use_gpu = 0; - if (myrank % 2 == 1) - use_gpu = 1; -#endif -#endif - - // for step 0 constraint interpolation - Interp_Constraint(true); - -#ifdef With_AHF - // setup apparent horizon finder direct of thornburg - { - HN_num = BH_num; - for (int ia = 0; ia < BH_num; ia++) - for (int ib = ia + 1; ib < BH_num; ib++) - HN_num++; - - AHFinderDirect::AHFinderDirect_setup(AHList, GaugeList, - this, - Symmetry, HN_num, &PhysTime); - - lastahdumpid = new int[HN_num]; - findeveryl = new int[HN_num]; - xc = new double[HN_num]; - yc = new double[HN_num]; - zc = new double[HN_num]; - xr = new double[HN_num]; - yr = new double[HN_num]; - zr = new double[HN_num]; - dTT = new double[HN_num]; - trigger = new bool[HN_num]; - dumpid = new int[HN_num]; - - for (int ihn = 0; ihn < HN_num; ihn++) - { - lastahdumpid[ihn] = 0; - findeveryl[ihn] = AHfindevery; - } - } -#endif - - if (checkrun) - CheckPoint->read_bssn(LastDump, Last2dDump, LastAnas); - - double dT_mon = dT * pow(0.5, Mymax(0, trfls)); - /* - #ifdef With_AHF - //initial apparent horizon finding - { - double gam; - double massmin=Mass[0]; - for(int ihn=1;ihnlevels; lev++) - GH->Lt[lev] = PhysTime; - - GH->settrfls(trfls); - - for (int ncount = 1; ncount < Steps + 1; ncount++) - { - cout << "Before Step: " << ncount << " My Rank: " << myrank - << " takes " << MPI_Wtime() - beg_time << " seconds!" 
<< endl; - beg_time = MPI_Wtime(); -#if (PSTR == 0) - RecursiveStep(0); -#elif (PSTR == 1) - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - AnalysisStuff(a_lev, dT_mon); - ParallelStep(); -#endif - cout << "After Step: " << ncount << " My Rank: " << myrank - << " takes " << MPI_Wtime() - beg_time << " seconds!" << endl; - beg_time = MPI_Wtime(); - - // misc::tillherecheck("before Constraint_Out"); - - Constraint_Out(); // this will affect the Dump_List - - LastDump += dT_mon; - Last2dDump += dT_mon; - LastCheck += dT_mon; - - if (LastDump >= DumpTime) - { - // misc::tillherecheck("before Dump_Data"); - - for (int lev = 0; lev < GH->levels; lev++) - Parallel::Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon); -#ifdef WithShell - SH->Dump_Data(DumpList, 0, PhysTime, dT_mon); -#endif - - LastDump = 0; - - if (myrank == 0) - { - cout << "Dump done." << endl; - } - } - - if (Last2dDump >= d2DumpTime) - { - // misc::tillherecheck("before 2dDump_Data"); - - for (int lev = 0; lev < GH->levels; lev++) - Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon); - - Last2dDump = 0; - - if (myrank == 0) - { - cout << "2dDump done." << endl; - } - } - - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl; - cout << "used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" 
<< endl; - } - - if (PhysTime >= TotalTime) - break; - -#if (REGLEV == 1) - GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); -#endif - -#if (REGLEV == 0 && PSTR == 1) -// GH->Regrid_fake(Symmetry,BH_num,Porgbr,Porg0, -// SynchList_cor,OldStateList,StateList,SynchList_pre, -// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor); -#endif - - bssn_perf.MemoryUsage(¤t_min, ¤t_avg, ¤t_max, - &peak_min, &peak_avg, &peak_max, nprocs); - if (myrank == 0) - printf("Memory usage: current %0.4lg/%0.4lg/%0.4lgMB, " - "peak %0.4lg/%0.4lg/%0.4lgMB\n", - (double)current_min / (1024.0 * 1024.0), - (double)current_avg / (1024.0 * 1024.0), - (double)current_max / (1024.0 * 1024.0), - (double)peak_min / (1024.0 * 1024.0), - (double)peak_avg / (1024.0 * 1024.0), - (double)peak_max / (1024.0 * 1024.0)); - - if (LastCheck >= CheckTime) - { - LastCheck = 0; - - CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass); - CheckPoint->writecheck_cgh(PhysTime, GH); - CheckPoint->writecheck_sh(PhysTime, SH); - CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas); - } - } - /* - #ifdef With_AHF - // final apparent horizon finding - { - double gam; - for(int ihn=0;ihnPatL[lev],StateList,0,PhysTime,dT_lev); - } - -#if 0 - if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry); -#endif - -#if (REGLEV == 0) - GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor); -#endif -} - -//================================================================================================ - - - -//================================================================================================ - -// ParallelStep performs time evolution across multiple grid levels (includes parallel execution) -// This section 
applies only when PSTR == 1 - -//================================================================================================ - -#if (PSTR == 1) -void bssn_class::ParallelStep() -{ - // stringstream a_stream; - // a_stream.setf(ios::left); - - double *tporg, *tporgo; - tporg = new double[3 * BH_num]; - tporgo = new double[3 * BH_num]; - - int lev = GH->mylev; - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - double dT_levp1 = dT * pow(0.5, Mymax(lev + 1, trfls)); - double dT_levm1 = dT * pow(0.5, Mymax(lev - 1, trfls)); - - int NoIterations = 1, YN; - if (lev <= trfls) - NoIterations = 1; - else - NoIterations = int(pow(2.0, lev - trfls)); - - for (int i = 0; i < NoIterations; i++) - { - // if(myrank==GH->start_rank[lev]) cout<<"level now = "<Commlev[lev],GH->start_rank[lev],a_stream.str()); - - // Step(lev,YN); -#ifdef USE_GPU - if (use_gpu == 1) - Step_GPU(lev, YN); - else - Step(lev, YN); -#else - Step(lev, YN); -#endif - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - -#if (AGM == 2) - if (GH->levels == 1) - { - Enforce_algcon(lev, 0); - } -#endif - - GH->Lt[lev] += dT_lev; - - PhysTime += dT_lev; - -#if (AGM == 2) - if (lev > 0) - { - Enforce_algcon(lev, 0); - if (YN == 1) - Enforce_algcon(lev - 1, 0); - } -#endif - -#if (RPS == 1) - // mesh refinement boundary part - // - // till here the PhysTime has updated dT_lev - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - if (lev < GH->levels - 1) - { - if (lev + 1 <= trfls) - { - // RestrictProlong_aux(lev,1,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); - RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); - } - else - { - // if(myrank==GH->start_rank[lev]) cout<mylev<<", "<Commlev[lev],GH->start_rank[lev],"between RestrictProlong"); - - // 
RestrictProlong_aux(lev,0,fgt(PhysTime-dT_lev,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); - // RestrictProlong_aux(lev,1,fgt(PhysTime-dT_levp1,StartTime,dT_levp1/2),StateList,OldStateList,SynchList_cor); - RestrictProlong(lev + 1, 0, fgt(PhysTime - dT_lev, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); - RestrictProlong(lev + 1, 1, fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), StateList, OldStateList, SynchList_cor); - } - } - - // if(myrank==GH->start_rank[lev]) cout<mylev<<", "<Commlev[lev],GH->start_rank[lev],a_stream.str()); - - RestrictProlong(lev, YN, fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), StateList, OldStateList, SynchList_cor); - // RestrictProlong(lev,YN,false,StateList,OldStateList,SynchList_cor); - -// if(myrank==GH->start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],a_stream.str()); -#endif - - // Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT_lev); - - { - MPI_Status status; - // receive - if (lev < GH->levels - 1) - { - if (myrank == GH->start_rank[lev]) - { - MPI_Recv(tporgo, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev + 1], 1, MPI_COMM_WORLD, &status); - // cout<Commlev[lev]); - - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - Porg0[i][j] = tporg[3 * i + j]; - - // if(myrank==GH->start_rank[lev]) cout< 0 && YN == 1 && myrank == GH->start_rank[lev]) - { - for (int i = 0; i < BH_num; i++) - for (int j = 0; j < 3; j++) - tporg[3 * i + j] = Porg0[i][j]; - - MPI_Send(tporg, 3 * BH_num, MPI_DOUBLE, GH->start_rank[lev - 1], 1, MPI_COMM_WORLD); - } - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - } -#if (REGLEV == 0) - // for higher level - if (lev < GH->levels - 1) - { - if (lev + 1 >= GH->movls) - { - // GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0, - GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - 
dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor); - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel_aux for higher level"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - } - - // for this level - if (YN == 1) - { - GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor); - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - - // for lower level - if (lev - 1 >= GH->movls) - { - if (lev - 1 <= trfls) - { - if (YN == 1) - { - // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, - GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor); - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel_aux for lower level"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - } - else - { - if (i % 4 == 3) - { - // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, - GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, - SynchList_cor, OldStateList, StateList, SynchList_pre, - fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor); - - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Regrid_Onelevel_aux for lower level"; - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],a_stream.str()); - } - } - } -#endif - } - -#ifdef WithShell - SHStep(); - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - -#if (RPS == 1) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(StateList, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = 
clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl; - } - // a_stream.clear(); - // a_stream.str(""); - // a_stream<Commlev[lev],GH->start_rank[lev],a_stream.str()); - } -#endif - -#endif - -#if 0 - if(lev>0) Parallel::Restrict_after(GH->PatL[lev-1],GH->PatL[lev],StateList,StateList,Symmetry); -#endif - - delete[] tporg; - delete[] tporgo; -} -#endif - -//================================================================================================ - - - -//================================================================================================ - -// This member function configures the single-step time evolution for each grid level -// during the time evolution process. -// For the case PSTR == 0 - -//================================================================================================ - -#if (PSTR == 0) -#if 1 -void bssn_class::Step(int lev, int YN) -{ - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - -// new code 2013-2-15, zjcao -#if (MAPBH == 1) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - for (int ith = 0; ith < 3; ith++) - Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - 
Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } -#endif - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], 
cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - 
cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) -#warning "shell part still bam type" - if (lev == 0) // Shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, pre); -#endif - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - 
f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - 
cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << 
endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check rhs - { - SH->Dump_Data(RHSList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check rhs"<Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" 
<< endl; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } -#endif - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - 
if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - 
cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, cor)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) - if (lev == 1) // shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], 
Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); -#endif - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count - << " variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - 
cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], 
cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, cor)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, 
varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count - << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" 
<< endl; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - 
Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } -#endif - } - } -#if (RPS == 0) - // mesh refinement boundary part - RestrictProlong(lev, YN, BB); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" << endl; - } - } -#endif - -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check StateList - { - SH->Dump_Data(StateList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check StateList"< 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - } - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets up the single-step time evolution for each grid level (alternate version) - 
-//================================================================================================ - -#else // #if 1 (comment may be incorrect; should be #if 0) -// ICN for bam comparison -void bssn_class::Step(int lev, int YN) -{ - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - 
cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - 
cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif - f_icn_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + 
ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], 
cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, pre)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], 
sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_icn_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check rhs - { - SH->Dump_Data(RHSList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check rhs"<Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" 
<< endl; - } - } -#endif - - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } - // corrector - for (iter_count = 1; iter_count < 3; iter_count++) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], 
cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], 
cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, cor)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif - f_icn_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if 
(ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count - << " variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs 
+ ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - 
cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, sPp->data->sst, cor)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); 
- if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count - << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" << endl; - } - } -#endif - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - 
cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } - } - } -#if (RPS == 0) - // mesh refinement boundary part - RestrictProlong(lev, YN, BB); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" 
<< endl; - } - } -#endif - -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check StateList - { - SH->Dump_Data(StateList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check StateList"< 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - } - } -} -#endif - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets up the single-step time evolution for each grid level -// For the case PSTR == 1 - -//================================================================================================ - -#elif (PSTR == 1) -void bssn_class::Step(int lev, int YN) -{ - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); - - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - -// new code 2013-2-15, zjcao -#if (MAPBH == 1) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, 
Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - for (int ith = 0; ith < 3; ith++) - Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif //(MAPBH == 1) - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], 
cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, 
lev, ndeps, pre)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) -#warning "shell part still bam type" - if (lev == 0) // Shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], - varl0->data->SoA, - Symmetry, pre); -#endif - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = 
Pp->next; - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation"); - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime - << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync"); - - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector"); - - // corrector - for (iter_count = 1; 
iter_count < 4; iter_count++) - { - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"head of Corrector"); - - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn], - cg->fgfs[Gmx->sgfn], cg->fgfs[Gmy->sgfn], cg->fgfs[Gmz->sgfn], - cg->fgfs[Lap->sgfn], - cg->fgfs[Sfx->sgfn], cg->fgfs[Sfy->sgfn], cg->fgfs[Sfz->sgfn], - cg->fgfs[dtSfx->sgfn], cg->fgfs[dtSfy->sgfn], cg->fgfs[dtSfz->sgfn], - cg->fgfs[phi1->sgfn], cg->fgfs[trK1->sgfn], - cg->fgfs[gxx1->sgfn], cg->fgfs[gxy1->sgfn], cg->fgfs[gxz1->sgfn], - cg->fgfs[gyy1->sgfn], cg->fgfs[gyz1->sgfn], cg->fgfs[gzz1->sgfn], - cg->fgfs[Axx1->sgfn], cg->fgfs[Axy1->sgfn], cg->fgfs[Axz1->sgfn], - cg->fgfs[Ayy1->sgfn], cg->fgfs[Ayz1->sgfn], cg->fgfs[Azz1->sgfn], - cg->fgfs[Gmx1->sgfn], cg->fgfs[Gmy1->sgfn], cg->fgfs[Gmz1->sgfn], - 
cg->fgfs[Lap1->sgfn], - cg->fgfs[Sfx1->sgfn], cg->fgfs[Sfy1->sgfn], cg->fgfs[Sfz1->sgfn], - cg->fgfs[dtSfx1->sgfn], cg->fgfs[dtSfy1->sgfn], cg->fgfs[dtSfz1->sgfn], - cg->fgfs[rho->sgfn], - cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, cor)) - { - cout << "find NaN in domain: (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, 
- cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) - if (lev == 1) // shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], - Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, - cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn], - varl0->data->SoA, - Symmetry, cor); -#endif - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check"); - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count - << " variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync"); - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync"); - -#if (MAPBH == 0) - // for black hole position - if 
(BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] - << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector of black hole position"); -#endif - - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after pre cor swap"); - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } -#endif - } - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"tail of corrector"); - } -#if 
(RPS == 0) - // mesh refinement boundary part - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before RestrictProlong"); - RestrictProlong(lev, YN, BB); -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - // if(myrank==GH->start_rank[lev]) - // cout<start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],"complet GH Step"); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets up the single-step time evolution for the spherical shell -// grid part during the time evolution process - -//================================================================================================ - -#ifdef WithShell -void bssn_class::SHStep() -{ - int lev = 0; - // #if (PSTR == 1) - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); - // #endif - - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - - // #if (PSTR == 1) - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); - // #endif - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - 
ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - -#ifdef USE_GPU - if (use_gpu == 1) - { - - if (gpu_rhs_ss(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_FIRST_TIME)) - - { - - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - - ERROR = 1; - } - } - else - { - if (f_compute_rhs_bssn_ss(RHS_PARA_CALLED_FIRST_TIME)) - - { - - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - - ERROR = 1; - } - } - -#else - if (f_compute_rhs_bssn_ss(RHS_PARA_CALLED_FIRST_TIME)) - - { - - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - - ERROR = 1; - } -#endif // USE_GPU - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - 
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - -#if (PSTR == 1) -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check"); -#endif - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - - if (ERROR) - { - SH->Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" 
<< endl; - } - } - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - -#ifdef USE_GPU - if (use_gpu == 1) - { - - if(gpu_rhs_ss(CALLED_BY_STEP,myrank,RHS_PARA_CALLED_THEN) - - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - - ERROR = 1; - - } - } - else - { - if (f_compute_rhs_bssn_ss(RHS_PARA_CALLED_THEN)) - - { - - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - - ERROR = 1; - } - } - -#else - if (f_compute_rhs_bssn_ss(RHS_PARA_CALLED_THEN)) - - { - - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" - << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - - ERROR = 1; - } -#endif // 
USE_GPU - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; - // we do not check the correspondence here - - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], - sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], - varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, - cg->fgfs[varl0->data->sgfn], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count - << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " - << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" 
<< endl; - } - } - - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#if (RPS == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) - << " seconds!" << endl; - } - } -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -} -#endif -#endif // withshell - -//================================================================================================ - - - -//================================================================================================ - -// 0: do not use mixing two levels data for OutBD; 1: do use - -#define MIXOUTB 0 -void bssn_class::RestrictProlong(int lev, int YN, bool BB, - MyList *SL, MyList *OL, MyList *corL) -// we assume -// StateList 1 ----------- -// -// OldStateList 0 ----------- -// -// SynchList_cor old ----------- -{ -#if (PSTR == 1) -// stringstream a_stream; -// a_stream.setf(ios::left); -#endif - - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL, - SynchList_pre, 0); // use SynchList_pre as temporal storage 
space - else - Parallel::prepare_inter_time_level(Pp->data, SL, OL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - -#if (PSTR == 1) -// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]); -#endif - Pp = Pp->next; - } - -#if (PSTR == 1) -// Pp=GH->PatL[lev]; -// while(Pp) -// { -// Pp->data->checkPatch(0,GH->start_rank[GH->mylev]); -// Pp=Pp->next; -// } - -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 before Restrict"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 after Restrict"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 0 after Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); -#endif - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// 
a_stream<mylev<<": 0 after OutBdLow2Hi"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - } - else // no time refinement levels and for all same time levels - { - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 before Restrict"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); -#endif - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 before Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - - Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry); - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 after Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); -#endif - -#if (PSTR == 1) -// a_stream.clear(); -// a_stream.str(""); -// a_stream<mylev<<": 1 after OutBdLow2Hi"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - } - - Parallel::Sync(GH->PatL[lev], SL, Symmetry); - -#if (PSTR == 1) -// a_stream.clear(); -// 
a_stream.str(""); -// a_stream<mylev<<": after Sync"; -// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); -#endif - } -} - -//================================================================================================ - - - -//================================================================================================ - -// auxiliary operation, input lev means original lev-1 - -void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB, - MyList *SL, MyList *OL, MyList *corL) -// we assume -// StateList 1 ----------- -// -// OldStateList 0 ----------- -// -// SynchList_cor old ----------- -{ - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux"); - - if (lev >= GH->levels - 1) - return; - lev = lev + 1; - - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, SL, OL, corL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, SL, OL, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - Pp = Pp->next; - } - -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } 
-#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry); -#endif - } - else // no time refinement levels and for all same time levels - { -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); -#endif - - Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry); - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry); -#endif - } - - Parallel::Sync(GH->PatL[lev], SL, Symmetry); - } -} - -//================================================================================================ - - - -//================================================================================================ - -void bssn_class::RestrictProlong(int lev, int YN, bool BB) -{ - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - // we assume for fine - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // for coarse - // StateList 1 ----------- - // - // OldStateList 0 ----------- - // - // SynchList_cor old ----------- - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - if (myrank == 0) - cout << "/=: " << GH->Lt[lev - 1] << "," << 
GH->Lt[lev] + dT_lev << endl; - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - Pp = Pp->next; - } - -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); -#endif - - Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry); - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - } - else // no time refinement levels and for all same time levels - { - if (myrank == 0) - cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); -#elif (RPB == 1) - // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); -#endif - - Parallel::Sync(GH->PatL[lev - 1], StateList, 
Symmetry); - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - } - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - } -} - -//================================================================================================ - - - -//================================================================================================ - -void bssn_class::ProlongRestrict(int lev, int YN, bool BB) -{ - if (lev > 0) - { - MyList *Pp, *Ppc; - if (lev > trfls && YN == 0) // time refinement levels and for intermediat time level - { - Pp = GH->PatL[lev - 1]; - while (Pp) - { - if (BB) - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, SynchList_cor, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - else - Parallel::prepare_inter_time_level(Pp->data, StateList, OldStateList, - SynchList_pre, 0); // use SynchList_pre as temporal storage space - Pp = Pp->next; - } - -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], 
GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - } - else // no time refinement levels and for all same time levels - { -#if (RPB == 0) - Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { -#if (MIXOUTB == 0) - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); -#elif (MIXOUTB == 1) - Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry); -#endif - Pp = Pp->next; - } - Ppc = Ppc->next; - } -#elif (RPB == 1) - // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); - Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); -#endif - -#if 0 -#if (RPB == 0) - Parallel::Restrict(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); -#elif (RPB == 1) -// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); - Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,GH->rsul[lev],Symmetry); -#endif -#else - Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); -#endif - Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry); - } - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - } -} -#undef MIXOUTB - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes the gravitational radiation scalar Psi4 - -//================================================================================================ - -void bssn_class::Compute_Psi4(int lev) -{ - MyList *DG_List = new MyList(Rpsi4); - DG_List->insert(Ipsi4); - -#if 0 // test showes this operation does not help -for(int ilev = GH->levels-1;ilev>=lev;ilev--) -{ - MyList *Pp=GH->PatL[ilev]; -#else - MyList *Pp = 
GH->PatL[lev]; -#endif - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (Psi4type == 0) - if (0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation - f_ricci_gamma(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - Symmetry); - // the input arguments Gamma^i_jk and R_ij do not need synch, because we do not need to derivate them - f_getnp4(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], 
cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry); -#elif (Psi4type == 1) - f_getnp4old(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry); -#else -#error "not recognized Psi4type" -#endif - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - -#if 0 - Parallel::Sync(GH->PatL[ilev],DG_List,Symmetry); -} -// because of double level data change, you can not do this in above loop -// prolong restrict Psi4 -for(int ilev=GH->levels-1;ilev>lev;ilev--) - RestrictProlong(ilev,1,false,DG_List,DG_List,DG_List); -#else - Parallel::Sync(GH->PatL[lev], DG_List, Symmetry); -#endif - -#ifdef WithShell - // ShellPatch part - if (lev == 0) - { - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - int fngfs = Pp->data->fngfs; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { -#if (Psi4type == 0) - if (0) // if Gamma^i_jk and R_ij can be reused from the rhs calculation - f_ricci_gamma_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + 
ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - Symmetry, lev, Pp->data->sst); - - f_getnp4_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - 
cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry, Pp->data->sst); -#elif 
(Psi4type == 1) - f_getnp4old_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[Rpsi4->sgfn], cg->fgfs[Ipsi4->sgfn], - Symmetry, Pp->data->sst); -#else -#error "not recognized Psi4type" -#endif - } - if (BL == Pp->data->ble) - break; - BL = BL->next; - } - Pp = Pp->next; - } - - SH->Synch(DG_List, Symmetry); 
-#if 0 -// interpolate Psi4 - SH->CS_Inter(DG_List,Symmetry); -#endif - } -#endif - - DG_List->clearList(); - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end of Compute_Psi4"); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function sets the puncture positions of black holes at the initial time - -//================================================================================================ - -void bssn_class::Setup_Black_Hole_position() -{ - char filename[50]; - strcpy(filename, "input.par"); - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename - << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && skey == "BH_num") - { - BH_num_input = BH_num = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - // set up the data for black holes - // these arrays will be deleted when bssn_class is deleted - Pmom = new double[3 * BH_num]; - Spin = new double[3 * BH_num]; - Mass = new double[BH_num]; - Porg0 = new double *[BH_num]; - Porgbr = new double *[BH_num]; - Porg = new double *[BH_num]; - Porg1 = new double *[BH_num]; - Porg_rhs = new double *[BH_num]; - for (int i = 0; i < BH_num; i++) - { - 
Porg0[i] = new double[3]; - Porgbr[i] = new double[3]; - Porg[i] = new double[3]; - Porg1[i] = new double[3]; - Porg_rhs[i] = new double[3]; - } - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind); - if (status == -1) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "error reading parameter file " << filename - << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "BSSN" && sind < BH_num) - { - if (skey == "Mass") - Mass[sind] = atof(sval.c_str()); - else if (skey == "Porgx") - Porg0[sind][0] = atof(sval.c_str()); - else if (skey == "Porgy") - Porg0[sind][1] = atof(sval.c_str()); - else if (skey == "Porgz") - Porg0[sind][2] = atof(sval.c_str()); - else if (skey == "Spinx") - Spin[sind * 3] = atof(sval.c_str()); - else if (skey == "Spiny") - Spin[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Spinz") - Spin[sind * 3 + 2] = atof(sval.c_str()); - else if (skey == "Pmomx") - Pmom[sind * 3] = atof(sval.c_str()); - else if (skey == "Pmomy") - Pmom[sind * 3 + 1] = atof(sval.c_str()); - else if (skey == "Pmomz") - Pmom[sind * 3 + 2] = atof(sval.c_str()); - } - } - inf.close(); - } - // echo information of Black holes - if (myrank == 0) - { - cout << "initial information of " << BH_num << " Black Hole(s)" << endl; - cout << setw(16) << "Mass" - << setw(16) << "x" - << setw(16) << "y" - << setw(16) << "z" - << setw(16) << "Px" - << setw(16) << "Py" - << setw(16) << "Pz" - << setw(16) << "Sx" - << setw(16) << "Sy" - << 
setw(16) << "Sz" << endl; - for (int i = 0; i < BH_num; i++) - { - cout << setw(16) << Mass[i] - << setw(16) << Porg0[i][0] - << setw(16) << Porg0[i][1] - << setw(16) << Porg0[i][2] - << setw(16) << Pmom[i * 3] - << setw(16) << Pmom[i * 3 + 1] - << setw(16) << Pmom[i * 3 + 2] - << setw(16) << Spin[i * 3] - << setw(16) << Spin[i * 3 + 1] - << setw(16) << Spin[i * 3 + 2] << endl; - } - } - - int maxl = 1; - int levels; - int *grids; - double bbox[6]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1); - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && skey == "levels") - { - levels = atoi(sval.c_str()); - break; - } - } - inf.close(); - } - grids = new int[levels]; - // read parameter from file - { - const int LEN = 256; - char pline[LEN]; - string str, sgrp, skey, sval; - int sind1, sind2, sind3; - ifstream inf(filename, ifstream::in); - if (!inf.good() && myrank == 0) - { - cout << "bssn_class::Setup_Black_Hole_position: Can not open parameter file " << filename - << " for inputing information of black holes" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - - for (int i = 1; inf.good(); i++) - { - inf.getline(pline, LEN); - str = pline; - - int status = misc::parse_parts(str, sgrp, skey, sval, sind1, sind2, sind3); - if (status == -1) - { - cout << "error reading parameter file " << filename << " in line " << i << endl; - 
MPI_Abort(MPI_COMM_WORLD, 1); - } - else if (status == 0) - continue; - - if (sgrp == "cgh" && skey == "grids" && sind1 < levels) - grids[sind1] = atoi(sval.c_str()); - if (sgrp == "cgh" && skey == "bbox" && sind1 == 0 && sind2 == 0) - bbox[sind3] = atof(sval.c_str()); - } - inf.close(); - } - for (int i = 0; i < levels; i++) - if (maxl < grids[i]) - maxl = grids[i]; - - delete[] grids; - - if (BH_num > maxl) - { - int BH_numc = BH_num; - for (int i = 0; i < BH_num; i++) - if (Porg0[i][0] < bbox[0] || Porg0[i][0] > bbox[3] || - Porg0[i][1] < bbox[1] || Porg0[i][1] > bbox[4] || - Porg0[i][2] < bbox[2] || Porg0[i][2] > bbox[5]) - { - delete[] Porg0[i]; - Porg0[i] = 0; - BH_numc--; - } - - if (BH_num > BH_numc) - { - maxl = BH_numc; - int bhi; - double *tmp; - - tmp = Pmom; - Pmom = new double[3 * maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (Porg0[i]) - { - for (int j = 0; j < 3; j++) - Pmom[3 * bhi + j] = tmp[3 * i + j]; - bhi++; - } - delete[] tmp; - - tmp = Spin; - Spin = new double[3 * maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (Porg0[i]) - { - for (int j = 0; j < 3; j++) - Spin[3 * bhi + j] = tmp[3 * i + j]; - bhi++; - } - delete[] tmp; - - tmp = Mass; - Mass = new double[3 * maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (Porg0[i]) - { - Mass[bhi] = tmp[i]; - bhi++; - } - delete[] tmp; - - double **ttmp; - ttmp = Porg0; - Porg0 = new double *[maxl]; - bhi = 0; - for (int i = 0; i < BH_num; i++) - if (ttmp[i]) - { - Porg0[bhi] = ttmp[i]; - bhi++; - } - delete[] ttmp; - - for (int i = 0; i < BH_num; i++) - { - delete[] Porgbr[i]; - delete[] Porg[i]; - delete[] Porg1[i]; - delete[] Porg_rhs[i]; - } - delete[] Porgbr; - delete[] Porg; - delete[] Porg1; - delete[] Porg_rhs; - - BH_num = maxl; - - Porgbr = new double *[BH_num]; - Porg = new double *[BH_num]; - Porg1 = new double *[BH_num]; - Porg_rhs = new double *[BH_num]; - - for (int i = 0; i < BH_num; i++) - { - Porgbr[i] = new double[3]; - Porg[i] = new double[3]; - 
Porg1[i] = new double[3]; - Porg_rhs[i] = new double[3]; - } - } - } - - for (int i = 0; i < BH_num; i++) - { - for (int j = 0; j < dim; j++) - Porgbr[i][j] = Porg0[i][j]; - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes black hole positions - -//================================================================================================ - -#if 0 -// old code - -void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *fory,var *forz,int lev) -{ - const int InList = 3; - - MyList * DG_List=new MyList(forx); - DG_List->insert(fory); DG_List->insert(forz); - - int n; - double *x1,*y1,*z1; - double *shellf; - shellf=new double[3*BH_num]; - double *pox[3]; - for(int i=0;i<3;i++) pox[i] = new double[BH_num]; - for( n = 0; n < BH_num; n++) - { - pox[0][n] = BH_PS[n][0]; - pox[1][n] = BH_PS[n][1]; - pox[2][n] = BH_PS[n][2]; - } - - if(!Parallel::PatList_Interp_Points(GH->PatL[lev],DG_List,BH_num,pox,shellf,Symmetry)) - { - ErrorMonitor->outfile<<"fail to find black holes at t = "<outfile<<"(x,y,z) = ("<clearList(); - delete[] shellf; - for(int i=0;i<3;i++) delete[] pox[i]; -} - -#else - -// new code considering diferent levels for different black hole - -void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev) -{ - const int InList = 3; - - MyList *DG_List = new MyList(forx); - DG_List->insert(fory); - DG_List->insert(forz); - - double *x1, *y1, *z1; - double *shellf; - shellf = new double[3]; - double *pox[3]; - for (int i = 0; i < 3; i++) - pox[i] = new double[1]; - - for (int n = 0; n < BH_num; n++) - { - pox[0][0] = BH_PS[n][0]; - pox[1][0] = BH_PS[n][1]; - pox[2][0] = BH_PS[n][2]; - - int lev = ilev; - -#if (PSTR == 0) - while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, 
shellf, Symmetry)) -#elif (PSTR == 1) - while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev])) -#endif - { - lev--; - if (lev < 0) - { - ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl; - for (n = 0; n < BH_num; n++) - ErrorMonitor->outfile << "(x,y,z) = (" << pox[0][n] << "," << pox[1][n] << "," << pox[2][n] << ")" << endl; - break; - } - } - - if (lev >= 0) - { - BH_RHS[n][0] = -shellf[0]; - BH_RHS[n][1] = -shellf[1]; - BH_RHS[n][2] = -shellf[2]; - } - } - - DG_List->clearList(); - delete[] shellf; - for (int i = 0; i < 3; i++) - delete[] pox[i]; -} -#endif - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes gravitational-wave related data - -//================================================================================================ - -void bssn_class::AnalysisStuff(int lev, double dT_lev) -{ - LastAnas += dT_lev; - - if (LastAnas >= AnasTime) - { -#ifdef Point_Psi4 -#error "not support parallel levels yet" - // Gam_ijk and R_ij have been calculated in Interp_Constraint() - double SYM = 1, ANT = -1; - for (int levh = lev; levh < GH->levels; levh++) - { - MyList *Pp = GH->PatL[levh]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_fderivs(cg->shape, cg->fgfs[phi0->sgfn], - cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[trK0->sgfn], - cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Axx0->sgfn], - cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn], - cg->X[0], cg->X[1], 
cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Axy0->sgfn], - cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, ANT, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Axz0->sgfn], - cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, SYM, ANT, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Ayy0->sgfn], - cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Ayz0->sgfn], - cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, ANT, ANT, Symmetry, levh); - f_fderivs(cg->shape, cg->fgfs[Azz0->sgfn], - cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, levh); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - -#ifdef WithShell - // ShellPatch part - if (lev == 0) - { - MyList *Pp = SH->PatL; - while (Pp) - { - MyList *BL = Pp->data->blb; - int fngfs = Pp->data->fngfs; - while (BL) - { - Block *cg = BL->data; - if (myrank == cg->rank) - { - f_fderivs_shc(cg->shape, cg->fgfs[phi0->sgfn], - cg->fgfs[phix->sgfn], cg->fgfs[phiy->sgfn], cg->fgfs[phiz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - phi0->SoA[0], phi0->SoA[1], phi0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[trK0->sgfn], - cg->fgfs[trKx->sgfn], cg->fgfs[trKy->sgfn], cg->fgfs[trKz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - 
trK0->SoA[0], trK0->SoA[1], trK0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Axx0->sgfn], - cg->fgfs[Axxx->sgfn], cg->fgfs[Axxy->sgfn], cg->fgfs[Axxz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Axx0->SoA[0], Axx0->SoA[1], Axx0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Axy0->sgfn], - cg->fgfs[Axyx->sgfn], cg->fgfs[Axyy->sgfn], cg->fgfs[Axyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Axy0->SoA[0], Axy0->SoA[1], Axy0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Axz0->sgfn], - cg->fgfs[Axzx->sgfn], cg->fgfs[Axzy->sgfn], cg->fgfs[Axzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Axz0->SoA[0], Axz0->SoA[1], Axz0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - 
cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Ayy0->sgfn], - cg->fgfs[Ayyx->sgfn], cg->fgfs[Ayyy->sgfn], cg->fgfs[Ayyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Ayy0->SoA[0], Ayy0->SoA[1], Ayy0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Ayz0->sgfn], - cg->fgfs[Ayzx->sgfn], cg->fgfs[Ayzy->sgfn], cg->fgfs[Ayzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Ayz0->SoA[0], Ayz0->SoA[1], Ayz0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - f_fderivs_shc(cg->shape, cg->fgfs[Azz0->sgfn], - cg->fgfs[Azzx->sgfn], cg->fgfs[Azzy->sgfn], cg->fgfs[Azzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - Azz0->SoA[0], Azz0->SoA[1], Azz0->SoA[2], - Symmetry, levh, Pp->data->sst, - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz]); - } - if (BL == Pp->data->ble) - break; - 
BL = BL->next; - } - Pp = Pp->next; - } - } -#endif - } -#else - Compute_Psi4(lev); -#endif - double *RP, *IP, *RoutMAP; - int NN = 0; - for (int pl = 2; pl < maxl + 1; pl++) - for (int pm = -pl; pm < pl + 1; pm++) - NN++; - RP = new double[NN]; - IP = new double[NN]; - RoutMAP = new double[7]; - double Rex = maxrex; - for (int i = 0; i < decn; i++) - { -#ifdef Point_Psi4 - Waveshell->surf_Wave(Rex, GH, SH, - phi, trK, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - phix, phiy, phiz, - trKx, trKy, trKz, - Axxx, Axxy, Axxz, - Axyx, Axyy, Axyz, - Axzx, Axzy, Axzz, - Ayyx, Ayyy, Ayyz, - Ayzx, Ayzy, Ayzz, - Azzx, Azzy, Azzz, - Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, - Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, - Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, - Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, - 2, maxl, NN, RP, IP, ErrorMonitor); -#ifdef WithShell - if (lev > 0 || Rex < GH->bbox[0][0][3]) - { - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } - else - { - Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } -#else - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); -#endif -#else -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before surface integral"); -#ifdef WithShell - if (lev > 0 || Rex < GH->bbox[0][0][3]) - { - Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); - 
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } - else - { - Waveshell->surf_Wave(Rex, lev, SH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); - Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); - } -#else -#if (PSTR == 0) - Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor); - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor); -#elif (PSTR == 1) - Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor, GH->Commlev[lev]); - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after surf_Wave"); - Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0, - gxx0, gxy0, gxz0, gyy0, gyz0, gzz0, - Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0, - Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables - RoutMAP, ErrorMonitor, GH->Commlev[lev]); -#endif -#endif -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral"); -#endif - if (i == 0) - { - ADMMass = RoutMAP[0]; - } -#if (PSTR == 1) - if (GH->start_rank[a_lev] > 0) - { - MPI_Status status; - // receive - if (myrank == 0) - { - MPI_Recv(RP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 1, MPI_COMM_WORLD, &status); - MPI_Recv(IP, NN, MPI_DOUBLE, GH->start_rank[a_lev], 2, MPI_COMM_WORLD, &status); - MPI_Recv(RoutMAP, 7, MPI_DOUBLE, GH->start_rank[a_lev], 3, MPI_COMM_WORLD, &status); - } - // send - if (myrank == 
GH->start_rank[a_lev]) - { - MPI_Send(RP, NN, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); - MPI_Send(IP, NN, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD); - MPI_Send(RoutMAP, 7, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD); - } - } -#endif - Psi4Monitor->writefile(PhysTime, NN, RP, IP); - MAPMonitor->writefile(PhysTime, 7, RoutMAP); - Rex = Rex - drex; - } - delete[] RP; - delete[] IP; - delete[] RoutMAP; - - // black hole's position - { - double *pox; - pox = new double[dim * BH_num]; - for (int bhi = 0; bhi < BH_num; bhi++) - for (int i = 0; i < dim; i++) - pox[dim * bhi + i] = Porg0[bhi][i]; - BHMonitor->writefile(PhysTime, dim * BH_num, pox); - delete[] pox; - } - - LastAnas = 0; - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes and outputs constraint violations - -//================================================================================================ - -void bssn_class::Constraint_Out() -{ - LastConsOut += dT * pow(0.5, Mymax(0, trfls)); - - if (LastConsOut >= AnasTime) - // Constraint violation - { - // recompute least the constraint data lost for moved new grid - for (int lev = 0; lev < GH->levels; lev++) - { - // make sure the data consistent for higher levels - if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation - { - double TRK4 = PhysTime; - double ndeps = numepsb; - int pre = 0; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -// added by yangquan -#ifdef USE_GPU - if (use_gpu == 1) - gpu_rhs(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Constraint_Out); - - else - f_compute_rhs_bssn(RHS_PARA_CALLED_Constraint_Out); -#else - f_compute_rhs_bssn(RHS_PARA_CALLED_Constraint_Out); -#endif - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - 
} - Pp = Pp->next; - } - } - Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); - } -#ifdef WithShell - if (0) // if the constrait quantities can be reused from the step rhs calculation - { - MyList *sPp; - sPp = SH->PatL; - while (sPp) - { - double TRK4 = PhysTime; - int pre = 0; - int lev = 0; - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#ifdef USE_GPU - if (use_gpu == 1) - - gpu_rhs_ss(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Constraint_Out_SS); - else - f_compute_rhs_bssn_ss(RHS_PARA_CALLED_Constraint_Out_SS); -#else - f_compute_rhs_bssn_ss(RHS_PARA_CALLED_Constraint_Out_SS); - -#endif // USE_GPU - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - SH->Synch(ConstraintList, Symmetry); -#endif - - double ConV[7]; -#if (PSTR == 1) - double ConV_h[7]; -#endif - -#ifdef WithShell - ConV[0] = SH->L2Norm(Cons_Ham); - ConV[1] = SH->L2Norm(Cons_Px); - ConV[2] = SH->L2Norm(Cons_Py); - ConV[3] = SH->L2Norm(Cons_Pz); - ConV[4] = SH->L2Norm(Cons_Gx); - ConV[5] = SH->L2Norm(Cons_Gy); - ConV[6] = SH->L2Norm(Cons_Gz); - ConVMonitor->writefile(PhysTime, 7, ConV); -#endif - for (int levi = 0; levi < GH->levels; levi++) - { -#if (PSTR == 0) - ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham); - ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px); - ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py); - ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz); - ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx); - ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy); - ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz); -#elif (PSTR == 1) - ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham, GH->Commlev[levi]); - ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px, GH->Commlev[levi]); - ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py, GH->Commlev[levi]); - ConV[3] = 
Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz, GH->Commlev[levi]); - ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx, GH->Commlev[levi]); - ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy, GH->Commlev[levi]); - ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz, GH->Commlev[levi]); - // misc::tillherecheck("before collect data to cpu0"); - // MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different - if (levi > 0) - { - if (GH->mylev == levi && myrank == GH->start_rank[levi]) - for (int i = 0; i < 7; i++) - ConV_h[i] = ConV[i]; - else - for (int i = 0; i < 7; i++) - ConV_h[i] = 0; - MPI_Allreduce(ConV_h, ConV, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - } -#endif - ConVMonitor->writefile(PhysTime, 7, ConV); - /* - if(fabs(ConV[0])<0.00001) - { - MyList * DG_List=new MyList(Cons_Ham); - DG_List->insert(Cons_Px); DG_List->insert(Cons_Py); DG_List->insert(Cons_Px); - DG_List->insert(Cons_Gx); DG_List->insert(Cons_Gy); DG_List->insert(Cons_Gx); - Parallel::Dump_Data(GH->PatL[levi],DG_List,"jiu",0,1); - DG_List->clearList(); - if(myrank==0) MPI_Abort(MPI_COMM_WORLD,1); - } - */ - } - - Interp_Constraint(false); - - LastConsOut = 0; - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes derivatives required by the apparent-horizon routines - -//================================================================================================ - -#ifdef With_AHF -void bssn_class::AH_Prepare_derivatives() -{ - double SYM = 1.0, ANT = -1.0; - int ZEO = 0; - - for (int lev = 0; lev < GH->levels; lev++) - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_fderivs(cg->shape, cg->fgfs[phi0->sgfn], - 
cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gxx0->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamzxx->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gxy0->sgfn], - cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamzxy->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, ANT, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gxz0->sgfn], - cg->fgfs[Gamxxz->sgfn], cg->fgfs[Gamyxz->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - ANT, SYM, ANT, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gyy0->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamzyy->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gyz0->sgfn], - cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamzyz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, ANT, ANT, Symmetry, ZEO); - f_fderivs(cg->shape, cg->fgfs[gzz0->sgfn], - cg->fgfs[Gamxzz->sgfn], cg->fgfs[Gamyzz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->X[0], cg->X[1], cg->X[2], - SYM, SYM, SYM, Symmetry, ZEO); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - Parallel::Sync(GH->PatL[lev], AHDList, Symmetry); - } -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function interpolates apparent-horizon data - -//================================================================================================ - -bool bssn_class::AH_Interp_Points(MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetryi) -{ - MyList *varl; - int num_var = 0; - varl = VarList; - while (varl) - { - num_var++; - varl = varl->next; - } - - double 
pox[3]; - for (int i = 0; i < NN; i++) - { - for (int j = 0; j < 3; j++) - pox[j] = XX[j][i]; - int lev = GH->levels - 1; - bool notfound = true; - - while (notfound) - { - if (lev < 0) - { -#ifdef WithShell - if (SH->Interp_One_Point(VarList, pox, Shellf + i * num_var, Symmetryi)) - { - return true; - } - if (myrank == 0) - cout << "bssn_class::AH_Interp_Points: point (" - << pox[0] << "," << pox[1] << "," << pox[2] - << ") is out of cgh and shell domain!" << endl; -#else - if (myrank == 0) - cout << "bssn_class::AH_Interp_Points: point (" - << pox[0] << "," << pox[1] << "," << pox[2] - << ") is out of cgh domain!" << endl; -#endif - return false; - } - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - if (Pp->data->Interp_ONE_Point(VarList, pox, Shellf + i * num_var, Symmetryi)) - { - notfound = false; - break; - } - Pp = Pp->next; - } - lev--; - } - } - return true; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes the apparent horizon at the current iteration step - -//================================================================================================ - -void bssn_class::AH_Step_Find(int lev, double dT_lev) -{ - if ((lev == GH->levels - 1)) - { - int ncount = int(PhysTime / dT_lev); - bool tf = false; - for (int ihn = 0; ihn < HN_num; ihn++) - { - if (ncount % findeveryl[ihn] == 0) - { - tf = true; - break; - } - } - if (tf) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - prev_clock = clock(); - const int cdumpid = int(PhysTime / AHdumptime) + 1; - for (int ihn = 0; ihn < HN_num; ihn++) - dumpid[ihn] = cdumpid; - - double gam; - for (int ihn = 0; ihn < BH_num; ihn++) - { - xc[ihn] = Porg0[ihn][0]; - yc[ihn] = Porg0[ihn][1]; - zc[ihn] = Porg0[ihn][2]; - gam = fabs(Pmom[ihn * 3]) / (Mass[ihn]); - gam = sqrt(1 - gam * gam); - xr[ihn] = Mass[ihn] * 
gam; - gam = fabs(Pmom[ihn * 3 + 1]) / (Mass[ihn]); - gam = sqrt(1 - gam * gam); - yr[ihn] = Mass[ihn] * gam; - gam = fabs(Pmom[ihn * 3 + 2]) / (Mass[ihn]); - gam = sqrt(1 - gam * gam); - zr[ihn] = Mass[ihn] * gam; - dTT[ihn] = -1; - - if (ncount % findeveryl[ihn] == 0) - { - trigger[ihn] = true; - dTT[ihn] = findeveryl[ihn] * dT_lev; - } - else - trigger[ihn] = false; - if (trigger[ihn] && (dumpid[ihn] > lastahdumpid[ihn])) - lastahdumpid[ihn] = dumpid[ihn]; - else - dumpid[ihn] = 0; - } - int ihn = BH_num; - for (int ia = 0; ia < BH_num; ia++) - for (int ib = ia + 1; ib < BH_num; ib++) - { - xc[ihn] = (Porg0[ia][0] + Porg0[ib][0]) / 2; - yc[ihn] = (Porg0[ia][1] + Porg0[ib][1]) / 2; - zc[ihn] = (Porg0[ia][2] + Porg0[ib][2]) / 2; - - xr[ihn] = yr[ihn] = zr[ihn] = Mass[ia] + Mass[ib]; - - dTT[ihn] = -1; - - if (fabs(Porg0[ia][0] - Porg0[ib][0]) < 2 * xr[ihn] && - fabs(Porg0[ia][1] - Porg0[ib][1]) < 2 * xr[ihn] && - fabs(Porg0[ia][2] - Porg0[ib][2]) < 2 * xr[ihn] && - (ncount % findeveryl[ihn] == 0)) - { - trigger[ihn] = true; - dTT[ihn] = findeveryl[ihn] * dT_lev; - } - else - trigger[ihn] = false; - - if (trigger[ihn] && (dumpid[ihn] > lastahdumpid[ihn])) - lastahdumpid[ihn] = dumpid[ihn]; - else - dumpid[ihn] = 0; - - ihn++; - } -#if (ABEtype == 1) - if (PhysTime > 10) - { - ihn--; - trigger[ihn] = true; - xr[ihn] = yr[ihn] = zr[ihn] = 50; - // if(myrank==0) for(ihn=0;ihn 0) - return; - - // recompute least the constraint data lost for moved new grid - for (int lev = 0; lev < GH->levels; lev++) - { - // make sure the data consistent for higher levels - if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation - { - double TRK4 = PhysTime; - double ndeps = numepsb; - int pre = 0; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -// added by yangquan -#ifdef USE_GPU - if (use_gpu == 1) - gpu_rhs(CALLED_BY_CONSTRAINT, myrank, 
RHS_PARA_CALLED_Interp_Constraint); - else - f_compute_rhs_bssn(RHS_PARA_CALLED_Interp_Constraint); -#else - f_compute_rhs_bssn(RHS_PARA_CALLED_Interp_Constraint); -#endif - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - } - Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); - } -#ifdef WithShell - if (0) // if the constrait quantities can be reused from the step rhs calculation - { - MyList *sPp; - sPp = SH->PatL; - while (sPp) - { - double TRK4 = PhysTime; - int pre = 0; - int lev = 0; - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#ifdef USE_GPU - if (use_gpu == 1) - - gpu_rhs_ss(CALLED_BY_CONSTRAINT, myrank, RHS_PARA_CALLED_Intrp_Constraint_Out_SS); - else - f_compute_rhs_bssn_ss(RHS_PARA_CALLED_Intrp_Constraint_Out_SS); -#else - f_compute_rhs_bssn_ss(RHS_PARA_CALLED_Intrp_Constraint_Out_SS); - -#endif // USE_GPU - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - SH->Synch(ConstraintList, Symmetry); -#endif - } - // interpolate - double *x1, *y1, *z1; - const int n = 1000; - double lmax, lmin, dd; - lmin = 0; -#ifdef WithShell - lmax = SH->Rrange[1]; -#else - lmax = GH->bbox[0][0][4]; -#endif -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - dd = (lmax - lmin) / (n - 1); -#else -#ifdef Cell - dd = (lmax - lmin) / n; -#else -#error Not define Vertex nor Cell -#endif -#endif - x1 = new double[n]; - y1 = new double[n]; - z1 = new double[n]; - for (int i = 0; i < n; i++) - { - x1[i] = 0; -#ifdef Vertex -#ifdef Cell -#error Both Cell and Vertex are defined -#endif - y1[i] = lmin + i * dd; -#else -#ifdef Cell - y1[i] = lmin + (i + 0.5) * dd; -#else -#error Not define Vertex nor Cell -#endif -#endif - z1[i] = 0; - } - - int InList = 0; - - MyList *varl = ConstraintList; - while (varl) - { - InList++; - varl = varl->next; - } - double *shellf; - shellf = new double[n * 
InList]; - for (int i = 0; i < n; i++) - { - double XX[3]; - XX[0] = x1[i]; - XX[1] = y1[i]; - XX[2] = z1[i]; - bool fg = GH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry); -#ifdef WithShell - if (!fg) - fg = SH->Interp_One_Point(ConstraintList, XX, shellf + i * InList, Symmetry); -#endif - if (!fg && myrank == 0) - { - cout << "bssn_class::Interp_Constraint meets wrong" << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - if (myrank == 0) - { - ofstream outfile; - char filename[50]; - sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); - // 0.5 for round off - - outfile.open(filename); - outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl; - for (int i = 0; i < n; i++) - { - outfile << setw(10) << setprecision(10) << y1[i]; - for (int j = 0; j < InList; j++) - outfile << " " << setw(16) << setprecision(15) << shellf[InList * i + j]; - outfile << endl; - } - outfile.close(); - } - - delete[] shellf; -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function computes constraint violations - -//================================================================================================ - -void bssn_class::Compute_Constraint() -{ - double TRK4 = PhysTime; - double ndeps = numepsb; - int pre = 0; - int lev; - - for (lev = 0; lev < GH->levels; lev++) - { - { - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - 
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, ndeps, pre); - } - if 
(BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - } - Parallel::Sync(GH->PatL[lev], ConstraintList, Symmetry); - } - // prolong restrict constraint quantities - for (lev = GH->levels - 1; lev > 0; lev--) - RestrictProlong(lev, 1, false, ConstraintList, ConstraintList, ConstraintList); - -#ifdef WithShell - lev = 0; - { - MyList *sPp; - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_compute_rhs_bssn_ss(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], - cg->fgfs[fngfs + ShellPatch::gx], - cg->fgfs[fngfs + ShellPatch::gy], - cg->fgfs[fngfs + ShellPatch::gz], - cg->fgfs[fngfs + ShellPatch::drhodx], - cg->fgfs[fngfs + ShellPatch::drhody], - cg->fgfs[fngfs + ShellPatch::drhodz], - cg->fgfs[fngfs + ShellPatch::dsigmadx], - cg->fgfs[fngfs + ShellPatch::dsigmady], - cg->fgfs[fngfs + ShellPatch::dsigmadz], - cg->fgfs[fngfs + ShellPatch::dRdx], - cg->fgfs[fngfs + ShellPatch::dRdy], - cg->fgfs[fngfs + ShellPatch::dRdz], - cg->fgfs[fngfs + ShellPatch::drhodxx], - cg->fgfs[fngfs + ShellPatch::drhodxy], - cg->fgfs[fngfs + ShellPatch::drhodxz], - cg->fgfs[fngfs + ShellPatch::drhodyy], - cg->fgfs[fngfs + ShellPatch::drhodyz], - cg->fgfs[fngfs + ShellPatch::drhodzz], - cg->fgfs[fngfs + ShellPatch::dsigmadxx], - cg->fgfs[fngfs + ShellPatch::dsigmadxy], - cg->fgfs[fngfs + ShellPatch::dsigmadxz], - cg->fgfs[fngfs + ShellPatch::dsigmadyy], - cg->fgfs[fngfs + ShellPatch::dsigmadyz], - cg->fgfs[fngfs + ShellPatch::dsigmadzz], - cg->fgfs[fngfs + ShellPatch::dRdxx], - cg->fgfs[fngfs + ShellPatch::dRdxy], - cg->fgfs[fngfs + ShellPatch::dRdxz], - cg->fgfs[fngfs + ShellPatch::dRdyy], - cg->fgfs[fngfs + ShellPatch::dRdyz], - cg->fgfs[fngfs + ShellPatch::dRdzz], - cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - 
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn], - cg->fgfs[Gmx0->sgfn], cg->fgfs[Gmy0->sgfn], cg->fgfs[Gmz0->sgfn], - cg->fgfs[Lap0->sgfn], - cg->fgfs[Sfx0->sgfn], cg->fgfs[Sfy0->sgfn], cg->fgfs[Sfz0->sgfn], - cg->fgfs[dtSfx0->sgfn], cg->fgfs[dtSfy0->sgfn], cg->fgfs[dtSfz0->sgfn], - cg->fgfs[phi_rhs->sgfn], cg->fgfs[trK_rhs->sgfn], - cg->fgfs[gxx_rhs->sgfn], cg->fgfs[gxy_rhs->sgfn], cg->fgfs[gxz_rhs->sgfn], - cg->fgfs[gyy_rhs->sgfn], cg->fgfs[gyz_rhs->sgfn], cg->fgfs[gzz_rhs->sgfn], - cg->fgfs[Axx_rhs->sgfn], cg->fgfs[Axy_rhs->sgfn], cg->fgfs[Axz_rhs->sgfn], - cg->fgfs[Ayy_rhs->sgfn], cg->fgfs[Ayz_rhs->sgfn], cg->fgfs[Azz_rhs->sgfn], - cg->fgfs[Gmx_rhs->sgfn], cg->fgfs[Gmy_rhs->sgfn], cg->fgfs[Gmz_rhs->sgfn], - cg->fgfs[Lap_rhs->sgfn], - cg->fgfs[Sfx_rhs->sgfn], cg->fgfs[Sfy_rhs->sgfn], cg->fgfs[Sfz_rhs->sgfn], - cg->fgfs[dtSfx_rhs->sgfn], cg->fgfs[dtSfy_rhs->sgfn], cg->fgfs[dtSfz_rhs->sgfn], - cg->fgfs[rho->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], - cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], - cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], - cg->fgfs[Gamxxx->sgfn], cg->fgfs[Gamxxy->sgfn], cg->fgfs[Gamxxz->sgfn], - cg->fgfs[Gamxyy->sgfn], cg->fgfs[Gamxyz->sgfn], cg->fgfs[Gamxzz->sgfn], - cg->fgfs[Gamyxx->sgfn], cg->fgfs[Gamyxy->sgfn], cg->fgfs[Gamyxz->sgfn], - cg->fgfs[Gamyyy->sgfn], cg->fgfs[Gamyyz->sgfn], cg->fgfs[Gamyzz->sgfn], - cg->fgfs[Gamzxx->sgfn], cg->fgfs[Gamzxy->sgfn], cg->fgfs[Gamzxz->sgfn], - cg->fgfs[Gamzyy->sgfn], cg->fgfs[Gamzyz->sgfn], cg->fgfs[Gamzzz->sgfn], - cg->fgfs[Rxx->sgfn], cg->fgfs[Rxy->sgfn], cg->fgfs[Rxz->sgfn], - cg->fgfs[Ryy->sgfn], cg->fgfs[Ryz->sgfn], cg->fgfs[Rzz->sgfn], - cg->fgfs[Cons_Ham->sgfn], - cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], - cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], - Symmetry, lev, numepsh, 
sPp->data->sst, pre); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - SH->Synch(ConstraintList, Symmetry); - // interpolate constraint quantities - SH->CS_Inter(ConstraintList, Symmetry); -#endif -} - - -void bssn_class::testRestrict() -{ - MyList *DG_List = new MyList(phi0); - int lev = 0; - double ZEO = 0, ONE = 1; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ZEO); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - lev = 1; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], DG_List, DG_List, Symmetry); - Parallel::Sync(GH->PatL[lev - 1], DG_List, Symmetry); - - Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT); - - DG_List->clearList(); - exit(0); -} - -//================================================================================================ - - - -//================================================================================================ - -void bssn_class::testOutBd() -{ - MyList *DG_List = new MyList(phi0); - int lev = 1; - double ZEO = 0, ONE = 1; - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ZEO); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - lev = 0; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == 
cg->rank) - { - f_set_value(cg->shape, cg->fgfs[phi0->sgfn], ONE); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - lev = 1; - MyList *Ppc = GH->PatL[lev - 1]; - while (Ppc) - { - Pp = GH->PatL[lev]; - while (Pp) - { - Parallel::OutBdLow2Hi(Ppc->data, Pp->data, DG_List, DG_List, Symmetry); - Pp = Pp->next; - } - Ppc = Ppc->next; - } - - Parallel::Sync(GH->PatL[lev], DG_List, Symmetry); - - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT); - Parallel::Dump_Data(GH->PatL[lev - 1], DG_List, 0, PhysTime, dT); - - DG_List->clearList(); - exit(0); -} - -//================================================================================================ - - - -//================================================================================================ - -// This member function enforces/checks the trace-free condition - -//================================================================================================ - -void bssn_class::Enforce_algcon(int lev, int fg) -{ - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - if (fg == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); - else - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - -#ifdef WithShell - if (lev == 0) - { - MyList *sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = 
sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { - if (fg == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], - cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], - cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); - else - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], - cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], - cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif -} - -// added by yangquan -void bssn_class::Get_runtime_envirment() -{ - // get processor name - char pname[MPI_MAX_PROCESSOR_NAME]; - int resultlen = 0, pcode = 0; - MPI_Get_processor_name(pname, &resultlen); - cout << "MPI rank: " << myrank << "Processor name:" << pname << endl; - for (int i = 0; i < resultlen; ++i) - { - pcode += ((int)(pname[i]) - 65) * i; - } - - /*if(myrank % 2 == 0){ - - } */ -} - -//================================================================================================ - diff --git a/AMSS_NCKU_source/bssn_gpu_class.h b/AMSS_NCKU_source/bssn_gpu_class.h deleted file mode 100644 index 98e844d..0000000 --- a/AMSS_NCKU_source/bssn_gpu_class.h +++ /dev/null @@ -1,210 +0,0 @@ - -#ifndef BSSN_GPU_CLASS_H -#define BSSN_GPU_CLASS_H - -#ifdef newc -#include -#include -#include -#include -#include -#include -using namespace std; -#else -#include -#include -#include -#include -#include -#include -#endif - -#include - -#include "macrodef.h" -#include "cgh.h" -#include "ShellPatch.h" -#include "misc.h" -#include "var.h" -#include "MyList.h" -#include "monitor.h" -#include "surface_integral.h" -#include "checkpoint.h" - -// added by yangquan -#include "bssn_macro.h" 
- -extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN); - -class bssn_class -{ -public: - // added by yangquan - //---------------------- - int gpu_num_mynode; - int cpu_core_num_mynode; - int mpi_process_num_mynode; - int my_sequence_mynode; - int mynode_id; - int use_gpu; - - virtual void Step_GPU(int lev, int YN); - virtual void Get_runtime_envirment(); - // virtual void Step_OPENMP(int lev,int YN); - //---------------------- - - int ngfs; - int nprocs, myrank; - cgh *GH; - ShellPatch *SH; - double PhysTime; - - int checkrun; - char checkfilename[50]; - int Steps; - double StartTime, TotalTime; - double AnasTime, DumpTime, d2DumpTime, CheckTime; - double LastAnas, LastConsOut; - double Courant; - double numepss, numepsb, numepsh; - int Symmetry; - int maxl, decn; - double maxrex, drex; - int trfls, a_lev; - - double dT; - double chitiny; - - double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs; - int BH_num, BH_num_input; - double *Mass, *Pmom, *Spin; - double ADMMass; - - var *phio, *trKo; - var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo; - var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo; - var *Gmxo, *Gmyo, *Gmzo; - var *Lapo, *Sfxo, *Sfyo, *Sfzo; - var *dtSfxo, *dtSfyo, *dtSfzo; - - var *phi0, *trK0; - var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0; - var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0; - var *Gmx0, *Gmy0, *Gmz0; - var *Lap0, *Sfx0, *Sfy0, *Sfz0; - var *dtSfx0, *dtSfy0, *dtSfz0; - - var *phi, *trK; - var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz; - var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz; - var *Gmx, *Gmy, *Gmz; - var *Lap, *Sfx, *Sfy, *Sfz; - var *dtSfx, *dtSfy, *dtSfz; - - var *phi1, *trK1; - var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1; - var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1; - var *Gmx1, *Gmy1, *Gmz1; - var *Lap1, *Sfx1, *Sfy1, *Sfz1; - var *dtSfx1, *dtSfy1, *dtSfz1; - - var *phi_rhs, *trK_rhs; - var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs; - var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs; - var 
*Gmx_rhs, *Gmy_rhs, *Gmz_rhs; - var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs; - var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs; - - var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz; - - var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz; - var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz; - var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz; - - var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz; - - var *Rpsi4, *Ipsi4; - var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4; - - var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz; - -#ifdef Point_Psi4 - var *phix, *phiy, *phiz; - var *trKx, *trKy, *trKz; - var *Axxx, *Axxy, *Axxz; - var *Axyx, *Axyy, *Axyz; - var *Axzx, *Axzy, *Axzz; - var *Ayyx, *Ayyy, *Ayyz; - var *Ayzx, *Ayzy, *Ayzz; - var *Azzx, *Azzy, *Azzz; -#endif - // FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data - MyList *StateList, *SynchList_pre, *SynchList_cor, *RHSList; - MyList *OldStateList, *DumpList; - MyList *ConstraintList; - - monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor; - monitor *ConVMonitor; - surface_integral *Waveshell; - checkpoint *CheckPoint; - -public: - bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, - int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi, - int a_levi, int maxli, int decni, double maxrexi, double drexi); - ~bssn_class(); - - void Evolve(int Steps); - void RecursiveStep(int lev); -#if (PSTR == 1) - void ParallelStep(); - void SHStep(); -#endif - void RestrictProlong(int lev, int YN, bool BB, MyList *SL, MyList *OL, MyList *corL); - void RestrictProlong_aux(int lev, int YN, bool BB, MyList *SL, MyList *OL, MyList *corL); - void RestrictProlong(int lev, int YN, bool BB); - void ProlongRestrict(int lev, int YN, bool BB); - void Setup_Black_Hole_position(); - void compute_Porg_rhs(double **BH_PS, 
double **BH_RHS, var *forx, var *fory, var *forz, int lev); - bool read_Pablo_file(int *ext, double *datain, char *filename); - void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax, - char *filename); - void AnalysisStuff(int lev, double dT_lev); - void Setup_KerrSchild(); - void Enforce_algcon(int lev, int fg); - - void testRestrict(); - void testOutBd(); - - virtual void Setup_Initial_Data_Lousto(); - virtual void Setup_Initial_Data_Cao(); - virtual void Initialize(); - virtual void Read_Ansorg(); - virtual void Read_Pablo() {}; - virtual void Compute_Psi4(int lev); - virtual void Step(int lev, int YN); - virtual void Interp_Constraint(bool infg); - virtual void Constraint_Out(); - virtual void Compute_Constraint(); - -#ifdef With_AHF -protected: - MyList *AHList, *AHDList, *GaugeList; - int AHfindevery; - double AHdumptime; - int *lastahdumpid, HN_num; // number of possible horizons - int *findeveryl; - double *xc, *yc, *zc, *xr, *yr, *zr; - bool *trigger; - double *dTT; - int *dumpid; - -public: - void AH_Prepare_derivatives(); - bool AH_Interp_Points(MyList *VarList, - int NN, double **XX, - double *Shellf, int Symmetryi); - void AH_Step_Find(int lev, double dT_lev); -#endif -}; -#endif /* BSSN_GPU_CLASS_H */ diff --git a/AMSS_NCKU_source/bssn_rhs_cuda.cu b/AMSS_NCKU_source/bssn_rhs_cuda.cu new file mode 100644 index 0000000..640b1c1 --- /dev/null +++ b/AMSS_NCKU_source/bssn_rhs_cuda.cu @@ -0,0 +1,2565 @@ +/* + * bssn_rhs_cuda.cu — GPU implementation of f_compute_rhs_bssn + * + * Drop-in replacement for bssn_rhs_c.C. + * Compile with nvcc, link bssn_rhs_cuda.o in place of bssn_rhs_c.o. 
+ */ + +#include +#include +#include +#include +#include +#include "macrodef.h" +#include "bssn_rhs.h" + +/* ------------------------------------------------------------------ */ +/* Multi-GPU dispatch: distribute ranks across available GPUs */ +/* ------------------------------------------------------------------ */ +static struct { + int num_gpus; + int my_rank; + int my_local_rank; + int my_device; + bool inited; +} g_dispatch = {0, -1, -1, -1, false}; + +static int env_to_int(const char *name, int fallback = -1) { + const char *v = getenv(name); + if (!v || !*v) return fallback; + return atoi(v); +} + +static void init_gpu_dispatch() { + if (g_dispatch.inited) return; + cudaError_t err = cudaGetDeviceCount(&g_dispatch.num_gpus); + if (err != cudaSuccess) g_dispatch.num_gpus = 1; + if (g_dispatch.num_gpus < 1) g_dispatch.num_gpus = 1; + + /* Get MPI rank from environment (set by mpirun/mpiexec). */ + g_dispatch.my_rank = env_to_int("PMI_RANK", + env_to_int("OMPI_COMM_WORLD_RANK", + env_to_int("MV2_COMM_WORLD_RANK", + env_to_int("SLURM_PROCID", 0)))); + + /* Prefer local rank for per-node GPU mapping (avoids cross-node skew). */ + g_dispatch.my_local_rank = env_to_int("OMPI_COMM_WORLD_LOCAL_RANK", + env_to_int("MV2_COMM_WORLD_LOCAL_RANK", + env_to_int("MPI_LOCALRANKID", + env_to_int("SLURM_LOCALID", -1)))); + + const int rank_for_map = (g_dispatch.my_local_rank >= 0) + ? g_dispatch.my_local_rank : g_dispatch.my_rank; + g_dispatch.my_device = rank_for_map % g_dispatch.num_gpus; + cudaSetDevice(g_dispatch.my_device); + + if (g_dispatch.my_rank == 0) { + printf("[AMSS-GPU] %d GPU(s) detected, device map uses %s rank\n", + g_dispatch.num_gpus, + (g_dispatch.my_local_rank >= 0) ? 
"local" : "global"); + } + g_dispatch.inited = true; +} + +/* ------------------------------------------------------------------ */ +/* Error checking */ +/* ------------------------------------------------------------------ */ +#define CUDA_CHECK(call) do { \ + cudaError_t err = (call); \ + if (err != cudaSuccess) { \ + fprintf(stderr, "CUDA error %s:%d: %s\n", \ + __FILE__, __LINE__, cudaGetErrorString(err)); \ + exit(EXIT_FAILURE); \ + } \ +} while(0) + +/* ------------------------------------------------------------------ */ +/* Physical / gauge constants (matching bssn_rhs_c.C) */ +/* ------------------------------------------------------------------ */ +static const double PI_VAL = 3.14159265358979323846; +static const double FF_VAL = 0.75; +static const double ETA_VAL = 2.0; + +/* ------------------------------------------------------------------ */ +/* Constant memory for grid parameters and stencil coefficients */ +/* ------------------------------------------------------------------ */ +struct GridParams { + int ex[3]; /* nx, ny, nz */ + int all; /* nx*ny*nz */ + double dX, dY, dZ; + /* fderivs coefficients */ + double d12dx, d12dy, d12dz; /* 1/(12*dX) etc */ + double d2dx, d2dy, d2dz; /* 1/(2*dX) etc */ + /* fdderivs coefficients */ + double Fdxdx, Fdydy, Fdzdz; /* 1/(12*dX^2) etc */ + double Sdxdx, Sdydy, Sdzdz; /* 1/(dX^2) etc */ + double Fdxdy, Fdxdz, Fdydz; /* 1/(144*dX*dY) etc */ + double Sdxdy, Sdxdz, Sdydz; /* 1/(4*dX*dY) etc */ + /* symmetry bounds (Fortran 1-based) */ + int iminF, jminF, kminF; + int imaxF, jmaxF, kmaxF; + /* symmetry bounds for ord=3 (lopsided/kodis) */ + int iminF3, jminF3, kminF3; + int Symmetry; + double eps; + int co; + /* padded sizes */ + int fh2_nx, fh2_ny, fh2_nz; /* (nx+2), (ny+2), (nz+2) for ord=2 */ + int fh3_nx, fh3_ny, fh3_nz; /* (nx+3), (ny+3), (nz+3) for ord=3 */ +}; + +__constant__ GridParams d_gp; + +/* ------------------------------------------------------------------ */ +/* Device indexing helpers */ +/* 
------------------------------------------------------------------ */ +__device__ __forceinline__ int idx_ex_d(int i0, int j0, int k0) { + return i0 + j0 * d_gp.ex[0] + k0 * d_gp.ex[0] * d_gp.ex[1]; +} + +/* ord=2 ghost-padded: Fortran index iF -> flat index */ +__device__ __forceinline__ int idx_fh2(int iF, int jF, int kF) { + return (iF + 1) + (jF + 1) * d_gp.fh2_nx + (kF + 1) * d_gp.fh2_nx * d_gp.fh2_ny; +} + +/* ord=3 ghost-padded: Fortran index iF -> flat index */ +__device__ __forceinline__ int idx_fh3(int iF, int jF, int kF) { + return (iF + 2) + (jF + 2) * d_gp.fh3_nx + (kF + 2) * d_gp.fh3_nx * d_gp.fh3_ny; +} + +/* ------------------------------------------------------------------ */ +/* GPU buffer management */ +/* ------------------------------------------------------------------ */ +/* + * Array slot indices — all arrays live in one big cudaMalloc block. + * INPUT arrays (H2D): 39 slots + * OUTPUT arrays (D2H): 52 slots + * TEMPORARY arrays (GPU-only): ~65 slots + * Plus 2 extended arrays for ghost-padded stencils (fh_ord2, fh_ord3) + */ + +/* Total number of "all"-sized slots */ +#define NUM_SLOTS 160 + +struct GpuBuffers { + double *d_mem; /* single big allocation */ + double *d_fh2; /* ghost-padded ord=2: (nx+2)*(ny+2)*(nz+2) */ + double *d_fh3; /* ghost-padded ord=3: (nx+3)*(ny+3)*(nz+3) */ + double *h_stage; /* host staging buffer for bulk H2D/D2H */ + bool h_stage_pinned; /* true if allocated by cudaMallocHost */ + double *slot[NUM_SLOTS]; /* pointers into d_mem */ + size_t cap_all; + size_t cap_fh2_size; + size_t cap_fh3_size; + int prev_nx, prev_ny, prev_nz; + bool initialized; +}; + +static GpuBuffers g_buf = { + nullptr, nullptr, nullptr, nullptr, false, {}, + 0, 0, 0, 0, 0, 0, false +}; + +/* Slot assignments — INPUT (H2D) */ +enum { + S_chi=0, S_trK, S_dxx, S_gxy, S_gxz, S_dyy, S_gyz, S_dzz, + S_Axx, S_Axy, S_Axz, S_Ayy, S_Ayz, S_Azz, + S_Gamx, S_Gamy, S_Gamz, + S_Lap, S_betax, S_betay, S_betaz, + S_dtSfx, S_dtSfy, S_dtSfz, + S_rho, S_Sx, 
S_Sy, S_Sz, + S_Sxx, S_Sxy, S_Sxz, S_Syy, S_Syz, S_Szz, + S_X, S_Y, S_Z, /* coordinate arrays — only nx/ny/nz long */ + /* 37 input slots so far; X/Y/Z are special-sized */ + + /* OUTPUT (D2H) */ + S_chi_rhs, S_trK_rhs, + S_gxx_rhs, S_gxy_rhs, S_gxz_rhs, S_gyy_rhs, S_gyz_rhs, S_gzz_rhs, + S_Axx_rhs, S_Axy_rhs, S_Axz_rhs, S_Ayy_rhs, S_Ayz_rhs, S_Azz_rhs, + S_Gamx_rhs, S_Gamy_rhs, S_Gamz_rhs, + S_Lap_rhs, S_betax_rhs, S_betay_rhs, S_betaz_rhs, + S_dtSfx_rhs, S_dtSfy_rhs, S_dtSfz_rhs, + S_Gamxxx, S_Gamxxy, S_Gamxxz, S_Gamxyy, S_Gamxyz, S_Gamxzz, + S_Gamyxx, S_Gamyxy, S_Gamyxz, S_Gamyyy, S_Gamyyz, S_Gamyzz, + S_Gamzxx, S_Gamzxy, S_Gamzxz, S_Gamzyy, S_Gamzyz, S_Gamzzz, + S_Rxx, S_Rxy, S_Rxz, S_Ryy, S_Ryz, S_Rzz, + S_ham_Res, S_movx_Res, S_movy_Res, S_movz_Res, + S_Gmx_Res, S_Gmy_Res, S_Gmz_Res, + + /* TEMPORARY (GPU-only) */ + S_gxx, S_gyy, S_gzz, /* physical metric = dxx+1 etc */ + S_alpn1, S_chin1, + S_chix, S_chiy, S_chiz, + S_gxxx, S_gxyx, S_gxzx, S_gyyx, S_gyzx, S_gzzx, + S_gxxy, S_gxyy, S_gxzy, S_gyyy, S_gyzy, S_gzzy, + S_gxxz, S_gxyz, S_gxzz, S_gyyz, S_gyzz, S_gzzz, + S_Lapx, S_Lapy, S_Lapz, + S_betaxx, S_betaxy, S_betaxz, + S_betayx, S_betayy, S_betayz, + S_betazx, S_betazy, S_betazz, + S_Gamxx, S_Gamxy, S_Gamxz, + S_Gamyx, S_Gamyy_t, S_Gamyz_t, + S_Gamzx, S_Gamzy, S_Gamzz_t, + S_Kx, S_Ky, S_Kz, + S_S_arr, S_f_arr, + S_fxx, S_fxy, S_fxz, S_fyy, S_fyz, S_fzz, + S_Gamxa, S_Gamya, S_Gamza, + S_gupxx, S_gupxy, S_gupxz, + S_gupyy, S_gupyz, S_gupzz, + NUM_USED_SLOTS +}; + +static_assert(NUM_USED_SLOTS <= NUM_SLOTS, "Increase NUM_SLOTS"); + +static const int H2D_INPUT_SLOT_COUNT = (S_Szz - S_chi + 1); +static const int D2H_BASE_SLOT_COUNT = (S_Rzz - S_chi_rhs + 1); +static const int D2H_CONSTRAINT_SLOT_COUNT = (S_Gmz_Res - S_ham_Res + 1); +static const int STAGE_SLOT_COUNT = + (H2D_INPUT_SLOT_COUNT > (D2H_BASE_SLOT_COUNT + D2H_CONSTRAINT_SLOT_COUNT)) + ? 
H2D_INPUT_SLOT_COUNT + : (D2H_BASE_SLOT_COUNT + D2H_CONSTRAINT_SLOT_COUNT); + +static void ensure_gpu_buffers(int nx, int ny, int nz) { + size_t all = (size_t)nx * ny * nz; + size_t fh2_size = (size_t)(nx+2) * (ny+2) * (nz+2); + size_t fh3_size = (size_t)(nx+3) * (ny+3) * (nz+3); + const bool need_grow = (!g_buf.initialized) + || (all > g_buf.cap_all) + || (fh2_size > g_buf.cap_fh2_size) + || (fh3_size > g_buf.cap_fh3_size); + + if (need_grow) { + if (g_buf.d_mem) { cudaFree(g_buf.d_mem); g_buf.d_mem = nullptr; } + if (g_buf.d_fh2) { cudaFree(g_buf.d_fh2); g_buf.d_fh2 = nullptr; } + if (g_buf.d_fh3) { cudaFree(g_buf.d_fh3); g_buf.d_fh3 = nullptr; } + if (g_buf.h_stage) { + if (g_buf.h_stage_pinned) cudaFreeHost(g_buf.h_stage); + else free(g_buf.h_stage); + g_buf.h_stage = nullptr; + g_buf.h_stage_pinned = false; + } + + CUDA_CHECK(cudaMalloc(&g_buf.d_mem, NUM_USED_SLOTS * all * sizeof(double))); + CUDA_CHECK(cudaMalloc(&g_buf.d_fh2, fh2_size * sizeof(double))); + CUDA_CHECK(cudaMalloc(&g_buf.d_fh3, fh3_size * sizeof(double))); + + const size_t stage_bytes = (size_t)STAGE_SLOT_COUNT * all * sizeof(double); + cudaError_t stage_err = cudaMallocHost((void**)&g_buf.h_stage, stage_bytes); + if (stage_err == cudaSuccess) { + g_buf.h_stage_pinned = true; + } else { + g_buf.h_stage = (double *)malloc(stage_bytes); + g_buf.h_stage_pinned = false; + if (!g_buf.h_stage) { + fprintf(stderr, "Host stage allocation failed (%zu bytes)\n", stage_bytes); + exit(EXIT_FAILURE); + } + } + + g_buf.cap_all = all; + g_buf.cap_fh2_size = fh2_size; + g_buf.cap_fh3_size = fh3_size; + g_buf.initialized = true; + } + + for (int s = 0; s < NUM_USED_SLOTS; ++s) + g_buf.slot[s] = g_buf.d_mem + s * all; + + g_buf.prev_nx = nx; + g_buf.prev_ny = ny; + g_buf.prev_nz = nz; +} + +/* ================================================================== */ +/* A. 
Symmetry boundary kernels (ord=2 and ord=3) */ +/* ================================================================== */ + +/* Step 1: Copy interior into ghost-padded array */ +__global__ void kern_symbd_copy_interior_ord2(const double * __restrict__ func, + double * __restrict__ fh, + double SoA0, double SoA1, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + fh[(iF+1) + (jF+1)*fnx + (kF+1)*fnx*fny] = func[tid]; + } +} + +/* Fused symmetry pack (ord=2): fill full fh from interior func in one pass. */ +__global__ void kern_symbd_pack_ord2(const double * __restrict__ func, + double * __restrict__ fh, + double SoA0, double SoA1, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny, fnz = d_gp.fh2_nz; + const int total = fnx * fny * fnz; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total; + tid += blockDim.x * gridDim.x) + { + int ii = tid % fnx; + int jj = (tid / fnx) % fny; + int kk = tid / (fnx * fny); + + int iF = ii - 1; /* -1 .. nx */ + int jF = jj - 1; /* -1 .. ny */ + int kF = kk - 1; /* -1 .. nz */ + + int siF = (iF <= 0) ? (1 - iF) : iF; /* 1..nx */ + int sjF = (jF <= 0) ? (1 - jF) : jF; /* 1..ny */ + int skF = (kF <= 0) ? 
(1 - kF) : kF; /* 1..nz */ + + double sign = 1.0; + if (iF <= 0) sign *= SoA0; + if (jF <= 0) sign *= SoA1; + if (kF <= 0) sign *= SoA2; + + int src = (siF - 1) + (sjF - 1) * nx + (skF - 1) * nx * ny; + fh[tid] = sign * func[src]; + } +} + +/* Step 2: Fill i-ghosts (x-direction symmetry) */ +__global__ void kern_symbd_ighost_ord2(double * __restrict__ fh, double SoA0) +{ + const int ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + /* ord=2: fill iF=0 and iF=-1, i.e. ghost layers ii=0 from ii=2, ii=1 from ii=1 */ + /* Fortran: do ii=0,ord-1: funcc(-ii,jF,kF) = funcc(ii+1,jF,kF)*SoA[0] */ + int total = ny * nz; /* jF=1..ny, kF=1..nz */ + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 2; /* 2 ghost layers */ + tid += blockDim.x * gridDim.x) + { + int ii = tid / total; /* 0 or 1 */ + int rem = tid % total; + int j0 = rem % ny; + int k0 = rem / ny; + int jF = j0 + 1, kF = k0 + 1; + int iF_dst = -ii; /* 0, -1 */ + int iF_src = ii + 1; /* 1, 2 */ + fh[(iF_dst+1) + (jF+1)*fnx + (kF+1)*fnx*fny] = + fh[(iF_src+1) + (jF+1)*fnx + (kF+1)*fnx*fny] * SoA0; + } +} + +/* Step 3: Fill j-ghosts (y-direction symmetry) */ +__global__ void kern_symbd_jghost_ord2(double * __restrict__ fh, double SoA1) +{ + const int nx = d_gp.ex[0], nz = d_gp.ex[2]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + /* iF ranges from -1 to nx (i.e. -ord+1 to ex1), total = nx+2 */ + int irange = nx + 2; + int total = irange * nz; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 2; + tid += blockDim.x * gridDim.x) + { + int jj = tid / total; + int rem = tid % total; + int ii = rem % irange; + int k0 = rem / irange; + int iF = ii - 1; /* -1 .. 
nx */ + int kF = k0 + 1; + int jF_dst = -jj; + int jF_src = jj + 1; + fh[(iF+1) + (jF_dst+1)*fnx + (kF+1)*fnx*fny] = + fh[(iF+1) + (jF_src+1)*fnx + (kF+1)*fnx*fny] * SoA1; + } +} + +/* Step 4: Fill k-ghosts (z-direction symmetry) */ +__global__ void kern_symbd_kghost_ord2(double * __restrict__ fh, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1]; + const int fnx = d_gp.fh2_nx, fny = d_gp.fh2_ny; + int irange = nx + 2; + int jrange = ny + 2; + int total = irange * jrange; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 2; + tid += blockDim.x * gridDim.x) + { + int kk = tid / total; + int rem = tid % total; + int ii = rem % irange; + int jj = rem / irange; + int iF = ii - 1; + int jF = jj - 1; + int kF_dst = -kk; + int kF_src = kk + 1; + fh[(iF+1) + (jF+1)*fnx + (kF_dst+1)*fnx*fny] = + fh[(iF+1) + (jF+1)*fnx + (kF_src+1)*fnx*fny] * SoA2; + } +} + +/* ---- ord=3 variants (for lopsided / kodis) ---- */ + +__global__ void kern_symbd_copy_interior_ord3(const double * __restrict__ func, + double * __restrict__ fh) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + fh[(iF+2) + (jF+2)*fnx + (kF+2)*fnx*fny] = func[tid]; + } +} + +/* Fused symmetry pack (ord=3): fill full fh from interior func in one pass. 
*/ +__global__ void kern_symbd_pack_ord3(const double * __restrict__ func, + double * __restrict__ fh, + double SoA0, double SoA1, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny, fnz = d_gp.fh3_nz; + const int total = fnx * fny * fnz; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total; + tid += blockDim.x * gridDim.x) + { + int ii = tid % fnx; + int jj = (tid / fnx) % fny; + int kk = tid / (fnx * fny); + + int iF = ii - 2; /* -2 .. nx */ + int jF = jj - 2; /* -2 .. ny */ + int kF = kk - 2; /* -2 .. nz */ + + int siF = (iF <= 0) ? (1 - iF) : iF; /* 1..nx */ + int sjF = (jF <= 0) ? (1 - jF) : jF; /* 1..ny */ + int skF = (kF <= 0) ? (1 - kF) : kF; /* 1..nz */ + + double sign = 1.0; + if (iF <= 0) sign *= SoA0; + if (jF <= 0) sign *= SoA1; + if (kF <= 0) sign *= SoA2; + + int src = (siF - 1) + (sjF - 1) * nx + (skF - 1) * nx * ny; + fh[tid] = sign * func[src]; + } +} + +__global__ void kern_symbd_ighost_ord3(double * __restrict__ fh, double SoA0) +{ + const int ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + int total = ny * nz; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 3; + tid += blockDim.x * gridDim.x) + { + int ii = tid / total; + int rem = tid % total; + int j0 = rem % ny; + int k0 = rem / ny; + int jF = j0 + 1, kF = k0 + 1; + int iF_dst = -ii; + int iF_src = ii + 1; + fh[(iF_dst+2) + (jF+2)*fnx + (kF+2)*fnx*fny] = + fh[(iF_src+2) + (jF+2)*fnx + (kF+2)*fnx*fny] * SoA0; + } +} + +__global__ void kern_symbd_jghost_ord3(double * __restrict__ fh, double SoA1) +{ + const int nx = d_gp.ex[0], nz = d_gp.ex[2]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + int irange = nx + 3; + int total = irange * nz; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 3; + tid += blockDim.x * gridDim.x) + { + int jj = tid / total; + int rem = tid % total; + int ii = rem % irange; + int k0 = rem / irange; + int 
iF = ii - 2; + int kF = k0 + 1; + int jF_dst = -jj; + int jF_src = jj + 1; + fh[(iF+2) + (jF_dst+2)*fnx + (kF+2)*fnx*fny] = + fh[(iF+2) + (jF_src+2)*fnx + (kF+2)*fnx*fny] * SoA1; + } +} + +__global__ void kern_symbd_kghost_ord3(double * __restrict__ fh, double SoA2) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1]; + const int fnx = d_gp.fh3_nx, fny = d_gp.fh3_ny; + int irange = nx + 3; + int jrange = ny + 3; + int total = irange * jrange; + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < total * 3; + tid += blockDim.x * gridDim.x) + { + int kk = tid / total; + int rem = tid % total; + int ii = rem % irange; + int jj = rem / irange; + int iF = ii - 2; + int jF = jj - 2; + int kF_dst = -kk; + int kF_src = kk + 1; + fh[(iF+2) + (jF+2)*fnx + (kF_dst+2)*fnx*fny] = + fh[(iF+2) + (jF+2)*fnx + (kF_src+2)*fnx*fny] * SoA2; + } +} + +/* ================================================================== */ +/* B. Stencil kernels */ +/* ================================================================== */ + +/* ---- First derivatives (ord=2, 4th/2nd order) ---- */ +__global__ __launch_bounds__(128, 4) +void kern_fderivs(const double * __restrict__ fh, + double * __restrict__ fx, + double * __restrict__ fy, + double * __restrict__ fz) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; + const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + + /* boundary points: leave as zero */ + if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) { + fx[tid] = 0.0; fy[tid] = 0.0; fz[tid] = 0.0; + continue; + } + + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + + if ((iF+2) <= imaxF && (iF-2) >= iminF && + (jF+2) <= jmaxF && (jF-2) >= jminF && + (kF+2) <= kmaxF && (kF-2) >= kminF) + { + fx[tid] = 
d_gp.d12dx * ( + fh[idx_fh2(iF-2,jF,kF)] - 8.0*fh[idx_fh2(iF-1,jF,kF)] + + 8.0*fh[idx_fh2(iF+1,jF,kF)] - fh[idx_fh2(iF+2,jF,kF)]); + fy[tid] = d_gp.d12dy * ( + fh[idx_fh2(iF,jF-2,kF)] - 8.0*fh[idx_fh2(iF,jF-1,kF)] + + 8.0*fh[idx_fh2(iF,jF+1,kF)] - fh[idx_fh2(iF,jF+2,kF)]); + fz[tid] = d_gp.d12dz * ( + fh[idx_fh2(iF,jF,kF-2)] - 8.0*fh[idx_fh2(iF,jF,kF-1)] + + 8.0*fh[idx_fh2(iF,jF,kF+1)] - fh[idx_fh2(iF,jF,kF+2)]); + } + else if ((iF+1) <= imaxF && (iF-1) >= iminF && + (jF+1) <= jmaxF && (jF-1) >= jminF && + (kF+1) <= kmaxF && (kF-1) >= kminF) + { + fx[tid] = d_gp.d2dx * ( + -fh[idx_fh2(iF-1,jF,kF)] + fh[idx_fh2(iF+1,jF,kF)]); + fy[tid] = d_gp.d2dy * ( + -fh[idx_fh2(iF,jF-1,kF)] + fh[idx_fh2(iF,jF+1,kF)]); + fz[tid] = d_gp.d2dz * ( + -fh[idx_fh2(iF,jF,kF-1)] + fh[idx_fh2(iF,jF,kF+1)]); + } + else { + fx[tid] = 0.0; fy[tid] = 0.0; fz[tid] = 0.0; + } + } +} + +/* ---- Second derivatives (ord=2, 4th/2nd order) ---- */ +__global__ __launch_bounds__(128, 4) +void kern_fdderivs(const double * __restrict__ fh, + double * __restrict__ fxx, double * __restrict__ fxy, + double * __restrict__ fxz, double * __restrict__ fyy, + double * __restrict__ fyz, double * __restrict__ fzz) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; + const int iminF = d_gp.iminF, jminF = d_gp.jminF, kminF = d_gp.kminF; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + + if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) { + fxx[tid]=0; fxy[tid]=0; fxz[tid]=0; + fyy[tid]=0; fyz[tid]=0; fzz[tid]=0; + continue; + } + + int iF = i0+1, jF = j0+1, kF = k0+1; + + if ((iF+2)<=imaxF && (iF-2)>=iminF && + (jF+2)<=jmaxF && (jF-2)>=jminF && + (kF+2)<=kmaxF && (kF-2)>=kminF) + { + /* 4th-order diagonal */ + double c = fh[idx_fh2(iF,jF,kF)]; + fxx[tid] = d_gp.Fdxdx*( + 
-fh[idx_fh2(iF-2,jF,kF)] + 16.0*fh[idx_fh2(iF-1,jF,kF)] + -30.0*c + 16.0*fh[idx_fh2(iF+1,jF,kF)] - fh[idx_fh2(iF+2,jF,kF)]); + fyy[tid] = d_gp.Fdydy*( + -fh[idx_fh2(iF,jF-2,kF)] + 16.0*fh[idx_fh2(iF,jF-1,kF)] + -30.0*c + 16.0*fh[idx_fh2(iF,jF+1,kF)] - fh[idx_fh2(iF,jF+2,kF)]); + fzz[tid] = d_gp.Fdzdz*( + -fh[idx_fh2(iF,jF,kF-2)] + 16.0*fh[idx_fh2(iF,jF,kF-1)] + -30.0*c + 16.0*fh[idx_fh2(iF,jF,kF+1)] - fh[idx_fh2(iF,jF,kF+2)]); + + /* 4th-order cross: fxy */ + { + double t_jm2 = fh[idx_fh2(iF-2,jF-2,kF)] - 8.0*fh[idx_fh2(iF-1,jF-2,kF)] + + 8.0*fh[idx_fh2(iF+1,jF-2,kF)] - fh[idx_fh2(iF+2,jF-2,kF)]; + double t_jm1 = fh[idx_fh2(iF-2,jF-1,kF)] - 8.0*fh[idx_fh2(iF-1,jF-1,kF)] + + 8.0*fh[idx_fh2(iF+1,jF-1,kF)] - fh[idx_fh2(iF+2,jF-1,kF)]; + double t_jp1 = fh[idx_fh2(iF-2,jF+1,kF)] - 8.0*fh[idx_fh2(iF-1,jF+1,kF)] + + 8.0*fh[idx_fh2(iF+1,jF+1,kF)] - fh[idx_fh2(iF+2,jF+1,kF)]; + double t_jp2 = fh[idx_fh2(iF-2,jF+2,kF)] - 8.0*fh[idx_fh2(iF-1,jF+2,kF)] + + 8.0*fh[idx_fh2(iF+1,jF+2,kF)] - fh[idx_fh2(iF+2,jF+2,kF)]; + fxy[tid] = d_gp.Fdxdy*(t_jm2 - 8.0*t_jm1 + 8.0*t_jp1 - t_jp2); + } + /* 4th-order cross: fxz */ + { + double t_km2 = fh[idx_fh2(iF-2,jF,kF-2)] - 8.0*fh[idx_fh2(iF-1,jF,kF-2)] + + 8.0*fh[idx_fh2(iF+1,jF,kF-2)] - fh[idx_fh2(iF+2,jF,kF-2)]; + double t_km1 = fh[idx_fh2(iF-2,jF,kF-1)] - 8.0*fh[idx_fh2(iF-1,jF,kF-1)] + + 8.0*fh[idx_fh2(iF+1,jF,kF-1)] - fh[idx_fh2(iF+2,jF,kF-1)]; + double t_kp1 = fh[idx_fh2(iF-2,jF,kF+1)] - 8.0*fh[idx_fh2(iF-1,jF,kF+1)] + + 8.0*fh[idx_fh2(iF+1,jF,kF+1)] - fh[idx_fh2(iF+2,jF,kF+1)]; + double t_kp2 = fh[idx_fh2(iF-2,jF,kF+2)] - 8.0*fh[idx_fh2(iF-1,jF,kF+2)] + + 8.0*fh[idx_fh2(iF+1,jF,kF+2)] - fh[idx_fh2(iF+2,jF,kF+2)]; + fxz[tid] = d_gp.Fdxdz*(t_km2 - 8.0*t_km1 + 8.0*t_kp1 - t_kp2); + } + /* 4th-order cross: fyz */ + { + double t_km2 = fh[idx_fh2(iF,jF-2,kF-2)] - 8.0*fh[idx_fh2(iF,jF-1,kF-2)] + + 8.0*fh[idx_fh2(iF,jF+1,kF-2)] - fh[idx_fh2(iF,jF+2,kF-2)]; + double t_km1 = fh[idx_fh2(iF,jF-2,kF-1)] - 8.0*fh[idx_fh2(iF,jF-1,kF-1)] + + 
8.0*fh[idx_fh2(iF,jF+1,kF-1)] - fh[idx_fh2(iF,jF+2,kF-1)]; + double t_kp1 = fh[idx_fh2(iF,jF-2,kF+1)] - 8.0*fh[idx_fh2(iF,jF-1,kF+1)] + + 8.0*fh[idx_fh2(iF,jF+1,kF+1)] - fh[idx_fh2(iF,jF+2,kF+1)]; + double t_kp2 = fh[idx_fh2(iF,jF-2,kF+2)] - 8.0*fh[idx_fh2(iF,jF-1,kF+2)] + + 8.0*fh[idx_fh2(iF,jF+1,kF+2)] - fh[idx_fh2(iF,jF+2,kF+2)]; + fyz[tid] = d_gp.Fdydz*(t_km2 - 8.0*t_km1 + 8.0*t_kp1 - t_kp2); + } + } + else if ((iF+1)<=imaxF && (iF-1)>=iminF && + (jF+1)<=jmaxF && (jF-1)>=jminF && + (kF+1)<=kmaxF && (kF-1)>=kminF) + { + double c = fh[idx_fh2(iF,jF,kF)]; + fxx[tid] = d_gp.Sdxdx*(fh[idx_fh2(iF-1,jF,kF)] - 2.0*c + fh[idx_fh2(iF+1,jF,kF)]); + fyy[tid] = d_gp.Sdydy*(fh[idx_fh2(iF,jF-1,kF)] - 2.0*c + fh[idx_fh2(iF,jF+1,kF)]); + fzz[tid] = d_gp.Sdzdz*(fh[idx_fh2(iF,jF,kF-1)] - 2.0*c + fh[idx_fh2(iF,jF,kF+1)]); + fxy[tid] = d_gp.Sdxdy*(fh[idx_fh2(iF-1,jF-1,kF)] - fh[idx_fh2(iF+1,jF-1,kF)] + -fh[idx_fh2(iF-1,jF+1,kF)] + fh[idx_fh2(iF+1,jF+1,kF)]); + fxz[tid] = d_gp.Sdxdz*(fh[idx_fh2(iF-1,jF,kF-1)] - fh[idx_fh2(iF+1,jF,kF-1)] + -fh[idx_fh2(iF-1,jF,kF+1)] + fh[idx_fh2(iF+1,jF,kF+1)]); + fyz[tid] = d_gp.Sdydz*(fh[idx_fh2(iF,jF-1,kF-1)] - fh[idx_fh2(iF,jF+1,kF-1)] + -fh[idx_fh2(iF,jF-1,kF+1)] + fh[idx_fh2(iF,jF+1,kF+1)]); + } + else { + fxx[tid]=0; fxy[tid]=0; fxz[tid]=0; + fyy[tid]=0; fyz[tid]=0; fzz[tid]=0; + } + } +} + +/* ---- Lopsided (upwind advection) kernel ---- */ +__global__ __launch_bounds__(128, 4) +void kern_lopsided(const double * __restrict__ fh, + double * __restrict__ f_rhs, + const double * __restrict__ Sfx, + const double * __restrict__ Sfy, + const double * __restrict__ Sfz) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int iminF = d_gp.iminF3, jminF = d_gp.jminF3, kminF = d_gp.kminF3; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + + if (i0 > nx - 2 || j0 > ny - 2 || k0 > nz - 2) 
continue; + + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + double val = 0.0; + + /* --- x direction --- */ + double sfx = Sfx[tid]; + if (sfx > 0.0) { + if (i0 <= nx - 4) { + val += sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF-1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF+1,jF,kF)] - 6.0*fh[idx_fh3(iF+2,jF,kF)] + + fh[idx_fh3(iF+3,jF,kF)]); + } else if (i0 <= nx - 3) { + val += sfx * d_gp.d12dx * ( + fh[idx_fh3(iF-2,jF,kF)] - 8.0*fh[idx_fh3(iF-1,jF,kF)] + +8.0*fh[idx_fh3(iF+1,jF,kF)] - fh[idx_fh3(iF+2,jF,kF)]); + } else if (i0 <= nx - 2) { + val -= sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF+1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF-1,jF,kF)] - 6.0*fh[idx_fh3(iF-2,jF,kF)] + + fh[idx_fh3(iF-3,jF,kF)]); + } + } else if (sfx < 0.0) { + if ((i0 - 2) >= iminF) { + val -= sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF+1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF-1,jF,kF)] - 6.0*fh[idx_fh3(iF-2,jF,kF)] + + fh[idx_fh3(iF-3,jF,kF)]); + } else if ((i0 - 1) >= iminF) { + val += sfx * d_gp.d12dx * ( + fh[idx_fh3(iF-2,jF,kF)] - 8.0*fh[idx_fh3(iF-1,jF,kF)] + +8.0*fh[idx_fh3(iF+1,jF,kF)] - fh[idx_fh3(iF+2,jF,kF)]); + } else if (i0 >= iminF) { + val += sfx * d_gp.d12dx * ( + -3.0*fh[idx_fh3(iF-1,jF,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF+1,jF,kF)] - 6.0*fh[idx_fh3(iF+2,jF,kF)] + + fh[idx_fh3(iF+3,jF,kF)]); + } + } + + /* --- y direction --- */ + double sfy = Sfy[tid]; + if (sfy > 0.0) { + if (j0 <= ny - 4) { + val += sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF-1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF+1,kF)] - 6.0*fh[idx_fh3(iF,jF+2,kF)] + + fh[idx_fh3(iF,jF+3,kF)]); + } else if (j0 <= ny - 3) { + val += sfy * d_gp.d12dy * ( + fh[idx_fh3(iF,jF-2,kF)] - 8.0*fh[idx_fh3(iF,jF-1,kF)] + +8.0*fh[idx_fh3(iF,jF+1,kF)] - fh[idx_fh3(iF,jF+2,kF)]); + } else if (j0 <= ny - 2) { + val -= sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF+1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF-1,kF)] - 
6.0*fh[idx_fh3(iF,jF-2,kF)] + + fh[idx_fh3(iF,jF-3,kF)]); + } + } else if (sfy < 0.0) { + if ((j0 - 2) >= jminF) { + val -= sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF+1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF-1,kF)] - 6.0*fh[idx_fh3(iF,jF-2,kF)] + + fh[idx_fh3(iF,jF-3,kF)]); + } else if ((j0 - 1) >= jminF) { + val += sfy * d_gp.d12dy * ( + fh[idx_fh3(iF,jF-2,kF)] - 8.0*fh[idx_fh3(iF,jF-1,kF)] + +8.0*fh[idx_fh3(iF,jF+1,kF)] - fh[idx_fh3(iF,jF+2,kF)]); + } else if (j0 >= jminF) { + val += sfy * d_gp.d12dy * ( + -3.0*fh[idx_fh3(iF,jF-1,kF)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF+1,kF)] - 6.0*fh[idx_fh3(iF,jF+2,kF)] + + fh[idx_fh3(iF,jF+3,kF)]); + } + } + + /* --- z direction --- */ + double sfz = Sfz[tid]; + if (sfz > 0.0) { + if (k0 <= nz - 4) { + val += sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF-1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF+1)] - 6.0*fh[idx_fh3(iF,jF,kF+2)] + + fh[idx_fh3(iF,jF,kF+3)]); + } else if (k0 <= nz - 3) { + val += sfz * d_gp.d12dz * ( + fh[idx_fh3(iF,jF,kF-2)] - 8.0*fh[idx_fh3(iF,jF,kF-1)] + +8.0*fh[idx_fh3(iF,jF,kF+1)] - fh[idx_fh3(iF,jF,kF+2)]); + } else if (k0 <= nz - 2) { + val -= sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF+1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF-1)] - 6.0*fh[idx_fh3(iF,jF,kF-2)] + + fh[idx_fh3(iF,jF,kF-3)]); + } + } else if (sfz < 0.0) { + if ((k0 - 2) >= kminF) { + val -= sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF+1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF-1)] - 6.0*fh[idx_fh3(iF,jF,kF-2)] + + fh[idx_fh3(iF,jF,kF-3)]); + } else if ((k0 - 1) >= kminF) { + val += sfz * d_gp.d12dz * ( + fh[idx_fh3(iF,jF,kF-2)] - 8.0*fh[idx_fh3(iF,jF,kF-1)] + +8.0*fh[idx_fh3(iF,jF,kF+1)] - fh[idx_fh3(iF,jF,kF+2)]); + } else if (k0 >= kminF) { + val += sfz * d_gp.d12dz * ( + -3.0*fh[idx_fh3(iF,jF,kF-1)] - 10.0*fh[idx_fh3(iF,jF,kF)] + +18.0*fh[idx_fh3(iF,jF,kF+1)] - 6.0*fh[idx_fh3(iF,jF,kF+2)] + + fh[idx_fh3(iF,jF,kF+3)]); + } + } + + 
f_rhs[tid] += val; + } +} + +/* ---- KO dissipation kernel (ord=3, 6th-order) ---- */ +__global__ __launch_bounds__(128, 4) +void kern_kodis(const double * __restrict__ fh, + double * __restrict__ f_rhs, + double eps_val) +{ + const int nx = d_gp.ex[0], ny = d_gp.ex[1], nz = d_gp.ex[2]; + const int iminF = d_gp.iminF3, jminF = d_gp.jminF3, kminF = d_gp.kminF3; + const int imaxF = d_gp.imaxF, jmaxF = d_gp.jmaxF, kmaxF = d_gp.kmaxF; + const double cof = 64.0; + + for (int tid = blockIdx.x * blockDim.x + threadIdx.x; + tid < d_gp.all; + tid += blockDim.x * gridDim.x) + { + int i0 = tid % nx; + int j0 = (tid / nx) % ny; + int k0 = tid / (nx * ny); + int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1; + + if ((iF-3) >= iminF && (iF+3) <= imaxF && + (jF-3) >= jminF && (jF+3) <= jmaxF && + (kF-3) >= kminF && (kF+3) <= kmaxF) + { + double Dx = (fh[idx_fh3(iF-3,jF,kF)] + fh[idx_fh3(iF+3,jF,kF)]) + - 6.0*(fh[idx_fh3(iF-2,jF,kF)] + fh[idx_fh3(iF+2,jF,kF)]) + +15.0*(fh[idx_fh3(iF-1,jF,kF)] + fh[idx_fh3(iF+1,jF,kF)]) + -20.0* fh[idx_fh3(iF,jF,kF)]; + Dx /= d_gp.dX; + + double Dy = (fh[idx_fh3(iF,jF-3,kF)] + fh[idx_fh3(iF,jF+3,kF)]) + - 6.0*(fh[idx_fh3(iF,jF-2,kF)] + fh[idx_fh3(iF,jF+2,kF)]) + +15.0*(fh[idx_fh3(iF,jF-1,kF)] + fh[idx_fh3(iF,jF+1,kF)]) + -20.0* fh[idx_fh3(iF,jF,kF)]; + Dy /= d_gp.dY; + + double Dz = (fh[idx_fh3(iF,jF,kF-3)] + fh[idx_fh3(iF,jF,kF+3)]) + - 6.0*(fh[idx_fh3(iF,jF,kF-2)] + fh[idx_fh3(iF,jF,kF+2)]) + +15.0*(fh[idx_fh3(iF,jF,kF-1)] + fh[idx_fh3(iF,jF,kF+1)]) + -20.0* fh[idx_fh3(iF,jF,kF)]; + Dz /= d_gp.dZ; + + f_rhs[tid] += (eps_val / cof) * (Dx + Dy + Dz); + } + } +} + +/* ================================================================== */ +/* Host wrapper helpers */ +/* ================================================================== */ +static const int BLK = 128; +static inline int grid(size_t n) { + if (n == 0) return 1; + size_t g = (n + BLK - 1) / BLK; + if (g > 2147483647u) g = 2147483647u; + return (int)g; +} + +/* symmetry_bd on GPU for ord=2, then 
launch fderivs kernel */ +static void gpu_fderivs(double *d_f, double *d_fx, double *d_fy, double *d_fz, + double SoA0, double SoA1, double SoA2, int all) +{ + double *fh = g_buf.d_fh2; + const size_t nx = (size_t)g_buf.prev_nx; + const size_t ny = (size_t)g_buf.prev_ny; + const size_t nz = (size_t)g_buf.prev_nz; + const size_t w_pack = (nx + 2ull) * (ny + 2ull) * (nz + 2ull); + + kern_symbd_pack_ord2<<>>(d_f, fh, SoA0, SoA1, SoA2); + kern_fderivs<<>>(fh, d_fx, d_fy, d_fz); +} + +/* symmetry_bd on GPU for ord=2, then launch fdderivs kernel */ +static void gpu_fdderivs(double *d_f, + double *d_fxx, double *d_fxy, double *d_fxz, + double *d_fyy, double *d_fyz, double *d_fzz, + double SoA0, double SoA1, double SoA2, int all) +{ + double *fh = g_buf.d_fh2; + const size_t nx = (size_t)g_buf.prev_nx; + const size_t ny = (size_t)g_buf.prev_ny; + const size_t nz = (size_t)g_buf.prev_nz; + const size_t w_pack = (nx + 2ull) * (ny + 2ull) * (nz + 2ull); + + kern_symbd_pack_ord2<<>>(d_f, fh, SoA0, SoA1, SoA2); + kern_fdderivs<<>>(fh, d_fxx, d_fxy, d_fxz, d_fyy, d_fyz, d_fzz); +} + +/* Combined ord=3 advection + KO dissipation. + * When advection and KO use the same source field, symmetry packing is shared. + * If they differ (e.g. gxx advection + dxx KO), only KO repacks. 
+ * Launches are sized with grid()/BLK: the pack kernels by the padded-buffer element count, the other kernels by `all`. */
+static void gpu_lopsided_kodis(double *d_f_adv, double *d_f_ko, double *d_f_rhs,
+                               double *d_Sfx, double *d_Sfy, double *d_Sfz,
+                               double SoA0, double SoA1, double SoA2,
+                               double eps_val, int all)
+{
+    double *fh = g_buf.d_fh3;
+    const size_t nx = (size_t)g_buf.prev_nx;
+    const size_t ny = (size_t)g_buf.prev_ny;
+    const size_t nz = (size_t)g_buf.prev_nz;
+    const size_t w_pack = (nx + 3ull) * (ny + 3ull) * (nz + 3ull);  /* element count the pack launches are sized for */
+
+    kern_symbd_pack_ord3<<<grid(w_pack), BLK>>>(d_f_adv, fh, SoA0, SoA1, SoA2);  /* NOTE(review): launch geometry reconstructed from grid()/BLK, w_pack and all -- confirm against upstream */
+    kern_lopsided<<<grid((size_t)all), BLK>>>(fh, d_f_rhs, d_Sfx, d_Sfy, d_Sfz);
+
+    if (eps_val > 0.0) {  /* the KO term is skipped entirely for eps_val <= 0 */
+        if (d_f_ko != d_f_adv) {  /* different KO source field: overwrite fh with it before kern_kodis */
+            kern_symbd_pack_ord3<<<grid(w_pack), BLK>>>(d_f_ko, fh, SoA0, SoA1, SoA2);
+        }
+        kern_kodis<<<grid((size_t)all), BLK>>>(fh, d_f_rhs, eps_val);
+    }
+}
+
+/* ================================================================== */
+/*  C. Point-wise computation kernels                                  */
+/* ================================================================== */
+
+/* Phase 1: alpn1, chin1, gxx=dxx+1, gyy=dyy+1, gzz=dzz+1 (alpn1 = Lap + 1, chin1 = chi + 1) */
+__global__ void kern_phase1_prep(
+    const double* __restrict__ Lap, const double* __restrict__ chi,
+    const double* __restrict__ dxx, const double* __restrict__ dyy,
+    const double* __restrict__ dzz,
+    double* __restrict__ alpn1, double* __restrict__ chin1,
+    double* __restrict__ gxx, double* __restrict__ gyy, double* __restrict__ gzz)
+{
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {  /* grid-stride loop over d_gp.all points */
+        alpn1[i] = Lap[i] + 1.0;
+        chin1[i] = chi[i] + 1.0;
+        gxx[i] = dxx[i] + 1.0;
+        gyy[i] = dyy[i] + 1.0;
+        gzz[i] = dzz[i] + 1.0;
+    }
+}
+
+/* Phase 2a: chi_rhs, gij_rhs */
+__global__ void kern_phase2_metric_rhs(
+    const double* __restrict__ alpn1, const double* __restrict__ chin1,
+    const double* __restrict__ gxx, const double* __restrict__ gxy,
+    const double* __restrict__ gxz, const double* __restrict__ gyy,
+    const double* __restrict__ gyz, const double* __restrict__ gzz,
+    const double* __restrict__ trK,
+    const double* __restrict__ Axx, const double* __restrict__ Axy,
+    const double* 
__restrict__ Axz, const double* __restrict__ Ayy, + const double* __restrict__ Ayz, const double* __restrict__ Azz, + const double* __restrict__ betaxx, const double* __restrict__ betaxy, + const double* __restrict__ betaxz, const double* __restrict__ betayx, + const double* __restrict__ betayy, const double* __restrict__ betayz, + const double* __restrict__ betazx, const double* __restrict__ betazy, + const double* __restrict__ betazz, + double* __restrict__ chi_rhs, double* __restrict__ gxx_rhs, + double* __restrict__ gyy_rhs, double* __restrict__ gzz_rhs, + double* __restrict__ gxy_rhs, double* __restrict__ gyz_rhs, + double* __restrict__ gxz_rhs) +{ + const double F2o3 = 2.0/3.0, F1o3 = 1.0/3.0, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double db = betaxx[i] + betayy[i] + betazz[i]; + chi_rhs[i] = F2o3 * chin1[i] * (alpn1[i] * trK[i] - db); + gxx_rhs[i] = -TWO*alpn1[i]*Axx[i] - F2o3*gxx[i]*db + + TWO*(gxx[i]*betaxx[i] + gxy[i]*betayx[i] + gxz[i]*betazx[i]); + gyy_rhs[i] = -TWO*alpn1[i]*Ayy[i] - F2o3*gyy[i]*db + + TWO*(gxy[i]*betaxy[i] + gyy[i]*betayy[i] + gyz[i]*betazy[i]); + gzz_rhs[i] = -TWO*alpn1[i]*Azz[i] - F2o3*gzz[i]*db + + TWO*(gxz[i]*betaxz[i] + gyz[i]*betayz[i] + gzz[i]*betazz[i]); + gxy_rhs[i] = -TWO*alpn1[i]*Axy[i] + F1o3*gxy[i]*db + + gxx[i]*betaxy[i] + gxz[i]*betazy[i] + gyy[i]*betayx[i] + + gyz[i]*betazx[i] - gxy[i]*betazz[i]; + gyz_rhs[i] = -TWO*alpn1[i]*Ayz[i] + F1o3*gyz[i]*db + + gxy[i]*betaxz[i] + gyy[i]*betayz[i] + gxz[i]*betaxy[i] + + gzz[i]*betazy[i] - gyz[i]*betaxx[i]; + gxz_rhs[i] = -TWO*alpn1[i]*Axz[i] + F1o3*gxz[i]*db + + gxx[i]*betaxz[i] + gxy[i]*betayz[i] + gyz[i]*betayx[i] + + gzz[i]*betazx[i] - gxz[i]*betayy[i]; + } +} + +/* Phase 2b: metric inverse */ +__global__ void kern_phase2_inverse( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* 
__restrict__ gzz, + double* __restrict__ gupxx, double* __restrict__ gupxy, + double* __restrict__ gupxz, double* __restrict__ gupyy, + double* __restrict__ gupyz, double* __restrict__ gupzz) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double det = gxx[i]*gyy[i]*gzz[i] + gxy[i]*gyz[i]*gxz[i] + gxz[i]*gxy[i]*gyz[i] + - gxz[i]*gyy[i]*gxz[i] - gxy[i]*gxy[i]*gzz[i] - gxx[i]*gyz[i]*gyz[i]; + double inv = 1.0 / det; + gupxx[i] = (gyy[i]*gzz[i] - gyz[i]*gyz[i]) * inv; + gupxy[i] = -(gxy[i]*gzz[i] - gyz[i]*gxz[i]) * inv; + gupxz[i] = (gxy[i]*gyz[i] - gyy[i]*gxz[i]) * inv; + gupyy[i] = (gxx[i]*gzz[i] - gxz[i]*gxz[i]) * inv; + gupyz[i] = -(gxx[i]*gyz[i] - gxy[i]*gxz[i]) * inv; + gupzz[i] = (gxx[i]*gyy[i] - gxy[i]*gxy[i]) * inv; + } +} + +/* Phase 3: Gamma constraint residuals (co==0 only) */ +__global__ void kern_phase3_gamma_constraint( + const double* __restrict__ Gamx, const double* __restrict__ Gamy, + const double* __restrict__ Gamz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ gxxx, const double* __restrict__ gxyx, + const double* __restrict__ gxzx, const double* __restrict__ gyyx, + const double* __restrict__ gyzx, const double* __restrict__ gzzx, + const double* __restrict__ gxxy, const double* __restrict__ gxyy, + const double* __restrict__ gxzy, const double* __restrict__ gyyy, + const double* __restrict__ gyzy, const double* __restrict__ gzzy, + const double* __restrict__ gxxz, const double* __restrict__ gxyz, + const double* __restrict__ gxzz, const double* __restrict__ gyyz, + const double* __restrict__ gyzz, const double* __restrict__ gzzz, + double* __restrict__ Gmx_Res, double* __restrict__ Gmy_Res, + double* __restrict__ Gmz_Res) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += 
blockDim.x*gridDim.x) { + double uxx=gupxx[i], uxy=gupxy[i], uxz=gupxz[i]; + double uyy=gupyy[i], uyz=gupyz[i], uzz=gupzz[i]; + + Gmx_Res[i] = Gamx[i] - ( + uxx*(uxx*gxxx[i]+uxy*gxyx[i]+uxz*gxzx[i]) + + uxy*(uxx*gxyx[i]+uxy*gyyx[i]+uxz*gyzx[i]) + + uxz*(uxx*gxzx[i]+uxy*gyzx[i]+uxz*gzzx[i]) + + uxx*(uxy*gxxy[i]+uyy*gxyy[i]+uyz*gxzy[i]) + + uxy*(uxy*gxyy[i]+uyy*gyyy[i]+uyz*gyzy[i]) + + uxz*(uxy*gxzy[i]+uyy*gyzy[i]+uyz*gzzy[i]) + + uxx*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + + uxy*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + + uxz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); + + Gmy_Res[i] = Gamy[i] - ( + uxx*(uxy*gxxx[i]+uyy*gxyx[i]+uyz*gxzx[i]) + + uxy*(uxy*gxyx[i]+uyy*gyyx[i]+uyz*gyzx[i]) + + uxz*(uxy*gxzx[i]+uyy*gyzx[i]+uyz*gzzx[i]) + + uxy*(uxy*gxxy[i]+uyy*gxyy[i]+uyz*gxzy[i]) + + uyy*(uxy*gxyy[i]+uyy*gyyy[i]+uyz*gyzy[i]) + + uyz*(uxy*gxzy[i]+uyy*gyzy[i]+uyz*gzzy[i]) + + uxy*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + + uyy*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + + uyz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); + + Gmz_Res[i] = Gamz[i] - ( + uxx*(uxz*gxxx[i]+uyz*gxyx[i]+uzz*gxzx[i]) + + uxy*(uxz*gxyx[i]+uyz*gyyx[i]+uzz*gyzx[i]) + + uxz*(uxz*gxzx[i]+uyz*gyzx[i]+uzz*gzzx[i]) + + uxy*(uxz*gxxy[i]+uyz*gxyy[i]+uzz*gxzy[i]) + + uyy*(uxz*gxyy[i]+uyz*gyyy[i]+uzz*gyzy[i]) + + uyz*(uxz*gxzy[i]+uyz*gyzy[i]+uzz*gzzy[i]) + + uxz*(uxz*gxxz[i]+uyz*gxyz[i]+uzz*gxzz[i]) + + uyz*(uxz*gxyz[i]+uyz*gyyz[i]+uzz*gyzz[i]) + + uzz*(uxz*gxzz[i]+uyz*gyzz[i]+uzz*gzzz[i])); + } +} + +/* Phase 4: 18 Christoffel symbols */ +__global__ __launch_bounds__(128, 4) +void kern_phase4_christoffel( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ gxxx, const double* __restrict__ gxyx, + const double* __restrict__ gxzx, const double* __restrict__ gyyx, + const double* __restrict__ gyzx, const double* __restrict__ gzzx, + const 
double* __restrict__ gxxy, const double* __restrict__ gxyy, + const double* __restrict__ gxzy, const double* __restrict__ gyyy, + const double* __restrict__ gyzy, const double* __restrict__ gzzy, + const double* __restrict__ gxxz, const double* __restrict__ gxyz, + const double* __restrict__ gxzz, const double* __restrict__ gyyz, + const double* __restrict__ gyzz, const double* __restrict__ gzzz, + double* __restrict__ Gxxx, double* __restrict__ Gxxy, double* __restrict__ Gxxz, + double* __restrict__ Gxyy, double* __restrict__ Gxyz, double* __restrict__ Gxzz, + double* __restrict__ Gyxx, double* __restrict__ Gyxy, double* __restrict__ Gyxz, + double* __restrict__ Gyyy, double* __restrict__ Gyyz, double* __restrict__ Gyzz, + double* __restrict__ Gzxx, double* __restrict__ Gzxy, double* __restrict__ Gzxz, + double* __restrict__ Gzyy, double* __restrict__ Gzyz, double* __restrict__ Gzzz_o) +{ + const double H = 0.5, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + /* Gamma^x_{xx} */ + Gxxx[i]=H*(uxx*gxxx[i]+uxy*(TWO*gxyx[i]-gxxy[i])+uxz*(TWO*gxzx[i]-gxxz[i])); + Gyxx[i]=H*(uxy*gxxx[i]+uyy*(TWO*gxyx[i]-gxxy[i])+uyz*(TWO*gxzx[i]-gxxz[i])); + Gzxx[i]=H*(uxz*gxxx[i]+uyz*(TWO*gxyx[i]-gxxy[i])+uzz*(TWO*gxzx[i]-gxxz[i])); + /* yy */ + Gxyy[i]=H*(uxx*(TWO*gxyy[i]-gyyx[i])+uxy*gyyy[i]+uxz*(TWO*gyzy[i]-gyyz[i])); + Gyyy[i]=H*(uxy*(TWO*gxyy[i]-gyyx[i])+uyy*gyyy[i]+uyz*(TWO*gyzy[i]-gyyz[i])); + Gzyy[i]=H*(uxz*(TWO*gxyy[i]-gyyx[i])+uyz*gyyy[i]+uzz*(TWO*gyzy[i]-gyyz[i])); + /* zz */ + Gxzz[i]=H*(uxx*(TWO*gxzz[i]-gzzx[i])+uxy*(TWO*gyzz[i]-gzzy[i])+uxz*gzzz[i]); + Gyzz[i]=H*(uxy*(TWO*gxzz[i]-gzzx[i])+uyy*(TWO*gyzz[i]-gzzy[i])+uyz*gzzz[i]); + Gzzz_o[i]=H*(uxz*(TWO*gxzz[i]-gzzx[i])+uyz*(TWO*gyzz[i]-gzzy[i])+uzz*gzzz[i]); + /* xy */ + Gxxy[i]=H*(uxx*gxxy[i]+uxy*gyyx[i]+uxz*(gxzy[i]+gyzx[i]-gxyz[i])); + 
Gyxy[i]=H*(uxy*gxxy[i]+uyy*gyyx[i]+uyz*(gxzy[i]+gyzx[i]-gxyz[i])); + Gzxy[i]=H*(uxz*gxxy[i]+uyz*gyyx[i]+uzz*(gxzy[i]+gyzx[i]-gxyz[i])); + /* xz */ + Gxxz[i]=H*(uxx*gxxz[i]+uxy*(gxyz[i]+gyzx[i]-gxzy[i])+uxz*gzzx[i]); + Gyxz[i]=H*(uxy*gxxz[i]+uyy*(gxyz[i]+gyzx[i]-gxzy[i])+uyz*gzzx[i]); + Gzxz[i]=H*(uxz*gxxz[i]+uyz*(gxyz[i]+gyzx[i]-gxzy[i])+uzz*gzzx[i]); + /* yz */ + Gxyz[i]=H*(uxx*(gxyz[i]+gxzy[i]-gyzx[i])+uxy*gyyz[i]+uxz*gzzy[i]); + Gyyz[i]=H*(uxy*(gxyz[i]+gxzy[i]-gyzx[i])+uyy*gyyz[i]+uyz*gzzy[i]); + Gzyz[i]=H*(uxz*(gxyz[i]+gxzy[i]-gyzx[i])+uyz*gyyz[i]+uzz*gzzy[i]); + } +} + +/* Phase 5: A^ij = gup^ia gup^jb A_ab (stored temporarily in Rxx..Rzz) */ +__global__ void kern_phase5_raise_A( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Axx, const double* __restrict__ Axy, + const double* __restrict__ Axz, const double* __restrict__ Ayy, + const double* __restrict__ Ayz, const double* __restrict__ Azz, + double* __restrict__ Rxx, double* __restrict__ Rxy, double* __restrict__ Rxz, + double* __restrict__ Ryy, double* __restrict__ Ryz, double* __restrict__ Rzz) +{ + const double TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + Rxx[i]=uxx*uxx*Axx[i]+uxy*uxy*Ayy[i]+uxz*uxz*Azz[i] + +TWO*(uxx*uxy*Axy[i]+uxx*uxz*Axz[i]+uxy*uxz*Ayz[i]); + Ryy[i]=uxy*uxy*Axx[i]+uyy*uyy*Ayy[i]+uyz*uyz*Azz[i] + +TWO*(uxy*uyy*Axy[i]+uxy*uyz*Axz[i]+uyy*uyz*Ayz[i]); + Rzz[i]=uxz*uxz*Axx[i]+uyz*uyz*Ayy[i]+uzz*uzz*Azz[i] + +TWO*(uxz*uyz*Axy[i]+uxz*uzz*Axz[i]+uyz*uzz*Ayz[i]); + Rxy[i]=uxx*uxy*Axx[i]+uxy*uyy*Ayy[i]+uxz*uyz*Azz[i] + +(uxx*uyy+uxy*uxy)*Axy[i]+(uxx*uyz+uxz*uxy)*Axz[i]+(uxy*uyz+uxz*uyy)*Ayz[i]; + Rxz[i]=uxx*uxz*Axx[i]+uxy*uyz*Ayy[i]+uxz*uzz*Azz[i] + 
+(uxx*uyz+uxy*uxz)*Axy[i]+(uxx*uzz+uxz*uxz)*Axz[i]+(uxy*uzz+uxz*uyz)*Ayz[i]; + Ryz[i]=uxy*uxz*Axx[i]+uyy*uyz*Ayy[i]+uyz*uzz*Azz[i] + +(uxy*uyz+uyy*uxz)*Axy[i]+(uxy*uzz+uyz*uxz)*Axz[i]+(uyy*uzz+uyz*uyz)*Ayz[i]; + } +} + +/* Phase 6: Gamma_rhs part 1 (before fdderivs(beta) and fderivs(Gamma)) */ +__global__ __launch_bounds__(128, 4) +void kern_phase6_gamma_rhs_part1( + const double* __restrict__ Lapx, const double* __restrict__ Lapy, + const double* __restrict__ Lapz, + const double* __restrict__ alpn1, const double* __restrict__ chin1, + const double* __restrict__ chix, const double* __restrict__ chiy, + const double* __restrict__ chiz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Kx, const double* __restrict__ Ky, + const double* __restrict__ Kz, + const double* __restrict__ Sx, const double* __restrict__ Sy, + const double* __restrict__ Sz, + const double* __restrict__ Rxx, const double* __restrict__ Rxy, + const double* __restrict__ Rxz, const double* __restrict__ Ryy, + const double* __restrict__ Ryz, const double* __restrict__ Rzz, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, + double* __restrict__ Gamz_rhs) +{ + const double TWO=2.0, F3o2=1.5, 
F2o3=2.0/3.0, EIGHT=8.0; + const double PI_V = 3.14159265358979323846; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i]; + double a=alpn1[i], c1=chin1[i]; + double cx=chix[i],cy=chiy[i],cz=chiz[i]; + + Gamx_rhs[i] = -TWO*(lx*Rxx[i]+ly*Rxy[i]+lz*Rxz[i]) + + TWO*a*( + -F3o2/c1*(cx*Rxx[i]+cy*Rxy[i]+cz*Rxz[i]) + -uxx*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) + -uxy*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) + -uxz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) + +Gxxx[i]*Rxx[i]+Gxyy[i]*Ryy[i]+Gxzz[i]*Rzz[i] + +TWO*(Gxxy[i]*Rxy[i]+Gxxz[i]*Rxz[i]+Gxyz[i]*Ryz[i])); + + Gamy_rhs[i] = -TWO*(lx*Rxy[i]+ly*Ryy[i]+lz*Ryz[i]) + + TWO*a*( + -F3o2/c1*(cx*Rxy[i]+cy*Ryy[i]+cz*Ryz[i]) + -uxy*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) + -uyy*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) + -uyz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) + +Gyxx[i]*Rxx[i]+Gyyy[i]*Ryy[i]+Gyzz[i]*Rzz[i] + +TWO*(Gyxy[i]*Rxy[i]+Gyxz[i]*Rxz[i]+Gyyz[i]*Ryz[i])); + + Gamz_rhs[i] = -TWO*(lx*Rxz[i]+ly*Ryz[i]+lz*Rzz[i]) + + TWO*a*( + -F3o2/c1*(cx*Rxz[i]+cy*Ryz[i]+cz*Rzz[i]) + -uxz*(F2o3*Kx[i]+EIGHT*PI_V*Sx[i]) + -uyz*(F2o3*Ky[i]+EIGHT*PI_V*Sy[i]) + -uzz*(F2o3*Kz[i]+EIGHT*PI_V*Sz[i]) + +Gzxx[i]*Rxx[i]+Gzyy[i]*Ryy[i]+Gzzz[i]*Rzz[i] + +TWO*(Gzxy[i]*Rxy[i]+Gzxz[i]*Rxz[i]+Gzyz[i]*Ryz[i])); + } +} + +/* Phase 8: Gamma_rhs part 2 — after fdderivs(beta) and fderivs(Gamma) + * Computes: fxx=div(beta_xx), Gamxa, then updates Gamx_rhs etc. + * Input arrays gxxx..gzzz here hold fdderivs(beta) results, + * Gamxx..Gamzz hold fderivs(Gamma) results. 
+ */ +__global__ __launch_bounds__(128, 4) +void kern_phase8_gamma_rhs_part2( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + /* fdderivs(betax) -> gxxx,gxyx,gxzx,gyyx,gyzx,gzzx */ + const double* __restrict__ bxx_xx, const double* __restrict__ bxx_xy, + const double* __restrict__ bxx_xz, const double* __restrict__ bxx_yy, + const double* __restrict__ bxx_yz, const double* __restrict__ bxx_zz, + /* fdderivs(betay) -> gxxy,gxyy,gxzy,gyyy,gyzy,gzzy */ + const double* __restrict__ bxy_xx, const double* __restrict__ bxy_xy, + const double* __restrict__ bxy_xz, const double* __restrict__ bxy_yy, + const double* __restrict__ bxy_yz, const double* __restrict__ bxy_zz, + /* fdderivs(betaz) -> gxxz,gxyz,gxzz,gyyz,gyzz,gzzz */ + const double* __restrict__ bxz_xx, const double* __restrict__ bxz_xy, + const double* __restrict__ bxz_xz, const double* __restrict__ bxz_yy, + const double* __restrict__ bxz_yz, const double* __restrict__ bxz_zz, + /* fderivs(Gamx) -> Gamxx,Gamxy,Gamxz */ + const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, + const double* __restrict__ Gamxz, + /* fderivs(Gamy) -> Gamyx,Gamyy,Gamyz */ + const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, + const double* __restrict__ Gamyz_d, + /* fderivs(Gamz) -> Gamzx,Gamzy,Gamzz */ + const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, + const double* __restrict__ Gamzz_d, + /* Christoffel symbols */ + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + 
const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* betaij first derivs */ + const double* __restrict__ betaxx, const double* __restrict__ betaxy, + const double* __restrict__ betaxz, const double* __restrict__ betayx, + const double* __restrict__ betayy, const double* __restrict__ betayz, + const double* __restrict__ betazx, const double* __restrict__ betazy, + const double* __restrict__ betazz, + double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, + double* __restrict__ Gamz_rhs, + double* __restrict__ Gamxa_out, double* __restrict__ Gamya_out, + double* __restrict__ Gamza_out) +{ + const double TWO=2.0, F2o3=2.0/3.0, F1o3=1.0/3.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + /* div(beta_second_derivs) */ + double fxx_v = bxx_xx[i]+bxy_xy[i]+bxz_xz[i]; + double fxy_v = bxx_xy[i]+bxy_yy[i]+bxz_yz[i]; + double fxz_v = bxx_xz[i]+bxy_yz[i]+bxz_zz[i]; + /* Gamma^a contracted */ + double Ga_x = uxx*Gxxx[i]+uyy*Gxyy[i]+uzz*Gxzz[i] + +TWO*(uxy*Gxxy[i]+uxz*Gxxz[i]+uyz*Gxyz[i]); + double Ga_y = uxx*Gyxx[i]+uyy*Gyyy[i]+uzz*Gyzz[i] + +TWO*(uxy*Gyxy[i]+uxz*Gyxz[i]+uyz*Gyyz[i]); + double Ga_z = uxx*Gzxx[i]+uyy*Gzyy[i]+uzz*Gzzz[i] + +TWO*(uxy*Gzxy[i]+uxz*Gzxz[i]+uyz*Gzyz[i]); + Gamxa_out[i]=Ga_x; Gamya_out[i]=Ga_y; Gamza_out[i]=Ga_z; + double db = betaxx[i] + betayy[i] + betazz[i]; + Gamx_rhs[i] += F2o3*Ga_x*db + - Ga_x*betaxx[i] - Ga_y*betaxy[i] - Ga_z*betaxz[i] + + F1o3*(uxx*fxx_v+uxy*fxy_v+uxz*fxz_v) + + uxx*bxx_xx[i]+uyy*bxx_yy[i]+uzz*bxx_zz[i] + + TWO*(uxy*bxx_xy[i]+uxz*bxx_xz[i]+uyz*bxx_yz[i]); + Gamy_rhs[i] += F2o3*Ga_y*db + - Ga_x*betayx[i] - Ga_y*betayy[i] - Ga_z*betayz[i] + + F1o3*(uxy*fxx_v+uyy*fxy_v+uyz*fxz_v) + + uxx*bxy_xx[i]+uyy*bxy_yy[i]+uzz*bxy_zz[i] + + 
TWO*(uxy*bxy_xy[i]+uxz*bxy_xz[i]+uyz*bxy_yz[i]); + Gamz_rhs[i] += F2o3*Ga_z*db + - Ga_x*betazx[i] - Ga_y*betazy[i] - Ga_z*betazz[i] + + F1o3*(uxz*fxx_v+uyz*fxy_v+uzz*fxz_v) + + uxx*bxz_xx[i]+uyy*bxz_yy[i]+uzz*bxz_zz[i] + + TWO*(uxy*bxz_xy[i]+uxz*bxz_xz[i]+uyz*bxz_yz[i]); + } +} + +/* Phase 9: Christoffel contract — compute g_{ia} Gamma^a_{bc} products + * Overwrites gxxx..gzzz with lowered Christoffel products needed for Ricci. + */ +__global__ __launch_bounds__(128, 4) +void kern_phase9_christoffel_contract( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* output: lowered products g_{ia} Gamma^a_{bc} */ + double* __restrict__ o_gxxx, double* __restrict__ o_gxyx, + double* __restrict__ o_gxzx, double* __restrict__ o_gyyx, + double* __restrict__ o_gyzx, double* __restrict__ o_gzzx, + double* __restrict__ o_gxxy, double* __restrict__ o_gxyy, + double* __restrict__ o_gxzy, double* __restrict__ o_gyyy, + double* __restrict__ o_gyzy, double* __restrict__ o_gzzy, + double* __restrict__ o_gxxz, double* __restrict__ o_gxyz, + double* __restrict__ o_gxzz, double* __restrict__ o_gyyz, + double* __restrict__ o_gyzz, double* __restrict__ o_gzzz) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += 
blockDim.x*gridDim.x) { + double g11=gxx[i],g12=gxy[i],g13=gxz[i]; + double g22=gyy[i],g23=gyz[i],g33=gzz[i]; + /* row x: g_{x,a} Gamma^a_{bc} */ + o_gxxx[i]=g11*Gxxx[i]+g12*Gyxx[i]+g13*Gzxx[i]; + o_gxyx[i]=g11*Gxxy[i]+g12*Gyxy[i]+g13*Gzxy[i]; + o_gxzx[i]=g11*Gxxz[i]+g12*Gyxz[i]+g13*Gzxz[i]; + o_gyyx[i]=g11*Gxyy[i]+g12*Gyyy[i]+g13*Gzyy[i]; + o_gyzx[i]=g11*Gxyz[i]+g12*Gyyz[i]+g13*Gzyz[i]; + o_gzzx[i]=g11*Gxzz[i]+g12*Gyzz[i]+g13*Gzzz[i]; + /* row y: g_{y,a} Gamma^a_{bc} */ + o_gxxy[i]=g12*Gxxx[i]+g22*Gyxx[i]+g23*Gzxx[i]; + o_gxyy[i]=g12*Gxxy[i]+g22*Gyxy[i]+g23*Gzxy[i]; + o_gxzy[i]=g12*Gxxz[i]+g22*Gyxz[i]+g23*Gzxz[i]; + o_gyyy[i]=g12*Gxyy[i]+g22*Gyyy[i]+g23*Gzyy[i]; + o_gyzy[i]=g12*Gxyz[i]+g22*Gyyz[i]+g23*Gzyz[i]; + o_gzzy[i]=g12*Gxzz[i]+g22*Gyzz[i]+g23*Gzzz[i]; + /* row z: g_{z,a} Gamma^a_{bc} */ + o_gxxz[i]=g13*Gxxx[i]+g23*Gyxx[i]+g33*Gzxx[i]; + o_gxyz[i]=g13*Gxxy[i]+g23*Gyxy[i]+g33*Gzxy[i]; + o_gxzz[i]=g13*Gxxz[i]+g23*Gyxz[i]+g33*Gzxz[i]; + o_gyyz[i]=g13*Gxyy[i]+g23*Gyyy[i]+g33*Gzyy[i]; + o_gyzz[i]=g13*Gxyz[i]+g23*Gyyz[i]+g33*Gzyz[i]; + o_gzzz[i]=g13*Gxzz[i]+g23*Gyzz[i]+g33*Gzzz[i]; + } +} + +/* Phase 10: After fdderivs of a metric component, contract with gup^{ij} + * R_comp = gup^xx*fxx + gup^yy*fyy + gup^zz*fzz + 2*(gup^xy*fxy + gup^xz*fxz + gup^yz*fyz) + */ +__global__ void kern_phase10_ricci_contract( + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ fxx, const double* __restrict__ fxy, + const double* __restrict__ fxz, const double* __restrict__ fyy, + const double* __restrict__ fyz, const double* __restrict__ fzz, + double* __restrict__ R_comp) +{ + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + R_comp[i] = gupxx[i]*fxx[i] + gupyy[i]*fyy[i] + gupzz[i]*fzz[i] + + 2.0*(gupxy[i]*fxy[i] + gupxz[i]*fxz[i] + gupyz[i]*fyz[i]); + } +} 
+ +/* Phase 11a: Ricci diagonal assembly (Rxx, Ryy, Rzz) */ +__global__ __launch_bounds__(128, 4) +void kern_phase11_ricci_diag( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Gamxa, const double* __restrict__ Gamya, + const double* __restrict__ Gamza, + const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, + const double* __restrict__ Gamxz, + const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, + const double* __restrict__ Gamyz_d, + const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, + const double* __restrict__ Gamzz_d, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + /* lowered Christoffel products */ + const double* __restrict__ lxxx, const double* __restrict__ lxyx, + const double* __restrict__ lxzx, const double* __restrict__ lyyx, + const double* __restrict__ lyzx, const double* __restrict__ lzzx, + const double* __restrict__ lxxy, const double* __restrict__ lxyy, + const double* __restrict__ lxzy, const double* __restrict__ lyyy, + const double* __restrict__ lyzy, const 
double* __restrict__ lzzy, + const double* __restrict__ lxxz, const double* __restrict__ lxyz, + const double* __restrict__ lxzz, const double* __restrict__ lyyz, + const double* __restrict__ lyzz, const double* __restrict__ lzzz, + double* __restrict__ Rxx, double* __restrict__ Ryy, double* __restrict__ Rzz) +{ + const double H = 0.5, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + /* Rxx */ + Rxx[i] = -H*Rxx[i] + + gxx[i]*Gamxx[i]+gxy[i]*Gamyx[i]+gxz[i]*Gamzx[i] + + Gamxa[i]*lxxx[i]+Gamya[i]*lxyx[i]+Gamza[i]*lxzx[i] + + uxx*(TWO*(Gxxx[i]*lxxx[i]+Gyxx[i]*lxyx[i]+Gzxx[i]*lxzx[i]) + +(Gxxx[i]*lxxx[i]+Gyxx[i]*lxxy[i]+Gzxx[i]*lxxz[i])) + + uxy*(TWO*(Gxxx[i]*lxyx[i]+Gyxx[i]*lyyx[i]+Gzxx[i]*lyzx[i] + +Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i]) + +(Gxxy[i]*lxxx[i]+Gyxy[i]*lxxy[i]+Gzxy[i]*lxxz[i]) + +(Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i])) + + uxz*(TWO*(Gxxx[i]*lxzx[i]+Gyxx[i]*lyzx[i]+Gzxx[i]*lzzx[i] + +Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i]) + +(Gxxz[i]*lxxx[i]+Gyxz[i]*lxxy[i]+Gzxz[i]*lxxz[i]) + +(Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i])) + + uyy*(TWO*(Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i]) + +(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i])) + + uyz*(TWO*(Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i] + +Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i]) + +(Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i]) + +(Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i])) + + uzz*(TWO*(Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i]) + +(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i])); + + /* Ryy */ + Ryy[i] = -H*Ryy[i] + + gxy[i]*Gamxy[i]+gyy[i]*Gamyy_d[i]+gyz[i]*Gamzy[i] + + Gamxa[i]*lxyy[i]+Gamya[i]*lyyy[i]+Gamza[i]*lyzy[i] + + uxx*(TWO*(Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i]) + +(Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i])) + + 
uxy*(TWO*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i] + +Gxyy[i]*lxxy[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxzy[i]) + +(Gxyy[i]*lxyx[i]+Gyyy[i]*lxyy[i]+Gzyy[i]*lxyz[i]) + +(Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i])) + + uxz*(TWO*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i] + +Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i]) + +(Gxyz[i]*lxyx[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxyz[i]) + +(Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])) + + uyy*(TWO*(Gxyy[i]*lxyy[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyzy[i]) + +(Gxyy[i]*lyyx[i]+Gyyy[i]*lyyy[i]+Gzyy[i]*lyyz[i])) + + uyz*(TWO*(Gxyy[i]*lxzy[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lzzy[i] + +Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i]) + +(Gxyz[i]*lyyx[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyyz[i]) + +(Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i])) + + uzz*(TWO*(Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i]) + +(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i])); + + /* Rzz */ + Rzz[i] = -H*Rzz[i] + + gxz[i]*Gamxz[i]+gyz[i]*Gamyz_d[i]+gzz[i]*Gamzz_d[i] + + Gamxa[i]*lxzz[i]+Gamya[i]*lyzz[i]+Gamza[i]*lzzz[i] + + uxx*(TWO*(Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i]) + +(Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i])) + + uxy*(TWO*(Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i] + +Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i]) + +(Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i]) + +(Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i])) + + uxz*(TWO*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i] + +Gxzz[i]*lxxz[i]+Gyzz[i]*lxyz[i]+Gzzz[i]*lxzz[i]) + +(Gxzz[i]*lxzx[i]+Gyzz[i]*lxzy[i]+Gzzz[i]*lxzz[i]) + +(Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i])) + + uyy*(TWO*(Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i]) + +(Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i])) + + uyz*(TWO*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i] + +Gxzz[i]*lxyz[i]+Gyzz[i]*lyyz[i]+Gzzz[i]*lyzz[i]) + +(Gxzz[i]*lyzx[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lyzz[i]) + +(Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i])) + + uzz*(TWO*(Gxzz[i]*lxzz[i]+Gyzz[i]*lyzz[i]+Gzzz[i]*lzzz[i]) 
+ +(Gxzz[i]*lzzx[i]+Gyzz[i]*lzzy[i]+Gzzz[i]*lzzz[i])); + } +} + +/* Phase 11b: Ricci off-diagonal assembly (Rxy, Rxz, Ryz) */ +__global__ __launch_bounds__(128, 4) +void kern_phase11_ricci_offdiag( + const double* __restrict__ gxx, const double* __restrict__ gxy, + const double* __restrict__ gxz, const double* __restrict__ gyy, + const double* __restrict__ gyz, const double* __restrict__ gzz, + const double* __restrict__ gupxx, const double* __restrict__ gupxy, + const double* __restrict__ gupxz, const double* __restrict__ gupyy, + const double* __restrict__ gupyz, const double* __restrict__ gupzz, + const double* __restrict__ Gamxa, const double* __restrict__ Gamya, + const double* __restrict__ Gamza, + const double* __restrict__ Gamxx, const double* __restrict__ Gamxy, + const double* __restrict__ Gamxz, + const double* __restrict__ Gamyx, const double* __restrict__ Gamyy_d, + const double* __restrict__ Gamyz_d, + const double* __restrict__ Gamzx, const double* __restrict__ Gamzy, + const double* __restrict__ Gamzz_d, + const double* __restrict__ Gxxx, const double* __restrict__ Gxxy, + const double* __restrict__ Gxxz, const double* __restrict__ Gxyy, + const double* __restrict__ Gxyz, const double* __restrict__ Gxzz, + const double* __restrict__ Gyxx, const double* __restrict__ Gyxy, + const double* __restrict__ Gyxz, const double* __restrict__ Gyyy, + const double* __restrict__ Gyyz, const double* __restrict__ Gyzz, + const double* __restrict__ Gzxx, const double* __restrict__ Gzxy, + const double* __restrict__ Gzxz, const double* __restrict__ Gzyy, + const double* __restrict__ Gzyz, const double* __restrict__ Gzzz, + const double* __restrict__ lxxx, const double* __restrict__ lxyx, + const double* __restrict__ lxzx, const double* __restrict__ lyyx, + const double* __restrict__ lyzx, const double* __restrict__ lzzx, + const double* __restrict__ lxxy, const double* __restrict__ lxyy, + const double* __restrict__ lxzy, const double* __restrict__ lyyy, + const 
double* __restrict__ lyzy, const double* __restrict__ lzzy, + const double* __restrict__ lxxz, const double* __restrict__ lxyz, + const double* __restrict__ lxzz, const double* __restrict__ lyyz, + const double* __restrict__ lyzz, const double* __restrict__ lzzz, + double* __restrict__ Rxy, double* __restrict__ Rxz, double* __restrict__ Ryz) +{ + const double H = 0.5, TWO = 2.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + + /* Rxy */ + Rxy[i] = H*( + -Rxy[i] + +gxx[i]*Gamxy[i]+gxy[i]*Gamyy_d[i]+gxz[i]*Gamzy[i] + +gxy[i]*Gamxx[i]+gyy[i]*Gamyx[i]+gyz[i]*Gamzx[i] + +Gamxa[i]*lxyx[i]+Gamya[i]*lyyx[i]+Gamza[i]*lyzx[i] + +Gamxa[i]*lxxy[i]+Gamya[i]*lxyy[i]+Gamza[i]*lxzy[i]) + +uxx*(Gxxx[i]*lxxy[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxzy[i] + +Gxxy[i]*lxxx[i]+Gyxy[i]*lxyx[i]+Gzxy[i]*lxzx[i] + +Gxxx[i]*lxyx[i]+Gyxx[i]*lxyy[i]+Gzxx[i]*lxyz[i]) + +uxy*(Gxxx[i]*lxyy[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyzy[i] + +Gxxy[i]*lxyx[i]+Gyxy[i]*lyyx[i]+Gzxy[i]*lyzx[i] + +Gxxy[i]*lxyx[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxyz[i] + +Gxxy[i]*lxxy[i]+Gyxy[i]*lxyy[i]+Gzxy[i]*lxzy[i] + +Gxyy[i]*lxxx[i]+Gyyy[i]*lxyx[i]+Gzyy[i]*lxzx[i] + +Gxxx[i]*lyyx[i]+Gyxx[i]*lyyy[i]+Gzxx[i]*lyyz[i]) + +uxz*(Gxxx[i]*lxzy[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lzzy[i] + +Gxxy[i]*lxzx[i]+Gyxy[i]*lyzx[i]+Gzxy[i]*lzzx[i] + +Gxxz[i]*lxyx[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxyz[i] + +Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i] + +Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i] + +Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i]) + +uyy*(Gxxy[i]*lxyy[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyzy[i] + +Gxyy[i]*lxyx[i]+Gyyy[i]*lyyx[i]+Gzyy[i]*lyzx[i] + +Gxxy[i]*lyyx[i]+Gyxy[i]*lyyy[i]+Gzxy[i]*lyyz[i]) + +uyz*(Gxxy[i]*lxzy[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lzzy[i] + +Gxyy[i]*lxzx[i]+Gyyy[i]*lyzx[i]+Gzyy[i]*lzzx[i] + +Gxxz[i]*lyyx[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyyz[i] + +Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i] + 
+Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i] + +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) + +uzz*(Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i] + +Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i] + +Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i]); + + /* Rxz */ + Rxz[i] = H*( + -Rxz[i] + +gxx[i]*Gamxz[i]+gxy[i]*Gamyz_d[i]+gxz[i]*Gamzz_d[i] + +gxz[i]*Gamxx[i]+gyz[i]*Gamyx[i]+gzz[i]*Gamzx[i] + +Gamxa[i]*lxzx[i]+Gamya[i]*lyzx[i]+Gamza[i]*lzzx[i] + +Gamxa[i]*lxxz[i]+Gamya[i]*lxyz[i]+Gamza[i]*lxzz[i]) + +uxx*(Gxxx[i]*lxxz[i]+Gyxx[i]*lxyz[i]+Gzxx[i]*lxzz[i] + +Gxxz[i]*lxxx[i]+Gyxz[i]*lxyx[i]+Gzxz[i]*lxzx[i] + +Gxxx[i]*lxzx[i]+Gyxx[i]*lxzy[i]+Gzxx[i]*lxzz[i]) + +uxy*(Gxxx[i]*lxyz[i]+Gyxx[i]*lyyz[i]+Gzxx[i]*lyzz[i] + +Gxxz[i]*lxyx[i]+Gyxz[i]*lyyx[i]+Gzxz[i]*lyzx[i] + +Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i] + +Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i] + +Gxyz[i]*lxxx[i]+Gyyz[i]*lxyx[i]+Gzyz[i]*lxzx[i] + +Gxxx[i]*lyzx[i]+Gyxx[i]*lyzy[i]+Gzxx[i]*lyzz[i]) + +uxz*(Gxxx[i]*lxzz[i]+Gyxx[i]*lyzz[i]+Gzxx[i]*lzzz[i] + +Gxxz[i]*lxzx[i]+Gyxz[i]*lyzx[i]+Gzxz[i]*lzzx[i] + +Gxxz[i]*lxzx[i]+Gyxz[i]*lxzy[i]+Gzxz[i]*lxzz[i] + +Gxxz[i]*lxxz[i]+Gyxz[i]*lxyz[i]+Gzxz[i]*lxzz[i] + +Gxzz[i]*lxxx[i]+Gyzz[i]*lxyx[i]+Gzzz[i]*lxzx[i] + +Gxxx[i]*lzzx[i]+Gyxx[i]*lzzy[i]+Gzxx[i]*lzzz[i]) + +uyy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i] + +Gxyz[i]*lxyx[i]+Gyyz[i]*lyyx[i]+Gzyz[i]*lyzx[i] + +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i]) + +uyz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i] + +Gxyz[i]*lxzx[i]+Gyyz[i]*lyzx[i]+Gzyz[i]*lzzx[i] + +Gxxz[i]*lyzx[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lyzz[i] + +Gxxz[i]*lxyz[i]+Gyxz[i]*lyyz[i]+Gzxz[i]*lyzz[i] + +Gxzz[i]*lxyx[i]+Gyzz[i]*lyyx[i]+Gzzz[i]*lyzx[i] + +Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i]) + +uzz*(Gxxz[i]*lxzz[i]+Gyxz[i]*lyzz[i]+Gzxz[i]*lzzz[i] + +Gxzz[i]*lxzx[i]+Gyzz[i]*lyzx[i]+Gzzz[i]*lzzx[i] + +Gxxz[i]*lzzx[i]+Gyxz[i]*lzzy[i]+Gzxz[i]*lzzz[i]); + + /* Ryz */ + Ryz[i] = H*( + -Ryz[i] + 
+gxy[i]*Gamxz[i]+gyy[i]*Gamyz_d[i]+gyz[i]*Gamzz_d[i]
+                +gxz[i]*Gamxy[i]+gyz[i]*Gamyy_d[i]+gzz[i]*Gamzy[i]
+                +Gamxa[i]*lxzy[i]+Gamya[i]*lyzy[i]+Gamza[i]*lzzy[i]
+                +Gamxa[i]*lxyz[i]+Gamya[i]*lyyz[i]+Gamza[i]*lyzz[i])
+            +uxx*(Gxxy[i]*lxxz[i]+Gyxy[i]*lxyz[i]+Gzxy[i]*lxzz[i]
+                 +Gxxz[i]*lxxy[i]+Gyxz[i]*lxyy[i]+Gzxz[i]*lxzy[i]
+                 +Gxxy[i]*lxzx[i]+Gyxy[i]*lxzy[i]+Gzxy[i]*lxzz[i])
+            +uxy*(Gxxy[i]*lxyz[i]+Gyxy[i]*lyyz[i]+Gzxy[i]*lyzz[i]
+                 +Gxxz[i]*lxyy[i]+Gyxz[i]*lyyy[i]+Gzxz[i]*lyzy[i]
+                 +Gxyy[i]*lxzx[i]+Gyyy[i]*lxzy[i]+Gzyy[i]*lxzz[i]
+                 +Gxyy[i]*lxxz[i]+Gyyy[i]*lxyz[i]+Gzyy[i]*lxzz[i]
+                 +Gxyz[i]*lxxy[i]+Gyyz[i]*lxyy[i]+Gzyz[i]*lxzy[i]
+                 +Gxxy[i]*lyzx[i]+Gyxy[i]*lyzy[i]+Gzxy[i]*lyzz[i])
+            +uxz*(Gxxy[i]*lxzz[i]+Gyxy[i]*lyzz[i]+Gzxy[i]*lzzz[i]
+                 +Gxxz[i]*lxzy[i]+Gyxz[i]*lyzy[i]+Gzxz[i]*lzzy[i]
+                 +Gxyz[i]*lxzx[i]+Gyyz[i]*lxzy[i]+Gzyz[i]*lxzz[i]
+                 +Gxyz[i]*lxxz[i]+Gyyz[i]*lxyz[i]+Gzyz[i]*lxzz[i]
+                 +Gxzz[i]*lxxy[i]+Gyzz[i]*lxyy[i]+Gzzz[i]*lxzy[i]
+                 +Gxxy[i]*lzzx[i]+Gyxy[i]*lzzy[i]+Gzxy[i]*lzzz[i])
+            +uyy*(Gxyy[i]*lxyz[i]+Gyyy[i]*lyyz[i]+Gzyy[i]*lyzz[i]
+                 +Gxyz[i]*lxyy[i]+Gyyz[i]*lyyy[i]+Gzyz[i]*lyzy[i]
+                 +Gxyy[i]*lyzx[i]+Gyyy[i]*lyzy[i]+Gzyy[i]*lyzz[i])
+            +uyz*(Gxyy[i]*lxzz[i]+Gyyy[i]*lyzz[i]+Gzyy[i]*lzzz[i]
+                 +Gxyz[i]*lxzy[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lzzy[i]
+                 +Gxyz[i]*lyzx[i]+Gyyz[i]*lyzy[i]+Gzyz[i]*lyzz[i]
+                 +Gxyz[i]*lxyz[i]+Gyyz[i]*lyyz[i]+Gzyz[i]*lyzz[i]
+                 +Gxzz[i]*lxyy[i]+Gyzz[i]*lyyy[i]+Gzzz[i]*lyzy[i]
+                 +Gxyy[i]*lzzx[i]+Gyyy[i]*lzzy[i]+Gzyy[i]*lzzz[i])
+            +uzz*(Gxyz[i]*lxzz[i]+Gyyz[i]*lyzz[i]+Gzyz[i]*lzzz[i]
+                 +Gxzz[i]*lxzy[i]+Gyzz[i]*lyzy[i]+Gzzz[i]*lzzy[i]
+                 +Gxyz[i]*lzzx[i]+Gyyz[i]*lzzy[i]+Gzyz[i]*lzzz[i]);
+    }
+}
+
+/* Phase 13: chi correction to Ricci tensor
+ * After fdderivs(chi), subtract Christoffel*chi_deriv, compute the contracted scalar f,
+ * then add chi contribution to Rxx..Rzz.
+ *
+ * Read-only inputs: chin1; chix/chiy/chiz; gxx..gzz; gupxx..gupzz; and the 18
+ *   connection arrays Gxxx..Gzzz (in G{a}{ij} the first letter a is the index
+ *   summed against chi{a}).
+ * fxx..fzz (in/out): overwritten with
+ *   f_ij - (Gx{ij}*chix + Gy{ij}*chiy + Gz{ij}*chiz).
+ *   Per the note above they are expected to hold the fdderivs(chi) output on
+ *   entry; that producer is outside this kernel -- confirm at the call site.
+ * Rxx..Rzz (in/out): each incremented by
+ *   (f_ij - chi_i*chi_j/(2*chin1) + g_ij*f) / (2*chin1), with
+ *   f = gup^{kl} * (f_kl - 1.5/chin1 * chi_k*chi_l) built from the updated f_kl.
+ * Iteration: grid-stride loop over i in [0, d_gp.all); chin1[i] is used as a
+ *   divisor with no zero check.
+ * __launch_bounds__(128, 4) declares at most 128 threads per block and asks
+ *   for at least 4 resident blocks per multiprocessor.
+ * NOTE(review): the local constant H is declared but never used in this kernel.
+ */
+__global__ __launch_bounds__(128, 4)
+void kern_phase13_chi_correction(
+    const double* __restrict__ chin1,
+    const double* __restrict__ chix, const double* __restrict__ chiy,
+    const double* __restrict__ chiz,
+    const double* __restrict__ gxx, const double* __restrict__ gxy,
+    const double* __restrict__ gxz, const double* __restrict__ gyy,
+    const double* __restrict__ gyz, const double* __restrict__ gzz,
+    const double* __restrict__ gupxx, const double* __restrict__ gupxy,
+    const double* __restrict__ gupxz, const double* __restrict__ gupyy,
+    const double* __restrict__ gupyz, const double* __restrict__ gupzz,
+    const double* __restrict__ Gxxx, const double* __restrict__ Gxxy,
+    const double* __restrict__ Gxxz, const double* __restrict__ Gxyy,
+    const double* __restrict__ Gxyz, const double* __restrict__ Gxzz,
+    const double* __restrict__ Gyxx, const double* __restrict__ Gyxy,
+    const double* __restrict__ Gyxz, const double* __restrict__ Gyyy,
+    const double* __restrict__ Gyyz, const double* __restrict__ Gyzz,
+    const double* __restrict__ Gzxx, const double* __restrict__ Gzxy,
+    const double* __restrict__ Gzxz, const double* __restrict__ Gzyy,
+    const double* __restrict__ Gzyz, const double* __restrict__ Gzzz,
+    double* __restrict__ fxx, double* __restrict__ fxy,
+    double* __restrict__ fxz, double* __restrict__ fyy,
+    double* __restrict__ fyz, double* __restrict__ fzz,
+    double* __restrict__ Rxx, double* __restrict__ Rxy,
+    double* __restrict__ Rxz, double* __restrict__ Ryy,
+    double* __restrict__ Ryz, double* __restrict__ Rzz)
+{
+    const double H=0.5, TWO=2.0, F3o2=1.5;   /* H is not referenced below */
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) {
+        double cx=chix[i],cy=chiy[i],cz=chiz[i],c1=chin1[i];
+        /* subtract Christoffel * chi_deriv; fxx..fzz are updated in place and re-read below */
+        fxx[i] -= Gxxx[i]*cx+Gyxx[i]*cy+Gzxx[i]*cz;
+        fxy[i] -= Gxxy[i]*cx+Gyxy[i]*cy+Gzxy[i]*cz;
+        fxz[i] -= Gxxz[i]*cx+Gyxz[i]*cy+Gzxz[i]*cz;
+        fyy[i] -= Gxyy[i]*cx+Gyyy[i]*cy+Gzyy[i]*cz;
+        fyz[i] -= Gxyz[i]*cx+Gyyz[i]*cy+Gzyz[i]*cz;
+        fzz[i] -= Gxzz[i]*cx+Gyzz[i]*cy+Gzzz[i]*cz;
+
+        double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i];
+        double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i];
+        double f_val = uxx*(fxx[i]-F3o2/c1*cx*cx)   /* f = gup^{kl}*(f_kl - 1.5/chin1*chi_k*chi_l) */
+                     + uyy*(fyy[i]-F3o2/c1*cy*cy)
+                     + uzz*(fzz[i]-F3o2/c1*cz*cz)
+                     + TWO*uxy*(fxy[i]-F3o2/c1*cx*cy)   /* off-diagonal pairs enter twice, hence TWO */
+                     + TWO*uxz*(fxz[i]-F3o2/c1*cx*cz)
+                     + TWO*uyz*(fyz[i]-F3o2/c1*cy*cz);
+
+        double inv2c = 1.0/(c1*TWO);   /* 1/(2*chin1) */
+        Rxx[i] += (fxx[i]-cx*cx*inv2c+gxx[i]*f_val)*inv2c;   /* R_ij += (f_ij - chi_i*chi_j/(2*chin1) + g_ij*f)/(2*chin1) */
+        Ryy[i] += (fyy[i]-cy*cy*inv2c+gyy[i]*f_val)*inv2c;
+        Rzz[i] += (fzz[i]-cz*cz*inv2c+gzz[i]*f_val)*inv2c;
+        Rxy[i] += (fxy[i]-cx*cy*inv2c+gxy[i]*f_val)*inv2c;
+        Rxz[i] += (fxz[i]-cx*cz*inv2c+gxz[i]*f_val)*inv2c;
+        Ryz[i] += (fyz[i]-cy*cz*inv2c+gyz[i]*f_val)*inv2c;
+    }
+}
+
+/* Phase 15: trK_rhs, Aij_rhs, gauge (after fdderivs(Lap) and fderivs(chi))
+ * Also updates Christoffel with physical chi correction, computes Lap_rhs, beta_rhs, dtSf_rhs.
+ */
+__global__ __launch_bounds__(128, 4)
+void kern_phase15_trK_Aij_gauge(
+    const double* __restrict__ alpn1, const double* __restrict__ chin1,
+    const double* __restrict__ chix, const double* __restrict__ chiy,
+    const double* __restrict__ chiz,
+    const double* __restrict__ gxx, const double* __restrict__ gxy,
+    const double* __restrict__ gxz, const double* __restrict__ gyy,
+    const double* __restrict__ gyz, const double* __restrict__ gzz,
+    const double* __restrict__ gupxx, const double* __restrict__ gupxy,
+    const double* __restrict__ gupxz, const double* __restrict__ gupyy,
+    const double* __restrict__ gupyz, const double* __restrict__ gupzz,
+    const double* __restrict__ trK,
+    const double* __restrict__ Axx, const double* __restrict__ Axy,
+    const double* __restrict__ Axz, const double* __restrict__ Ayy,
+    const double* __restrict__ Ayz, const double* __restrict__ Azz,
+    const double* __restrict__ Lapx, const double* __restrict__ Lapy,
+    const double* __restrict__ Lapz,
+    const double* __restrict__ betaxx, const double* __restrict__ betaxy,
+    const
double* __restrict__ betaxz, const double* __restrict__ betayx, + const double* __restrict__ betayy, const double* __restrict__ betayz, + const double* __restrict__ betazx, const double* __restrict__ betazy, + const double* __restrict__ betazz, + const double* __restrict__ rho, + const double* __restrict__ Sx_m, const double* __restrict__ Sy_m, + const double* __restrict__ Sz_m, + const double* __restrict__ Sxx_m, const double* __restrict__ Sxy_m, + const double* __restrict__ Sxz_m, const double* __restrict__ Syy_m, + const double* __restrict__ Syz_m, const double* __restrict__ Szz_m, + const double* __restrict__ dtSfx, const double* __restrict__ dtSfy, + const double* __restrict__ dtSfz, + const double* __restrict__ Rxx, const double* __restrict__ Rxy, + const double* __restrict__ Rxz, const double* __restrict__ Ryy, + const double* __restrict__ Ryz, const double* __restrict__ Rzz, + double* __restrict__ Gxxx, double* __restrict__ Gxxy, + double* __restrict__ Gxxz, double* __restrict__ Gxyy, + double* __restrict__ Gxyz_o, double* __restrict__ Gxzz, + double* __restrict__ Gyxx, double* __restrict__ Gyxy, + double* __restrict__ Gyxz, double* __restrict__ Gyyy, + double* __restrict__ Gyyz, double* __restrict__ Gyzz, + double* __restrict__ Gzxx, double* __restrict__ Gzxy, + double* __restrict__ Gzxz, double* __restrict__ Gzyy, + double* __restrict__ Gzyz, double* __restrict__ Gzzz, + /* fxx..fzz = fdderivs(Lap) output */ + double* __restrict__ fxx, double* __restrict__ fxy, + double* __restrict__ fxz, double* __restrict__ fyy, + double* __restrict__ fyz, double* __restrict__ fzz, + /* dtSfx_rhs..dtSfz_rhs = fderivs(chi) output, then overwritten */ + double* __restrict__ dtSfx_rhs, double* __restrict__ dtSfy_rhs, + double* __restrict__ dtSfz_rhs, + double* __restrict__ trK_rhs, + double* __restrict__ Axx_rhs, double* __restrict__ Axy_rhs, + double* __restrict__ Axz_rhs, double* __restrict__ Ayy_rhs, + double* __restrict__ Ayz_rhs, double* __restrict__ Azz_rhs, + 
double* __restrict__ Lap_rhs, + double* __restrict__ betax_rhs, double* __restrict__ betay_rhs, + double* __restrict__ betaz_rhs, + double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs, + double* __restrict__ Gamz_rhs, + double* __restrict__ f_arr, double* __restrict__ S_arr) +{ + const double TWO=2.0, FOUR=4.0, EIGHT=8.0, H=0.5; + const double F1o3=1.0/3.0, F2o3=2.0/3.0, F3o2=1.5; + const double PI_V=3.14159265358979323846; + const double F16=16.0, F8=8.0; + for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all; i += blockDim.x*gridDim.x) { + double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i]; + double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i]; + double a=alpn1[i], c1=chin1[i]; + double cx=chix[i],cy=chiy[i],cz=chiz[i]; + double lx=Lapx[i],ly=Lapy[i],lz=Lapz[i]; + + /* raised chi/chi */ + double gx=(uxx*cx+uxy*cy+uxz*cz)/c1; + double gy=(uxy*cx+uyy*cy+uyz*cz)/c1; + double gz=(uxz*cx+uyz*cy+uzz*cz)/c1; + + /* Christoffel physical correction */ + Gxxx[i]-=((cx+cx)/c1-gxx[i]*gx)*H; + Gyxx[i]-=(0.0-gxx[i]*gy)*H; + Gzxx[i]-=(0.0-gxx[i]*gz)*H; + Gxyy[i]-=(0.0-gyy[i]*gx)*H; + Gyyy[i]-=((cy+cy)/c1-gyy[i]*gy)*H; + Gzyy[i]-=(0.0-gyy[i]*gz)*H; + Gxzz[i]-=(0.0-gzz[i]*gx)*H; + Gyzz[i]-=(0.0-gzz[i]*gy)*H; + Gzzz[i]-=((cz+cz)/c1-gzz[i]*gz)*H; + Gxxy[i]-=(cy/c1-gxy[i]*gx)*H; + Gyxy[i]-=(cx/c1-gxy[i]*gy)*H; + Gzxy[i]-=(0.0-gxy[i]*gz)*H; + Gxxz[i]-=(cz/c1-gxz[i]*gx)*H; + Gyxz[i]-=(0.0-gxz[i]*gy)*H; + Gzxz[i]-=(cx/c1-gxz[i]*gz)*H; + Gxyz_o[i]-=(0.0-gyz[i]*gx)*H; + Gyyz[i]-=(cz/c1-gyz[i]*gy)*H; + Gzyz[i]-=(cy/c1-gyz[i]*gz)*H; + + /* fxx..fzz correction: subtract Gamma*Lap_deriv */ + fxx[i]-=Gxxx[i]*lx+Gyxx[i]*ly+Gzxx[i]*lz; + fyy[i]-=Gxyy[i]*lx+Gyyy[i]*ly+Gzyy[i]*lz; + fzz[i]-=Gxzz[i]*lx+Gyzz[i]*ly+Gzzz[i]*lz; + fxy[i]-=Gxxy[i]*lx+Gyxy[i]*ly+Gzxy[i]*lz; + fxz[i]-=Gxxz[i]*lx+Gyxz[i]*ly+Gzxz[i]*lz; + fyz[i]-=Gxyz_o[i]*lx+Gyyz[i]*ly+Gzyz[i]*lz; + + /* D^i D_i alpha */ + double DDA = uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i] + +TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]); + + /* trace of S_ij 
(physical) */ + double S_v = c1*(uxx*Sxx_m[i]+uyy*Syy_m[i]+uzz*Szz_m[i] + +TWO*(uxy*Sxy_m[i]+uxz*Sxz_m[i]+uyz*Syz_m[i])); + + /* A^ij A_ij */ + double AijAij = + uxx*(uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] + +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i])) + +uyy*(uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] + +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i])) + +uzz*(uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] + +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i])) + +TWO*( + uxy*(uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] + +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) + +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) + +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i])) + +uxz*(uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] + +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) + +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) + +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i])) + +uyz*(uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] + +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) + +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) + +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]))); + + double trK_v = trK[i]; + double db = betaxx[i] + betayy[i] + betazz[i]; + + /* trK_rhs step 1: store D^iD_i alpha * chin1 */ + trK_rhs[i] = c1 * DDA; + + /* f_arr = -(1/3) * (DDA + alpha/chi * (2/3*K^2 - AijAij - 16pi*rho + 8pi*S)) */ + double f_v = F2o3*trK_v*trK_v - AijAij - F16*PI_V*rho[i] + EIGHT*PI_V*S_v; + f_arr[i] = -F1o3*(uxx*fxx[i]+uyy*fyy[i]+uzz*fzz[i] + +TWO*(uxy*fxy[i]+uxz*fxz[i]+uyz*fyz[i]) + +(a/c1)*f_v); + + /* fij = alpha*(Rij - 8pi*Sij) - D_iD_j alpha */ + double fxx_v=a*(Rxx[i]-EIGHT*PI_V*Sxx_m[i])-fxx[i]; + double fxy_v=a*(Rxy[i]-EIGHT*PI_V*Sxy_m[i])-fxy[i]; + double fxz_v=a*(Rxz[i]-EIGHT*PI_V*Sxz_m[i])-fxz[i]; + double fyy_v=a*(Ryy[i]-EIGHT*PI_V*Syy_m[i])-fyy[i]; + double fyz_v=a*(Ryz[i]-EIGHT*PI_V*Syz_m[i])-fyz[i]; + double fzz_v=a*(Rzz[i]-EIGHT*PI_V*Szz_m[i])-fzz[i]; + + /* Aij_rhs = chi*(fij - gij*f) */ + Axx_rhs[i]=fxx_v-gxx[i]*f_arr[i]; + Ayy_rhs[i]=fyy_v-gyy[i]*f_arr[i]; + 
Azz_rhs[i]=fzz_v-gzz[i]*f_arr[i]; + Axy_rhs[i]=fxy_v-gxy[i]*f_arr[i]; + Axz_rhs[i]=fxz_v-gxz[i]*f_arr[i]; + Ayz_rhs[i]=fyz_v-gyz[i]*f_arr[i]; + + /* A_il A^l_j */ + double AA_xx=uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i] + +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i]); + double AA_yy=uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i] + +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i]); + double AA_zz=uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i] + +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i]); + double AA_xy=uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i] + +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i]) + +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i]) + +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i]); + double AA_xz=uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i] + +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i]) + +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i]) + +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i]); + double AA_yz=uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i] + +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i]) + +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i]) + +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i]); + + /* trK_rhs final */ + trK_rhs[i] = -trK_rhs[i] + + a*(F1o3*trK_v*trK_v + +uxx*AA_xx+uyy*AA_yy+uzz*AA_zz + +TWO*(uxy*AA_xy+uxz*AA_xz+uyz*AA_yz) + +FOUR*PI_V*(rho[i]+S_v)); + + /* Aij_rhs final */ + Axx_rhs[i]=c1*Axx_rhs[i]+a*(trK_v*Axx[i]-TWO*AA_xx) + +TWO*(Axx[i]*betaxx[i]+Axy[i]*betayx[i]+Axz[i]*betazx[i])-F2o3*Axx[i]*db; + Ayy_rhs[i]=c1*Ayy_rhs[i]+a*(trK_v*Ayy[i]-TWO*AA_yy) + +TWO*(Axy[i]*betaxy[i]+Ayy[i]*betayy[i]+Ayz[i]*betazy[i])-F2o3*Ayy[i]*db; + Azz_rhs[i]=c1*Azz_rhs[i]+a*(trK_v*Azz[i]-TWO*AA_zz) + +TWO*(Axz[i]*betaxz[i]+Ayz[i]*betayz[i]+Azz[i]*betazz[i])-F2o3*Azz[i]*db; + Axy_rhs[i]=c1*Axy_rhs[i]+a*(trK_v*Axy[i]-TWO*AA_xy) + +Axx[i]*betaxy[i]+Axz[i]*betazy[i]+Ayy[i]*betayx[i] + +Ayz[i]*betazx[i]+F1o3*Axy[i]*db-Axy[i]*betazz[i]; + Ayz_rhs[i]=c1*Ayz_rhs[i]+a*(trK_v*Ayz[i]-TWO*AA_yz) + +Axy[i]*betaxz[i]+Ayy[i]*betayz[i]+Axz[i]*betaxy[i] + 
+Azz[i]*betazy[i]+F1o3*Ayz[i]*db-Ayz[i]*betaxx[i];
+        Axz_rhs[i]=c1*Axz_rhs[i]+a*(trK_v*Axz[i]-TWO*AA_xz)
+            +Axx[i]*betaxz[i]+Axy[i]*betayz[i]+Ayz[i]*betayx[i]
+            +Azz[i]*betazx[i]+F1o3*Axz[i]*db-Axz[i]*betayy[i];
+
+        /* gauge */
+        Lap_rhs[i] = -TWO*a*trK_v;
+        betax_rhs[i] = 0.75*dtSfx[i];
+        betay_rhs[i] = 0.75*dtSfy[i];
+        betaz_rhs[i] = 0.75*dtSfz[i];
+#if (GAUGE == 0)
+        dtSfx_rhs[i] = Gamx_rhs[i] - 2.0*dtSfx[i];
+        dtSfy_rhs[i] = Gamy_rhs[i] - 2.0*dtSfy[i];
+        dtSfz_rhs[i] = Gamz_rhs[i] - 2.0*dtSfz[i];
+#endif
+    }
+}
+
+/* Phase 18: Hamiltonian & momentum constraints (co==0 only)
+ *
+ * Fills four residual arrays, one entry per index i in [0, d_gp.all):
+ *   ham_Res    = chin1*R + (2/3)*trK^2 - AijAij - 16*pi*rho, where
+ *                R = gup^{ij}*R_ij and AijAij = gup^{ik}*gup^{jl}*A_ij*A_kl;
+ *   mov{c}_Res = gup^{jk}*m{j}_{ck} - (2/3)*K{c} - 8*pi*S{c}_m  for c = x,y,z,
+ *                where m{d}_{ab} = dA{ab}_{d}
+ *                      - sum_k (G{k}{da}*A_{kb} + G{k}{db}*A_{ak})
+ *                      - chi{d}*A_{ab}/chin1.
+ * Symmetric index pairs share one array (e.g. Gxxy serves both xy and yx).
+ * Every input pointer is read-only here; only ham_Res and mov{x,y,z}_Res are
+ * written. chin1[i] is used as a divisor with no zero check.
+ * The kernel does not test co itself; the co==0 restriction is presumably
+ * enforced by the caller -- confirm at the launch site.
+ * Iteration is a grid-stride loop. __launch_bounds__(128, 4) declares at most
+ * 128 threads per block and asks for at least 4 resident blocks per
+ * multiprocessor.
+ */
+__global__ __launch_bounds__(128, 4)
+void kern_phase18_constraints(
+    const double* __restrict__ chin1,
+    const double* __restrict__ chix, const double* __restrict__ chiy,
+    const double* __restrict__ chiz,
+    const double* __restrict__ gupxx, const double* __restrict__ gupxy,
+    const double* __restrict__ gupxz, const double* __restrict__ gupyy,
+    const double* __restrict__ gupyz, const double* __restrict__ gupzz,
+    const double* __restrict__ trK,
+    const double* __restrict__ Axx, const double* __restrict__ Axy,
+    const double* __restrict__ Axz, const double* __restrict__ Ayy,
+    const double* __restrict__ Ayz, const double* __restrict__ Azz,
+    const double* __restrict__ Rxx, const double* __restrict__ Rxy,
+    const double* __restrict__ Rxz, const double* __restrict__ Ryy,
+    const double* __restrict__ Ryz, const double* __restrict__ Rzz,
+    const double* __restrict__ rho,
+    const double* __restrict__ Sx_m, const double* __restrict__ Sy_m,
+    const double* __restrict__ Sz_m,
+    const double* __restrict__ Kx, const double* __restrict__ Ky,
+    const double* __restrict__ Kz,
+    const double* __restrict__ Gxxx, const double* __restrict__ Gxxy,
+    const double* __restrict__ Gxxz, const double* __restrict__ Gxyy,
+    const double* __restrict__ Gxyz, const double* __restrict__ Gxzz,
+    const double* __restrict__ Gyxx, const double* __restrict__ Gyxy,
+    const double* __restrict__ Gyxz, const double* __restrict__ Gyyy,
+    const double* __restrict__ Gyyz, const double* __restrict__ Gyzz,
+    const double* __restrict__ Gzxx, const double* __restrict__ Gzxy,
+    const double* __restrict__ Gzxz, const double* __restrict__ Gzyy,
+    const double* __restrict__ Gzyz, const double* __restrict__ Gzzz,
+    /* dA/dx arrays (fderivs of Aij): in dA{ab}_{d} the suffix d is the derivative
+     * direction; below each _x/_y/_z array is paired with chix/chiy/chiz respectively */
+    const double* __restrict__ dAxx_x, const double* __restrict__ dAxx_y,
+    const double* __restrict__ dAxx_z,
+    const double* __restrict__ dAxy_x, const double* __restrict__ dAxy_y,
+    const double* __restrict__ dAxy_z,
+    const double* __restrict__ dAxz_x, const double* __restrict__ dAxz_y,
+    const double* __restrict__ dAxz_z,
+    const double* __restrict__ dAyy_x, const double* __restrict__ dAyy_y,
+    const double* __restrict__ dAyy_z,
+    const double* __restrict__ dAyz_x, const double* __restrict__ dAyz_y,
+    const double* __restrict__ dAyz_z,
+    const double* __restrict__ dAzz_x, const double* __restrict__ dAzz_y,
+    const double* __restrict__ dAzz_z,
+    double* __restrict__ ham_Res,
+    double* __restrict__ movx_Res, double* __restrict__ movy_Res,
+    double* __restrict__ movz_Res)
+{
+    const double TWO=2.0, F2o3=2.0/3.0, F8=8.0, F16=16.0;
+    const double PI_V=3.14159265358979323846;
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < d_gp.all;
+         i += blockDim.x*gridDim.x)
+    {
+        double uxx=gupxx[i],uxy=gupxy[i],uxz=gupxz[i];
+        double uyy=gupyy[i],uyz=gupyz[i],uzz=gupzz[i];
+        double c1=chin1[i];
+
+        /* Hamiltonian constraint */
+        double R_sc = uxx*Rxx[i]+uyy*Ryy[i]+uzz*Rzz[i]   /* R = gup^{ij}*R_ij */
+                     +TWO*(uxy*Rxy[i]+uxz*Rxz[i]+uyz*Ryz[i]);
+        /* AijAij = gup^{ik}*gup^{jl}*A_ij*A_kl (same expression as in phase15) */
+        double AijAij =
+            uxx*(uxx*Axx[i]*Axx[i]+uyy*Axy[i]*Axy[i]+uzz*Axz[i]*Axz[i]
+                +TWO*(uxy*Axx[i]*Axy[i]+uxz*Axx[i]*Axz[i]+uyz*Axy[i]*Axz[i]))
+            +uyy*(uxx*Axy[i]*Axy[i]+uyy*Ayy[i]*Ayy[i]+uzz*Ayz[i]*Ayz[i]
+                +TWO*(uxy*Axy[i]*Ayy[i]+uxz*Axy[i]*Ayz[i]+uyz*Ayy[i]*Ayz[i]))
+            +uzz*(uxx*Axz[i]*Axz[i]+uyy*Ayz[i]*Ayz[i]+uzz*Azz[i]*Azz[i]
+                +TWO*(uxy*Axz[i]*Ayz[i]+uxz*Axz[i]*Azz[i]+uyz*Ayz[i]*Azz[i]))
+            +TWO*(uxy*(uxx*Axx[i]*Axy[i]+uyy*Axy[i]*Ayy[i]+uzz*Axz[i]*Ayz[i]
+                    +uxy*(Axx[i]*Ayy[i]+Axy[i]*Axy[i])
+                    +uxz*(Axx[i]*Ayz[i]+Axz[i]*Axy[i])
+                    +uyz*(Axy[i]*Ayz[i]+Axz[i]*Ayy[i]))
+                +uxz*(uxx*Axx[i]*Axz[i]+uyy*Axy[i]*Ayz[i]+uzz*Axz[i]*Azz[i]
+                    +uxy*(Axx[i]*Ayz[i]+Axy[i]*Axz[i])
+                    +uxz*(Axx[i]*Azz[i]+Axz[i]*Axz[i])
+                    +uyz*(Axy[i]*Azz[i]+Axz[i]*Ayz[i]))
+                +uyz*(uxx*Axy[i]*Axz[i]+uyy*Ayy[i]*Ayz[i]+uzz*Ayz[i]*Azz[i]
+                    +uxy*(Axy[i]*Ayz[i]+Ayy[i]*Axz[i])
+                    +uxz*(Axy[i]*Azz[i]+Ayz[i]*Axz[i])
+                    +uyz*(Ayy[i]*Azz[i]+Ayz[i]*Ayz[i])));
+
+        ham_Res[i] = c1*R_sc + F2o3*trK[i]*trK[i] - AijAij - F16*PI_V*rho[i];
+
+        /* Momentum constraints: need covariant derivative of A */
+        double cx=chix[i],cy=chiy[i],cz=chiz[i];
+        /* D_j A^j_x etc — subtract Christoffel and chi terms */
+        /* m{d}_{ab} = dA{ab}_{d} - sum_k (G{k}{da}*A_{kb} + G{k}{db}*A_{ak}) - chi{d}*A_{ab}/chin1;
+         * e.g. mx_xx = dAxx_x - 2*(Gxxx*Axx+Gyxx*Axy+Gzxx*Axz) - chix*Axx/chin1 */
+        double mx_xx = dAxx_x[i]-(Gxxx[i]*Axx[i]+Gyxx[i]*Axy[i]+Gzxx[i]*Axz[i]
+                                 +Gxxx[i]*Axx[i]+Gyxx[i]*Axy[i]+Gzxx[i]*Axz[i])-cx*Axx[i]/c1;
+        double mx_xy = dAxy_x[i]-(Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i]
+                                 +Gxxx[i]*Axy[i]+Gyxx[i]*Ayy[i]+Gzxx[i]*Ayz[i])-cx*Axy[i]/c1;
+        double mx_xz = dAxz_x[i]-(Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i]
+                                 +Gxxx[i]*Axz[i]+Gyxx[i]*Ayz[i]+Gzxx[i]*Azz[i])-cx*Axz[i]/c1;
+        double mx_yy = dAyy_x[i]-(Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i]
+                                 +Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i])-cx*Ayy[i]/c1;
+        double mx_yz = dAyz_x[i]-(Gxxz[i]*Axy[i]+Gyxz[i]*Ayy[i]+Gzxz[i]*Ayz[i]
+                                 +Gxxy[i]*Axz[i]+Gyxy[i]*Ayz[i]+Gzxy[i]*Azz[i])-cx*Ayz[i]/c1;
+        double mx_zz = dAzz_x[i]-(Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i]
+                                 +Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i])-cx*Azz[i]/c1;
+
+        double my_xx = dAxx_y[i]-(Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i]
+                                 +Gxxy[i]*Axx[i]+Gyxy[i]*Axy[i]+Gzxy[i]*Axz[i])-cy*Axx[i]/c1;
+        double my_xy = dAxy_y[i]-(Gxyy[i]*Axx[i]+Gyyy[i]*Axy[i]+Gzyy[i]*Axz[i]
+                                 +Gxxy[i]*Axy[i]+Gyxy[i]*Ayy[i]+Gzxy[i]*Ayz[i])-cy*Axy[i]/c1;
+        double my_xz = dAxz_y[i]-(Gxyz[i]*Axx[i]+Gyyz[i]*Axy[i]+Gzyz[i]*Axz[i]
+                                 +Gxxy[i]*Axz[i]+Gyxy[i]*Ayz[i]+Gzxy[i]*Azz[i])-cy*Axz[i]/c1;
+        double my_yy = dAyy_y[i]-(Gxyy[i]*Axy[i]+Gyyy[i]*Ayy[i]+Gzyy[i]*Ayz[i]
+                                 +Gxyy[i]*Axy[i]+Gyyy[i]*Ayy[i]+Gzyy[i]*Ayz[i])-cy*Ayy[i]/c1;
+        double my_yz = dAyz_y[i]-(Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i]
+                                 +Gxyy[i]*Axz[i]+Gyyy[i]*Ayz[i]+Gzyy[i]*Azz[i])-cy*Ayz[i]/c1;
+        double my_zz = dAzz_y[i]-(Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i]
+                                 +Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i])-cy*Azz[i]/c1;
+
+        double mz_xx = dAxx_z[i]-(Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i]
+                                 +Gxxz[i]*Axx[i]+Gyxz[i]*Axy[i]+Gzxz[i]*Axz[i])-cz*Axx[i]/c1;
+        double mz_xy = dAxy_z[i]-(Gxyz[i]*Axx[i]+Gyyz[i]*Axy[i]+Gzyz[i]*Axz[i]
+                                 +Gxxz[i]*Axy[i]+Gyxz[i]*Ayy[i]+Gzxz[i]*Ayz[i])-cz*Axy[i]/c1;
+        double mz_xz = dAxz_z[i]-(Gxzz[i]*Axx[i]+Gyzz[i]*Axy[i]+Gzzz[i]*Axz[i]
+                                 +Gxxz[i]*Axz[i]+Gyxz[i]*Ayz[i]+Gzxz[i]*Azz[i])-cz*Axz[i]/c1;
+        double mz_yy = dAyy_z[i]-(Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i]
+                                 +Gxyz[i]*Axy[i]+Gyyz[i]*Ayy[i]+Gzyz[i]*Ayz[i])-cz*Ayy[i]/c1;
+        double mz_yz = dAyz_z[i]-(Gxzz[i]*Axy[i]+Gyzz[i]*Ayy[i]+Gzzz[i]*Ayz[i]
+                                 +Gxyz[i]*Axz[i]+Gyyz[i]*Ayz[i]+Gzyz[i]*Azz[i])-cz*Ayz[i]/c1;
+        double mz_zz = dAzz_z[i]-(Gxzz[i]*Axz[i]+Gyzz[i]*Ayz[i]+Gzzz[i]*Azz[i]
+                                 +Gxzz[i]*Axz[i]+Gyzz[i]*Ayz[i]+Gzzz[i]*Azz[i])-cz*Azz[i]/c1;
+
+        movx_Res[i] = uxx*mx_xx+uyy*my_xy+uzz*mz_xz   /* gup^{jk}*m{j}_{xk} - (2/3)*Kx - 8*pi*Sx_m */
+                     +uxy*mx_xy+uxz*mx_xz+uyz*my_xz
+                     +uxy*my_xx+uxz*mz_xx+uyz*mz_xy
+                     - F2o3*Kx[i] - F8*PI_V*Sx_m[i];
+        movy_Res[i] = uxx*mx_xy+uyy*my_yy+uzz*mz_yz   /* gup^{jk}*m{j}_{yk} - (2/3)*Ky - 8*pi*Sy_m */
+                     +uxy*mx_yy+uxz*mx_yz+uyz*my_yz
+                     +uxy*my_xy+uxz*mz_xy+uyz*mz_yy
+                     - F2o3*Ky[i] - F8*PI_V*Sy_m[i];
+        movz_Res[i] = uxx*mx_xz+uyy*my_yz+uzz*mz_zz   /* gup^{jk}*m{j}_{zk} - (2/3)*Kz - 8*pi*Sz_m */
+                     +uxy*mx_yz+uxz*mx_zz+uyz*my_zz
+                     +uxy*my_xz+uxz*mz_xz+uyz*mz_yz
+                     - F2o3*Kz[i] - F8*PI_V*Sz_m[i];
+    }
+}
+
+/* ================================================================== */
+/*  Main host function — drop-in replacement for bssn_rhs_c.C         */
+/* ================================================================== */
+
+extern "C"
+int f_compute_rhs_bssn(int *ex,
double &T, + double *X, double *Y, double *Z, + double *chi, double *trK, + double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, double *dtSfy, double *dtSfz, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, + double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, + double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *rho, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy_m, double *Sxz, double *Syy, double *Syz_m, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, + double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, + double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, + double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &co) +{ + /* --- Multi-GPU: select device --- */ + init_gpu_dispatch(); + CUDA_CHECK(cudaSetDevice(g_dispatch.my_device)); + + const int nx = ex[0], ny = ex[1], nz = ex[2]; + const int all = nx * ny * nz; + const double dX = X[1]-X[0], dY = Y[1]-Y[0], dZ = Z[1]-Z[0]; + const int NO_SYMM = 0, EQ_SYMM = 1; + const double SYM = 1.0, ANTI = -1.0; + + /* --- Allocate GPU buffers --- */ + ensure_gpu_buffers(nx, ny, nz); + + /* --- Setup GridParams --- */ + GridParams gp; + gp.ex[0]=nx; 
gp.ex[1]=ny; gp.ex[2]=nz; + gp.all=all; gp.dX=dX; gp.dY=dY; gp.dZ=dZ; + gp.d12dx=1.0/(12.0*dX); gp.d12dy=1.0/(12.0*dY); gp.d12dz=1.0/(12.0*dZ); + gp.d2dx=1.0/(2.0*dX); gp.d2dy=1.0/(2.0*dY); gp.d2dz=1.0/(2.0*dZ); + gp.Fdxdx=1.0/(12.0*dX*dX); gp.Fdydy=1.0/(12.0*dY*dY); gp.Fdzdz=1.0/(12.0*dZ*dZ); + gp.Sdxdx=1.0/(dX*dX); gp.Sdydy=1.0/(dY*dY); gp.Sdzdz=1.0/(dZ*dZ); + gp.Fdxdy=1.0/(144.0*dX*dY); gp.Fdxdz=1.0/(144.0*dX*dZ); gp.Fdydz=1.0/(144.0*dY*dZ); + gp.Sdxdy=0.25/(dX*dY); gp.Sdxdz=0.25/(dX*dZ); gp.Sdydz=0.25/(dY*dZ); + gp.imaxF=nx; gp.jmaxF=ny; gp.kmaxF=nz; + gp.iminF=1; gp.jminF=1; gp.kminF=1; + if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) gp.kminF = -1; + if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) gp.iminF = -1; + if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) gp.jminF = -1; + gp.iminF3=1; gp.jminF3=1; gp.kminF3=1; + if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) gp.kminF3 = -2; + if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) gp.iminF3 = -2; + if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) gp.jminF3 = -2; + gp.Symmetry=Symmetry; gp.eps=eps; gp.co=co; + gp.fh2_nx=nx+2; gp.fh2_ny=ny+2; gp.fh2_nz=nz+2; + gp.fh3_nx=nx+3; gp.fh3_ny=ny+3; gp.fh3_nz=nz+3; + CUDA_CHECK(cudaMemcpyToSymbol(d_gp, &gp, sizeof(GridParams))); + + /* --- Shorthand for device slot pointers --- */ + #define D(s) g_buf.slot[s] + const size_t bytes = (size_t)all * sizeof(double); + + /* --- H2D: stage all inputs, then one bulk copy --- */ + double *h2d_src[] = { + chi, trK, dxx, gxy, gxz, dyy, gyz, dzz, + Axx, Axy, Axz, Ayy, Ayz, Azz, + Gamx, Gamy, Gamz, + Lap, betax, betay, betaz, + dtSfx, dtSfy, dtSfz, + rho, Sx, Sy, Sz, + Sxx, Sxy_m, Sxz, Syy, Syz_m, Szz + }; + static_assert((int)(sizeof(h2d_src) / sizeof(h2d_src[0])) == H2D_INPUT_SLOT_COUNT, + "h2d_src list must match H2D_INPUT_SLOT_COUNT"); + for (int s = 0; s < H2D_INPUT_SLOT_COUNT; ++s) { + std::memcpy(g_buf.h_stage + (size_t)s * all, h2d_src[s], bytes); + } + CUDA_CHECK(cudaMemcpy(D(S_chi), g_buf.h_stage, + (size_t)H2D_INPUT_SLOT_COUNT * bytes, + 
cudaMemcpyHostToDevice)); + + /* ============================================================ */ + /* Phase 1: prep — alpn1, chin1, gxx, gyy, gzz */ + /* ============================================================ */ + kern_phase1_prep<<>>( + D(S_Lap), D(S_chi), D(S_dxx), D(S_dyy), D(S_dzz), + D(S_alpn1), D(S_chin1), D(S_gxx), D(S_gyy), D(S_gzz)); + + /* 12x fderivs */ + gpu_fderivs(D(S_betax), D(S_betaxx),D(S_betaxy),D(S_betaxz), ANTI,SYM,SYM, all); + gpu_fderivs(D(S_betay), D(S_betayx),D(S_betayy),D(S_betayz), SYM,ANTI,SYM, all); + gpu_fderivs(D(S_betaz), D(S_betazx),D(S_betazy),D(S_betazz), SYM,SYM,ANTI, all); + gpu_fderivs(D(S_chi), D(S_chix),D(S_chiy),D(S_chiz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_dxx), D(S_gxxx),D(S_gxxy),D(S_gxxz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_gxy), D(S_gxyx),D(S_gxyy),D(S_gxyz), ANTI,ANTI,SYM, all); + gpu_fderivs(D(S_gxz), D(S_gxzx),D(S_gxzy),D(S_gxzz), ANTI,SYM,ANTI, all); + gpu_fderivs(D(S_dyy), D(S_gyyx),D(S_gyyy),D(S_gyyz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_gyz), D(S_gyzx),D(S_gyzy),D(S_gyzz), SYM,ANTI,ANTI, all); + gpu_fderivs(D(S_dzz), D(S_gzzx),D(S_gzzy),D(S_gzzz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_Lap), D(S_Lapx),D(S_Lapy),D(S_Lapz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_trK), D(S_Kx),D(S_Ky),D(S_Kz), SYM,SYM,SYM, all); + + /* ============================================================ */ + /* Phase 2: metric RHS + inverse */ + /* ============================================================ */ + kern_phase2_metric_rhs<<>>( + D(S_alpn1), D(S_chin1), + D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), + D(S_trK), + D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), + D(S_betaxx), D(S_betaxy), D(S_betaxz), + D(S_betayx), D(S_betayy), D(S_betayz), + D(S_betazx), D(S_betazy), D(S_betazz), + D(S_chi_rhs), D(S_gxx_rhs), D(S_gyy_rhs), D(S_gzz_rhs), + D(S_gxy_rhs), D(S_gyz_rhs), D(S_gxz_rhs)); + + kern_phase2_inverse<<>>( + D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), + D(S_gupxx), 
D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz)); + + /* Phase 3: Gamma constraint (co==0) */ + if (co == 0) { + kern_phase3_gamma_constraint<<>>( + D(S_Gamx), D(S_Gamy), D(S_Gamz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_gxxx), D(S_gxyx), D(S_gxzx), D(S_gyyx), D(S_gyzx), D(S_gzzx), + D(S_gxxy), D(S_gxyy), D(S_gxzy), D(S_gyyy), D(S_gyzy), D(S_gzzy), + D(S_gxxz), D(S_gxyz), D(S_gxzz), D(S_gyyz), D(S_gyzz), D(S_gzzz), + D(S_Gmx_Res), D(S_Gmy_Res), D(S_Gmz_Res)); + } + + /* Phase 4: Christoffel symbols */ + kern_phase4_christoffel<<>>( + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_gxxx), D(S_gxyx), D(S_gxzx), D(S_gyyx), D(S_gyzx), D(S_gzzx), + D(S_gxxy), D(S_gxyy), D(S_gxzy), D(S_gyyy), D(S_gyzy), D(S_gzzy), + D(S_gxxz), D(S_gxyz), D(S_gxzz), D(S_gyyz), D(S_gyzz), D(S_gzzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz)); + + /* Phase 5: Raise A index (stored in Rxx..Rzz temporarily) */ + kern_phase5_raise_A<<>>( + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz)); + + /* Phase 6: Gamma_rhs part 1 */ + kern_phase6_gamma_rhs_part1<<>>( + D(S_Lapx), D(S_Lapy), D(S_Lapz), + D(S_alpn1), D(S_chin1), + D(S_chix), D(S_chiy), D(S_chiz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_Kx), D(S_Ky), D(S_Kz), + D(S_Sx), D(S_Sy), D(S_Sz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), + D(S_Ryy), D(S_Ryz), D(S_Rzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), 
D(S_Gamzyz), D(S_Gamzzz), + D(S_Gamx_rhs), D(S_Gamy_rhs), D(S_Gamz_rhs)); + + /* Phase 7: fdderivs(beta) + fderivs(Gamma) */ + gpu_fdderivs(D(S_betax), D(S_gxxx),D(S_gxyx),D(S_gxzx), + D(S_gyyx),D(S_gyzx),D(S_gzzx), ANTI,SYM,SYM, all); + gpu_fdderivs(D(S_betay), D(S_gxxy),D(S_gxyy),D(S_gxzy), + D(S_gyyy),D(S_gyzy),D(S_gzzy), SYM,ANTI,SYM, all); + gpu_fdderivs(D(S_betaz), D(S_gxxz),D(S_gxyz),D(S_gxzz), + D(S_gyyz),D(S_gyzz),D(S_gzzz), SYM,SYM,ANTI, all); + gpu_fderivs(D(S_Gamx), D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), ANTI,SYM,SYM, all); + gpu_fderivs(D(S_Gamy), D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), SYM,ANTI,SYM, all); + gpu_fderivs(D(S_Gamz), D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), SYM,SYM,ANTI, all); + + /* Phase 8: Gamma_rhs part 2 */ + kern_phase8_gamma_rhs_part2<<>>( + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), + D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), + D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), + D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_betaxx),D(S_betaxy),D(S_betaxz), + D(S_betayx),D(S_betayy),D(S_betayz), + D(S_betazx),D(S_betazy),D(S_betazz), + D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs), + D(S_Gamxa),D(S_Gamya),D(S_Gamza)); + + /* Phase 9: Christoffel contract (lowered products for Ricci) */ + kern_phase9_christoffel_contract<<>>( + D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + 
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz)); + + /* Phase 10: 6x fdderivs(metric) + Ricci contract */ + gpu_fdderivs(D(S_dxx), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxx)); + + gpu_fdderivs(D(S_dyy), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Ryy)); + + gpu_fdderivs(D(S_dzz), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rzz)); + + gpu_fdderivs(D(S_gxy), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), ANTI,ANTI,SYM, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxy)); + + gpu_fdderivs(D(S_gxz), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), ANTI,SYM,ANTI, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Rxz)); + + gpu_fdderivs(D(S_gyz), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,ANTI,ANTI, all); + kern_phase10_ricci_contract<<>>( + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_fxx),D(S_fxy),D(S_fxz),D(S_fyy),D(S_fyz),D(S_fzz), D(S_Ryz)); + + /* Phase 11: Ricci assembly (diagonal + off-diagonal) */ + kern_phase11_ricci_diag<<>>( + 
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_Gamxa),D(S_Gamya),D(S_Gamza), + D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), + D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), + D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), + D(S_Rxx),D(S_Ryy),D(S_Rzz)); + + kern_phase11_ricci_offdiag<<>>( + D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz), + D(S_gupxx),D(S_gupxy),D(S_gupxz),D(S_gupyy),D(S_gupyz),D(S_gupzz), + D(S_Gamxa),D(S_Gamya),D(S_Gamza), + D(S_Gamxx),D(S_Gamxy),D(S_Gamxz), + D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t), + D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), + D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz), + D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz), + D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz), + D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz), + D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz), + D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz), + D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx), + D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy), + D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz), + D(S_Rxy),D(S_Rxz),D(S_Ryz)); + + /* ============================================================ */ + /* Phase 12: fdderivs(chi) */ + /* ============================================================ */ + gpu_fdderivs(D(S_chi), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + + /* ============================================================ */ + /* Phase 13: chi correction to Ricci */ + /* ============================================================ */ + kern_phase13_chi_correction<<>>( + D(S_chin1), 
+ D(S_chix), D(S_chiy), D(S_chiz), + D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), + D(S_fxx), D(S_fxy), D(S_fxz), + D(S_fyy), D(S_fyz), D(S_fzz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), + D(S_Ryy), D(S_Ryz), D(S_Rzz)); + + /* ============================================================ */ + /* Phase 14: fdderivs(Lap) + fderivs(chi) */ + /* ============================================================ */ + gpu_fdderivs(D(S_Lap), D(S_fxx),D(S_fxy),D(S_fxz), + D(S_fyy),D(S_fyz),D(S_fzz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_chi), D(S_dtSfx_rhs),D(S_dtSfy_rhs),D(S_dtSfz_rhs), + SYM,SYM,SYM, all); + + /* ============================================================ */ + /* Phase 15: trK_rhs, Aij_rhs, gauge */ + /* ============================================================ */ + kern_phase15_trK_Aij_gauge<<>>( + D(S_alpn1), D(S_chin1), + D(S_chix), D(S_chiy), D(S_chiz), + D(S_gxx), D(S_gxy), D(S_gxz), D(S_gyy), D(S_gyz), D(S_gzz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_trK), + D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), + D(S_Lapx), D(S_Lapy), D(S_Lapz), + D(S_betaxx), D(S_betaxy), D(S_betaxz), + D(S_betayx), D(S_betayy), D(S_betayz), + D(S_betazx), D(S_betazy), D(S_betazz), + D(S_rho), + D(S_Sx), D(S_Sy), D(S_Sz), + D(S_Sxx), D(S_Sxy), D(S_Sxz), D(S_Syy), D(S_Syz), D(S_Szz), + D(S_dtSfx), D(S_dtSfy), D(S_dtSfz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), 
D(S_Gamzyz), D(S_Gamzzz), + D(S_fxx), D(S_fxy), D(S_fxz), + D(S_fyy), D(S_fyz), D(S_fzz), + D(S_dtSfx_rhs), D(S_dtSfy_rhs), D(S_dtSfz_rhs), + D(S_trK_rhs), + D(S_Axx_rhs), D(S_Axy_rhs), D(S_Axz_rhs), + D(S_Ayy_rhs), D(S_Ayz_rhs), D(S_Azz_rhs), + D(S_Lap_rhs), + D(S_betax_rhs), D(S_betay_rhs), D(S_betaz_rhs), + D(S_Gamx_rhs), D(S_Gamy_rhs), D(S_Gamz_rhs), + D(S_f_arr), D(S_S_arr)); + + /* ============================================================ */ + /* Phase 16/17: advection + KO dissipation (shared ord=3 pack) */ + /* ============================================================ */ + gpu_lopsided_kodis(D(S_gxx), D(S_dxx), D(S_gxx_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_Gamz), D(S_Gamz), D(S_Gamz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, eps, all); + gpu_lopsided_kodis(D(S_gxy), D(S_gxy), D(S_gxy_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,ANTI,SYM, eps, all); + gpu_lopsided_kodis(D(S_Lap), D(S_Lap), D(S_Lap_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_gxz), D(S_gxz), D(S_gxz_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,ANTI, eps, all); + gpu_lopsided_kodis(D(S_betax), D(S_betax), D(S_betax_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_gyy), D(S_dyy), D(S_gyy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_betay), D(S_betay), D(S_betay_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all); + gpu_lopsided_kodis(D(S_gyz), D(S_gyz), D(S_gyz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,ANTI, eps, all); + gpu_lopsided_kodis(D(S_betaz), D(S_betaz), D(S_betaz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, eps, all); + gpu_lopsided_kodis(D(S_gzz), D(S_dzz), D(S_gzz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_dtSfx), D(S_dtSfx), D(S_dtSfx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all); + 
gpu_lopsided_kodis(D(S_Axx), D(S_Axx), D(S_Axx_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_dtSfy), D(S_dtSfy), D(S_dtSfy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all); + gpu_lopsided_kodis(D(S_Axy), D(S_Axy), D(S_Axy_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,ANTI,SYM, eps, all); + gpu_lopsided_kodis(D(S_dtSfz), D(S_dtSfz), D(S_dtSfz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, eps, all); + gpu_lopsided_kodis(D(S_Axz), D(S_Axz), D(S_Axz_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,ANTI, eps, all); + gpu_lopsided_kodis(D(S_Ayy), D(S_Ayy), D(S_Ayy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_Ayz), D(S_Ayz), D(S_Ayz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,ANTI, eps, all); + gpu_lopsided_kodis(D(S_Azz), D(S_Azz), D(S_Azz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_chi), D(S_chi), D(S_chi_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_trK), D(S_trK), D(S_trK_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_Gamx), D(S_Gamx), D(S_Gamx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all); + gpu_lopsided_kodis(D(S_Gamy), D(S_Gamy), D(S_Gamy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all); + + /* ============================================================ */ + /* Phase 18: Hamilton & momentum constraints (co==0) */ + /* ============================================================ */ + if (co == 0) { + /* 6x fderivs on Aij — reuse gxxx..gzzz slots for dA/dx output */ + gpu_fderivs(D(S_Axx), D(S_gxxx),D(S_gxxy),D(S_gxxz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_Axy), D(S_gxyx),D(S_gxyy),D(S_gxyz), ANTI,ANTI,SYM, all); + gpu_fderivs(D(S_Axz), D(S_gxzx),D(S_gxzy),D(S_gxzz), ANTI,SYM,ANTI, all); + gpu_fderivs(D(S_Ayy), D(S_gyyx),D(S_gyyy),D(S_gyyz), SYM,SYM,SYM, all); + gpu_fderivs(D(S_Ayz), 
D(S_gyzx),D(S_gyzy),D(S_gyzz), SYM,ANTI,ANTI, all); + gpu_fderivs(D(S_Azz), D(S_gzzx),D(S_gzzy),D(S_gzzz), SYM,SYM,SYM, all); + + kern_phase18_constraints<<>>( + D(S_chin1), + D(S_chix), D(S_chiy), D(S_chiz), + D(S_gupxx), D(S_gupxy), D(S_gupxz), + D(S_gupyy), D(S_gupyz), D(S_gupzz), + D(S_trK), + D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz), + D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz), + D(S_rho), D(S_Sx), D(S_Sy), D(S_Sz), + D(S_Kx), D(S_Ky), D(S_Kz), + D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz), + D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz), + D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz), + D(S_Gamyyy), D(S_Gamyyz), D(S_Gamyzz), + D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz), + D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz), + /* dA/dx arrays */ + D(S_gxxx), D(S_gxxy), D(S_gxxz), + D(S_gxyx), D(S_gxyy), D(S_gxyz), + D(S_gxzx), D(S_gxzy), D(S_gxzz), + D(S_gyyx), D(S_gyyy), D(S_gyyz), + D(S_gyzx), D(S_gyzy), D(S_gyzz), + D(S_gzzx), D(S_gzzy), D(S_gzzz), + D(S_ham_Res), D(S_movx_Res), D(S_movy_Res), D(S_movz_Res)); + } + + /* ============================================================ */ + /* D2H: copy all output arrays back to host */ + /* ============================================================ */ + const int d2h_slot_count = D2H_BASE_SLOT_COUNT + + ((co == 0) ? 
D2H_CONSTRAINT_SLOT_COUNT : 0); + CUDA_CHECK(cudaMemcpy(g_buf.h_stage, D(S_chi_rhs), + (size_t)d2h_slot_count * bytes, + cudaMemcpyDeviceToHost)); + + double *d2h_dst[] = { + chi_rhs, trK_rhs, + gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, + Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, + Gamx_rhs, Gamy_rhs, Gamz_rhs, + Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, + dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, + Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, + Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, + Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, + Rxx, Rxy, Rxz, Ryy, Ryz, Rzz + }; + static_assert((int)(sizeof(d2h_dst) / sizeof(d2h_dst[0])) == D2H_BASE_SLOT_COUNT, + "d2h_dst list must match D2H_BASE_SLOT_COUNT"); + for (int s = 0; s < D2H_BASE_SLOT_COUNT; ++s) { + std::memcpy(d2h_dst[s], g_buf.h_stage + (size_t)s * all, bytes); + } + if (co == 0) { + double *d2h_dst_co[] = { + ham_Res, movx_Res, movy_Res, movz_Res, Gmx_Res, Gmy_Res, Gmz_Res + }; + static_assert((int)(sizeof(d2h_dst_co) / sizeof(d2h_dst_co[0])) == + D2H_CONSTRAINT_SLOT_COUNT, + "d2h_dst_co list must match D2H_CONSTRAINT_SLOT_COUNT"); + for (int s = 0; s < D2H_CONSTRAINT_SLOT_COUNT; ++s) { + std::memcpy(d2h_dst_co[s], + g_buf.h_stage + (size_t)(D2H_BASE_SLOT_COUNT + s) * all, + bytes); + } + } + + #undef D + return 0; +} diff --git a/AMSS_NCKU_source/bssn_rhs_cuda.h b/AMSS_NCKU_source/bssn_rhs_cuda.h new file mode 100644 index 0000000..cec0111 --- /dev/null +++ b/AMSS_NCKU_source/bssn_rhs_cuda.h @@ -0,0 +1,36 @@ +#ifndef BSSN_RHS_CUDA_H +#define BSSN_RHS_CUDA_H + +#ifdef __cplusplus +extern "C" { +#endif + +int f_compute_rhs_bssn(int *ex, double &T, + double *X, double *Y, double *Z, + double *chi, double *trK, + double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, + double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, + double *Gamx, double *Gamy, double *Gamz, + double *Lap, double *betax, double *betay, double *betaz, + double *dtSfx, 
double *dtSfy, double *dtSfz, + double *chi_rhs, double *trK_rhs, + double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, + double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, + double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, + double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, + double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, + double *rho, double *Sx, double *Sy, double *Sz, + double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, + double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, + double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, + double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, + double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, + double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, + double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, + int &Symmetry, int &Lev, double &eps, int &co); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/AMSS_NCKU_source/bssn_step_gpu.C b/AMSS_NCKU_source/bssn_step_gpu.C deleted file mode 100644 index 45ee555..0000000 --- a/AMSS_NCKU_source/bssn_step_gpu.C +++ /dev/null @@ -1,1942 +0,0 @@ -// includes, system -#include -#include -#include -#include -#include -#include - -#ifdef RESULT_CHECK -#include -#endif - -// include BSSN class files -#include "macrodef.h" -#include "fmisc.h" -#include "bssn_gpu_class.h" -#include "bssn_rhs.h" -#include "enforce_algebra.h" -#include "rungekutta4_rout.h" -#include "sommerfeld_rout.h" - -// include gpu files -#include "bssn_gpu.h" - -#if (PSTR == 0) -#if 1 -void bssn_class::Step_GPU(int lev, int YN) -{ - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - -// new code 2013-2-15, zjcao -#if (MAPBH == 1) - 
// for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - for (int ith = 0; ith < 3; ith++) - Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } -#endif - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_FIRST_TIME)) - { - cout 
<< "find NaN in domain: (" << cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) -#warning "shell part still bam type" - if (lev == 0) // Shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], varl0->data->SoA, - Symmetry, pre); -#endif - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // check error information - { - int erh = ERROR; - 
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (gpu_rhs_ss(CALLED_BY_STEP, myrank, RHS_SS_PARA_CALLED_FIRST_TIME)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - 
} - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check rhs - { - SH->Dump_Data(RHSList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check rhs"<Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - 
DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } -#endif - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_THEN)) - { - cout << "find NaN in domain: (" << cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - 
cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) - if (lev == 1) // shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn], varl0->data->SoA, - Symmetry, cor); -#endif - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count << " variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - 
f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (gpu_rhs_ss(CALLED_BY_STEP, myrank, RHS_SS_PARA_CALLED_THEN)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, 
MPI_COMM_WORLD); - } - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // swap time level - if (iter_count 
< 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } -#endif - } - } -#if (RPS == 0) - // mesh refinement boundary part - RestrictProlong(lev, YN, BB); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" 
<< endl; - } - } -#endif - -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check StateList - { - SH->Dump_Data(StateList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check StateList"< 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - } - } -} -#else // #if 1 -// ICN for bam comparison -void bssn_class::Step_GPU(int lev, int YN) -{ - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], 
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_FIRST_TIME)) - { - cout << "find NaN in domain: (" << cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif - f_icn_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], varl0->data->SoA, - Symmetry, cor); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; - 
MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (gpu_rhs_ss(CALLED_BY_STEP, myrank, RHS_SS_PARA_CALLED_FIRST_TIME)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_icn_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check rhs - { - SH->Dump_Data(RHSList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check rhs"<Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - 
if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl; - } - } -#endif - - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // data analysis part - // Warning NOTE: the variables1 are used as temp storege room - if (lev == a_lev) - { - AnalysisStuff(lev, dT_lev); - } - // corrector - for (iter_count = 1; iter_count < 3; iter_count++) - { - Pp = GH->PatL[lev]; - while 
(Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_THEN)) - { - cout << "find NaN in domain: (" << cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif - f_icn_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], 
cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], varl0->data->SoA, - Symmetry, cor); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count << " variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - -#ifdef WithShell - // evolve Shell Patches - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (gpu_rhs_ss(CALLED_BY_STEP, myrank, RHS_SS_PARA_CALLED_THEN)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList 
*varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } -#endif - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" 
<< endl; - } - } -#endif - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_icn_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_icn_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#endif - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } - 
} - } -#if (RPS == 0) - // mesh refinement boundary part - RestrictProlong(lev, YN, BB); - -#ifdef WithShell - if (lev == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl; - } - } -#endif - -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } -#ifdef WithShell - if (lev == 0) - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -#if 0 -// check StateList - { - SH->Dump_Data(StateList,0,PhysTime,dT_lev); - if(myrank == 0) - { - cout<<"check StateList"< 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - } - } -} -#endif - -#elif (PSTR == 1) -void bssn_class::Step_GPU(int lev, int YN) -{ - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); - - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - -// new code 2013-2-15, zjcao -#if (MAPBH == 1) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, 
Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - for (int ith = 0; ith < 3; ith++) - Porg1[ithBH][ith] = Porg0[ithBH][ith] + Porg_rhs[ithBH][ith] * dT_lev; - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif //(MAPBH == 1) - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - MyList *Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_FIRST_TIME)) - { - cout << "find NaN in domain: (" << cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << 
endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) -#warning "shell part still bam type" - if (lev == 0) // Shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], varl0->data->SoA, - Symmetry, pre); -#endif - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Predictor rhs calculation"); - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, 
GH->Commlev[lev]); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync"); - - Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg[ithBH][2] = fabs(Porg[ithBH][2]); - if (Symmetry == 2) - { - Porg[ithBH][0] = fabs(Porg[ithBH][0]); - Porg[ithBH][1] = fabs(Porg[ithBH][1]); - } - if (!finite(Porg[ithBH][0]) || !finite(Porg[ithBH][1]) || !finite(Porg[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "predictor step finds NaN for BH's position from (" - << Porg0[ithBH][0] << "," << Porg0[ithBH][1] << "," << Porg0[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } -#endif - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector"); - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"head of Corrector"); - - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - Pp 
= GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (gpu_rhs(CALLED_BY_STEP, myrank, RHS_PARA_CALLED_THEN)) - { - cout << "find NaN in domain: (" << cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[1] << ":" << cg->bbox[4] << "," - << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { -#if (SommerType == 0) -#ifndef WithShell - if (lev == 0) // sommerfeld indeed - f_sommerfeld_routbam(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); -#endif -#endif - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - -#ifndef WithShell - if (lev > 0) // fix BD point -#endif - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - 
dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], varl0->data->SoA, - Symmetry, cor); - -#if (SommerType == 1) - if (lev == 1) // shibata type sommerfeld - f_sommerfeld_rout(cg->shape, cg->X[0], cg->X[1], cg->X[2], - Pp->data->bbox[0], Pp->data->bbox[1], Pp->data->bbox[2], Pp->data->bbox[3], Pp->data->bbox[4], Pp->data->bbox[5], - dT_lev, cg->fgfs[phi0->sgfn], - cg->fgfs[Lap0->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varl1->data->sgfn], varl0->data->SoA, - Symmetry, cor); -#endif - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector error check"); - - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, GH->Commlev[lev]); - } - if (ERROR) - { - Parallel::Dump_Data(GH->PatL[lev], SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in RK4 substep#" << iter_count << " variables at t = " << PhysTime << ", lev = " << lev << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync"); - - Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); - - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync"); - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count); - f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count); - 
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count); - if (Symmetry > 0) - Porg1[ithBH][2] = fabs(Porg1[ithBH][2]); - if (Symmetry == 2) - { - Porg1[ithBH][0] = fabs(Porg1[ithBH][0]); - Porg1[ithBH][1] = fabs(Porg1[ithBH][1]); - } - if (!finite(Porg1[ithBH][0]) || !finite(Porg1[ithBH][1]) || !finite(Porg1[ithBH][2])) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << iter_count << " corrector step finds NaN for BH's position from (" - << Porg[ithBH][0] << "," << Porg[ithBH][1] << "," << Porg[ithBH][2] << ")" << endl; - - MyList *DG_List = new MyList(Sfx0); - DG_List->insert(Sfx0); - DG_List->insert(Sfy0); - DG_List->insert(Sfz0); - Parallel::Dump_Data(GH->PatL[lev], DG_List, 0, PhysTime, dT_lev); - DG_List->clearList(); - } - } - } - misc::tillherecheck(GH->Commlev[lev], GH->start_rank[lev], "after Corrector of black hole position"); -#endif - - // swap time level - if (iter_count < 3) - { - Pp = GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after pre cor swap"); - -#if (MAPBH == 0) - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg[ithBH][0] = Porg1[ithBH][0]; - Porg[ithBH][1] = Porg1[ithBH][1]; - Porg[ithBH][2] = Porg1[ithBH][2]; - } - } -#endif - } - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"tail of corrector"); - } -#if (RPS == 0) - // mesh refinement boundary part - misc::tillherecheck(GH->Commlev[lev], GH->start_rank[lev], "before RestrictProlong"); - RestrictProlong(lev, YN, BB); -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - Pp = 
GH->PatL[lev]; - while (Pp) - { - MyList *BP = Pp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == Pp->data->ble) - break; - BP = BP->next; - } - Pp = Pp->next; - } - // for black hole position - if (BH_num > 0 && lev == GH->levels - 1) - { - for (int ithBH = 0; ithBH < BH_num; ithBH++) - { - Porg0[ithBH][0] = Porg1[ithBH][0]; - Porg0[ithBH][1] = Porg1[ithBH][1]; - Porg0[ithBH][2] = Porg1[ithBH][2]; - // if(myrank==GH->start_rank[lev]) cout<start_rank[lev]) cout<mylev<Commlev[lev],GH->start_rank[lev],"complet GH Step"); -} -#endif // PSTR == ? - -//--------------------------With Shell-------------------------- - -#ifdef WithShell -void bssn_class::SHStep() -{ - int lev = 0; - // #if (PSTR == 1) - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"start Step"); - // #endif - - setpbh(BH_num, Porg0, Mass, BH_num_input); - - double dT_lev = dT * pow(0.5, Mymax(lev, trfls)); - - // #if (PSTR == 1) - // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor"); - // #endif - -#ifdef With_AHF - AH_Step_Find(lev, dT_lev); -#endif - bool BB = fgt(PhysTime, StartTime, dT_lev / 2); - double ndeps = numepss; - if (lev < GH->movls) - ndeps = numepsb; - double TRK4 = PhysTime; - int iter_count = 0; // count RK4 substeps - int pre = 0, cor = 1; - int ERROR = 0; - - MyList *sPp; - // Predictor - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], - cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); -#endif - - if (gpu_rhs_ss(RHS_SS_PARA_CALLED_FIRST_TIME)) - { - cout 
<< "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varlrhs->data->sgfn], - cg->fgfs[varl0->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - iter_count); - - varl0 = varl0->next; - varl = varl->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - -#if (PSTR == 1) -// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor's error check"); -#endif - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - - if (ERROR) - { - SH->Dump_Data(StateList, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN in state variables on Shell Patches at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_pre, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" 
<< endl; - } - } - - // corrector - for (iter_count = 1; iter_count < 4; iter_count++) - { - // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; - if (iter_count == 1 || iter_count == 3) - TRK4 += dT_lev / 2; - - { - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - int fngfs = sPp->data->fngfs; - while (BP) - { - Block *cg = BP->data; - if (myrank == cg->rank) - { -#if (AGM == 0) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#elif (AGM == 1) - if (iter_count == 3) - f_enforce_ga(cg->shape, - cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], - cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); -#endif - - if (gpu_rhs_ss(RHS_SS_PARA_CALLED_THEN)) - { - cout << "find NaN in Shell domain: sst = " << sPp->data->sst << ", (" << cg->bbox[0] << ":" << cg->bbox[3] << "," - << cg->bbox[1] << ":" << cg->bbox[4] << "," << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; - ERROR = 1; - } - // rk4 substep and boundary - { - MyList *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; // we do not check the correspondence here - while (varl0) - { - // sommerfeld indeed for outter boudary while fix BD for inner boundary - f_sommerfeld_routbam_ss(cg->shape, cg->X[0], cg->X[1], cg->X[2], - sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2], sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5], - cg->fgfs[varl1->data->sgfn], - cg->fgfs[varl->data->sgfn], varl0->data->propspeed, varl0->data->SoA, - Symmetry); - - f_rungekutta4_rout(cg->shape, dT_lev, cg->fgfs[varl0->data->sgfn], cg->fgfs[varl1->data->sgfn], cg->fgfs[varlrhs->data->sgfn], - 
iter_count); - - varl0 = varl0->next; - varl = varl->next; - varl1 = varl1->next; - varlrhs = varlrhs->next; - } - } - f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); - } - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } - // check error information - { - int erh = ERROR; - MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - } - if (ERROR) - { - SH->Dump_Data(SynchList_pre, 0, PhysTime, dT_lev); - if (myrank == 0) - { - if (ErrorMonitor->outfile) - ErrorMonitor->outfile << "find NaN on Shell Patches in RK4 substep#" << iter_count << " variables at t = " << PhysTime << endl; - MPI_Abort(MPI_COMM_WORLD, 1); - } - } - - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->Synch(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "Shell stuff synchronization used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl; - } - } - - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(SynchList_pre, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } - } -#if (RPS == 0) - { - clock_t prev_clock, curr_clock; - if (myrank == 0) - curr_clock = clock(); - SH->CS_Inter(SynchList_cor, Symmetry); - if (myrank == 0) - { - prev_clock = curr_clock; - curr_clock = clock(); - cout << "CS_Inter used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" 
<< endl; - } - } -#endif - // note the data structure before update - // SynchList_cor 1 ----------- - // - // StateList 0 ----------- - // - // OldStateList old ----------- - // update - sPp = SH->PatL; - while (sPp) - { - MyList *BP = sPp->data->blb; - while (BP) - { - Block *cg = BP->data; - cg->swapList(StateList, SynchList_cor, myrank); - cg->swapList(OldStateList, SynchList_cor, myrank); - if (BP == sPp->data->ble) - break; - BP = BP->next; - } - sPp = sPp->next; - } -} -d -#endif // withshell diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index 72b9cbd..7a8878b 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -1,35 +1,35 @@ -include makefile.inc - -## polint(ordn=6) kernel selector: -## 1 (default): barycentric fast path -## 0 : fallback to Neville path -POLINT6_USE_BARY ?= 1 -POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY) - -## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt) -## make -> opt (PGO-guided, maximum performance) -## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data) -PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata +include makefile.inc + +## polint(ordn=6) kernel selector: +## 1 (default): barycentric fast path +## 0 : fallback to Neville path +POLINT6_USE_BARY ?= 1 +POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY) + +## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt) +## make -> opt (PGO-guided, maximum performance) +## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data) +PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata ifeq ($(PGO_MODE),instrument) ## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability -CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ - -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) -f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ - 
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -else -## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \ -## PGO has been turned off, now tested and found to be negative optimization -## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization - - -CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) -f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -endif +CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ + -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) +f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ + -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) +else +## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \ +## PGO has been turned off, now tested and found to be negative optimization +## INTERP_LB_FLAGS has been turned off too, now tested and found to be negative optimization + + +CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ + -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) +f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ + -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) +endif .SUFFIXES: .o .f90 .C .for .cu @@ -42,12 +42,16 @@ endif .for.o: $(f77) -c $< -o $@ -.cu.o: - $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) - -# C rewrite of BSSN RHS kernel and helpers -bssn_rhs_c.o: bssn_rhs_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ +.cu.o: + $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) + +# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers) +bssn_rhs_cuda.o: bssn_rhs_cuda.cu bssn_rhs.h macrodef.h + $(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH) + +# C rewrite of BSSN RHS kernel and helpers +bssn_rhs_c.o: bssn_rhs_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ 
fderivs_c.o: fderivs_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ @@ -58,14 +62,14 @@ fdderivs_c.o: fdderivs_c.C kodiss_c.o: kodiss_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ -lopsided_c.o: lopsided_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ - -lopsided_kodis_c.o: lopsided_kodis_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ - -#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h -# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ +lopsided_c.o: lopsided_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +lopsided_kodis_c.o: lopsided_kodis_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h +# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata @@ -79,24 +83,46 @@ TwoPunctures.o: TwoPunctures.C TwoPunctureABE.o: TwoPunctureABE.C ${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@ -# Input files - -## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran) +# Input files + +## CUDA BSSN RHS switch +## 1 : use the rewritten CUDA bssn_rhs backend +## 0 : keep the normal CPU/Fortran selection below +USE_CUDA_BSSN ?= 0 + +## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran) ifeq ($(USE_CXX_KERNELS),0) # Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below -CFILES = +CFILES_CPU = else # C++ mode (default): C rewrite of bssn_rhs and helper kernels -CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o +CFILES_CPU = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o +endif + +CFILES_CUDA_BSSN = bssn_rhs_cuda.o + +ifeq ($(USE_CUDA_BSSN),1) +CFILES = $(CFILES_CUDA_BSSN) +else +CFILES = $(CFILES_CPU) endif ## RK4 kernel switch (independent from USE_CXX_KERNELS) ifeq ($(USE_CXX_RK4),1) -CFILES += 
rungekutta4_rout_c.o +RK4_C_OBJ = rungekutta4_rout_c.o RK4_F90_OBJ = else +RK4_C_OBJ = RK4_F90_OBJ = rungekutta4_rout.o endif + +CFILES += $(RK4_C_OBJ) +ABE_CUDA_CFILES = $(CFILES_CUDA_BSSN) $(RK4_C_OBJ) + +ABE_LDLIBS = $(LDLIBS) +ifeq ($(USE_CUDA_BSSN),1) +ABE_LDLIBS += -lcudart $(CUDA_LIB_PATH) +endif C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ cgh.o bssn_class.o surface_integral.o ShellPatch.o\ @@ -105,7 +131,7 @@ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\ NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o -C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ +#C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ cgh.o surface_integral.o ShellPatch.o\ bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\ bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\ @@ -113,12 +139,12 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o NullShellPatch2_Evo.o \ bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o -F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\ - prolongrestrict_cell.o prolongrestrict_vertex.o\ - $(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\ - lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\ - shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\ - getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\ +F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\ + prolongrestrict_cell.o prolongrestrict_vertex.o\ + $(RK4_F90_OBJ) diff_new.o kodiss.o kodiss_sh.o\ + lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\ + shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\ + getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\ fadmquantites_bssn.o Z4c_rhs.o Z4c_rhs_ss.o point_diff_new_sh.o\ cpbc.o getnp4old.o NullEvol.o initial_null.o initial_maxwell.o\ 
getnpem2.o empart.o NullNews.o fourdcurvature.o\ @@ -143,10 +169,10 @@ initial_guess.o Newton.o Jacobian.o ilucg.o IntPnts0.o IntPnts.o TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o -CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o +#CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o -# file dependences -$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh +# file dependences +$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\ @@ -157,7 +183,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\ initial_null2.h NullShellPatch2.h -$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ +#$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\ rungekutta4_rout.h var.h bssn_rhs.h sommerfeld_rout.h\ cgh.h surface_integral.h ShellPatch.h shellfunctions.h perf.h\ @@ -169,7 +195,7 @@ $(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h $(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h -$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h +$(C++FILES) $(C++FILES_GPU) $(CFILES) $(ABE_CUDA_CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h TwoPunctureFILES: TwoPunctures.h @@ -177,15 +203,18 @@ $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h misc.o : zbesh.o -# projects -ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) - $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) - -ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) - $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) 
$(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS) +# projects +ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) + $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(ABE_LDLIBS) + +ABE_CUDA: $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) + $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(ABE_CUDA_CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH) + +#ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) +# $(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS) TwoPunctureABE: $(TwoPunctureFILES) $(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS) -clean: - rm *.o ABE ABEGPU TwoPunctureABE make.log -f +clean: + rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f diff --git a/AMSS_NCKU_source/makefile.inc b/AMSS_NCKU_source/makefile.inc index 331cff1..69fe86b 100755 --- a/AMSS_NCKU_source/makefile.inc +++ b/AMSS_NCKU_source/makefile.inc @@ -63,3 +63,7 @@ Cu = nvcc CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include #CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc +CUDA_ARCH ?= sm_80 +ifneq ($(strip $(CUDA_ARCH)),) +CUDA_APP_FLAGS += -arch=$(CUDA_ARCH) +endif diff --git a/generate_macrodef.py b/generate_macrodef.py index 40c76e9..efa9d72 100755 --- a/generate_macrodef.py +++ b/generate_macrodef.py @@ -148,7 +148,7 @@ def generate_macrodef_h(): # use GPU or not if ( input_data.GPU_Calculation == "yes"): - print( "#define USE_GPU", file=file1 ) + print( "//#define USE_GPU", file=file1 ) print( file=file1 ) elif ( input_data.GPU_Calculation == "no"): print( "//#define USE_GPU", file=file1 ) diff --git a/makefile_and_run.py b/makefile_and_run.py index 5682476..5d403a6 100755 --- a/makefile_and_run.py +++ 
b/makefile_and_run.py @@ -72,7 +72,7 @@ def makefile_ABE(): if (input_data.GPU_Calculation == "no"): makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off ABE" elif (input_data.GPU_Calculation == "yes"): - makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU" + makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off ABE_CUDA" else: print( " CPU/GPU numerical calculation setting is wrong " ) print( ) @@ -151,7 +151,7 @@ def run_ABE(): #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" mpi_command_outfile = "ABE_out.log" elif (input_data.GPU_Calculation == "yes"): - mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU" + mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE_CUDA" mpi_command_outfile = "ABEGPU_out.log" ## Execute the MPI command and stream output