Fuse phases 5 and 6 for Gamma_rhs computation and optimize phases 8 and 9 for efficiency
This commit is contained in:
@@ -2946,6 +2946,96 @@ void kern_phase6_gamma_rhs_part1(
|
||||
}
|
||||
}
|
||||
|
||||
/* Phase 5+6 fused: raise A^ij in registers, then consume immediately in Gamma_rhs.
 *
 * For every point index below d_gp.all this kernel
 *   1. forms the six symmetric products
 *        up_ij = sum_{a,b} gup^{ia} * gup^{jb} * A_ab
 *      in local variables (they are never written to global memory), and
 *   2. overwrites Gamx_rhs / Gamy_rhs / Gamz_rhs with
 *        -2 * (Lap_j * up_ij)
 *        + 2 * alpn1 * ( -(3/2)/chin1 * (chi_j * up_ij)
 *                        - gup^{ij} * (2/3 * K_j + 8*pi * S_j)
 *                        + G^i_{jk} * up_jk ).
 *
 * Inputs (all read-only, one double per point):
 *   Lapx..Lapz   - contracted with up_ij in the leading term
 *                  (presumably lapse derivatives - confirm with caller)
 *   alpn1, chin1 - scalar factors; chin1[i] is used as a divisor, no zero guard
 *   chix..chiz   - contracted with up_ij (presumably chi derivatives - confirm)
 *   gup**        - six independent components of the symmetric inverse metric
 *   A**          - six independent components of the symmetric tensor being raised
 *   K*, S*       - combined as 2/3*K_j + 8*pi*S_j
 *   G***         - 18 connection-like coefficients, first index = output component
 * Outputs:
 *   Gamx_rhs, Gamy_rhs, Gamz_rhs - overwritten (not accumulated).
 *
 * Launch: 1-D grid-stride loop, so any 1-D grid/block shape is valid;
 * __launch_bounds__ caps the block at 128 threads. No shared memory is used.
 */
__global__ __launch_bounds__(128, 4)
void kern_phase5_6_gamma_rhs_part1_fused(
    const double* __restrict__ Lapx, const double* __restrict__ Lapy,
    const double* __restrict__ Lapz,
    const double* __restrict__ alpn1, const double* __restrict__ chin1,
    const double* __restrict__ chix, const double* __restrict__ chiy,
    const double* __restrict__ chiz,
    const double* __restrict__ gupxx, const double* __restrict__ gupxy,
    const double* __restrict__ gupxz, const double* __restrict__ gupyy,
    const double* __restrict__ gupyz, const double* __restrict__ gupzz,
    const double* __restrict__ Axx, const double* __restrict__ Axy,
    const double* __restrict__ Axz, const double* __restrict__ Ayy,
    const double* __restrict__ Ayz, const double* __restrict__ Azz,
    const double* __restrict__ Kx, const double* __restrict__ Ky,
    const double* __restrict__ Kz,
    const double* __restrict__ Sx, const double* __restrict__ Sy,
    const double* __restrict__ Sz,
    const double* __restrict__ Gxxx, const double* __restrict__ Gxxy,
    const double* __restrict__ Gxxz, const double* __restrict__ Gxyy,
    const double* __restrict__ Gxyz, const double* __restrict__ Gxzz,
    const double* __restrict__ Gyxx, const double* __restrict__ Gyxy,
    const double* __restrict__ Gyxz, const double* __restrict__ Gyyy,
    const double* __restrict__ Gyyz, const double* __restrict__ Gyzz,
    const double* __restrict__ Gzxx, const double* __restrict__ Gzxy,
    const double* __restrict__ Gzxz, const double* __restrict__ Gzyy,
    const double* __restrict__ Gzyz, const double* __restrict__ Gzzz,
    double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs,
    double* __restrict__ Gamz_rhs)
{
    const double TWO = 2.0, F3o2 = 1.5, F2o3 = 2.0 / 3.0, EIGHT = 8.0;
    const double PI_V = 3.14159265358979323846;

    /* Number of points covered by one sweep of the whole grid. */
    const unsigned int stride = blockDim.x * gridDim.x;

    for (int p = blockIdx.x * blockDim.x + threadIdx.x; p < d_gp.all; p += stride) {
        /* Inverse metric and the tensor to be raised. */
        const double hxx = gupxx[p], hxy = gupxy[p], hxz = gupxz[p];
        const double hyy = gupyy[p], hyz = gupyz[p], hzz = gupzz[p];
        const double axx = Axx[p], axy = Axy[p], axz = Axz[p];
        const double ayy = Ayy[p], ayz = Ayz[p], azz = Azz[p];

        /* Diagonal raised components. */
        const double up_xx = hxx * hxx * axx + hxy * hxy * ayy + hxz * hxz * azz
                           + TWO * (hxx * hxy * axy + hxx * hxz * axz + hxy * hxz * ayz);
        const double up_yy = hxy * hxy * axx + hyy * hyy * ayy + hyz * hyz * azz
                           + TWO * (hxy * hyy * axy + hxy * hyz * axz + hyy * hyz * ayz);
        const double up_zz = hxz * hxz * axx + hyz * hyz * ayy + hzz * hzz * azz
                           + TWO * (hxz * hyz * axy + hxz * hzz * axz + hyz * hzz * ayz);

        /* Off-diagonal raised components. */
        const double up_xy = hxx * hxy * axx + hxy * hyy * ayy + hxz * hyz * azz
                           + (hxx * hyy + hxy * hxy) * axy
                           + (hxx * hyz + hxz * hxy) * axz
                           + (hxy * hyz + hxz * hyy) * ayz;
        const double up_xz = hxx * hxz * axx + hxy * hyz * ayy + hxz * hzz * azz
                           + (hxx * hyz + hxy * hxz) * axy
                           + (hxx * hzz + hxz * hxz) * axz
                           + (hxy * hzz + hxz * hyz) * ayz;
        const double up_yz = hxy * hxz * axx + hyy * hyz * ayy + hyz * hzz * azz
                           + (hxy * hyz + hyy * hxz) * axy
                           + (hxy * hzz + hyz * hxz) * axz
                           + (hyy * hzz + hyz * hyz) * ayz;

        /* Per-point scalars shared by all three output components. */
        const double dax = Lapx[p], day = Lapy[p], daz = Lapz[p];
        const double lapse = alpn1[p];
        const double chi_fac = -F3o2 / chin1[p];
        const double dcx = chix[p], dcy = chiy[p], dcz = chiz[p];

        /* 2/3*K_j + 8*pi*S_j, identical for every output component. */
        const double src_x = F2o3 * Kx[p] + EIGHT * PI_V * Sx[p];
        const double src_y = F2o3 * Ky[p] + EIGHT * PI_V * Sy[p];
        const double src_z = F2o3 * Kz[p] + EIGHT * PI_V * Sz[p];

        Gamx_rhs[p] = -TWO * (dax * up_xx + day * up_xy + daz * up_xz)
                    + TWO * lapse * (
                          chi_fac * (dcx * up_xx + dcy * up_xy + dcz * up_xz)
                        - hxx * src_x
                        - hxy * src_y
                        - hxz * src_z
                        + Gxxx[p] * up_xx + Gxyy[p] * up_yy + Gxzz[p] * up_zz
                        + TWO * (Gxxy[p] * up_xy + Gxxz[p] * up_xz + Gxyz[p] * up_yz));

        Gamy_rhs[p] = -TWO * (dax * up_xy + day * up_yy + daz * up_yz)
                    + TWO * lapse * (
                          chi_fac * (dcx * up_xy + dcy * up_yy + dcz * up_yz)
                        - hxy * src_x
                        - hyy * src_y
                        - hyz * src_z
                        + Gyxx[p] * up_xx + Gyyy[p] * up_yy + Gyzz[p] * up_zz
                        + TWO * (Gyxy[p] * up_xy + Gyxz[p] * up_xz + Gyyz[p] * up_yz));

        Gamz_rhs[p] = -TWO * (dax * up_xz + day * up_yz + daz * up_zz)
                    + TWO * lapse * (
                          chi_fac * (dcx * up_xz + dcy * up_yz + dcz * up_zz)
                        - hxz * src_x
                        - hyz * src_y
                        - hzz * src_z
                        + Gzxx[p] * up_xx + Gzyy[p] * up_yy + Gzzz[p] * up_zz
                        + TWO * (Gzxy[p] * up_xy + Gzxz[p] * up_xz + Gzyz[p] * up_yz));
    }
}
|
||||
|
||||
/* Phase 8: Gamma_rhs part 2 — after fdderivs(beta) and fderivs(Gamma)
|
||||
* Computes: fxx=div(beta_xx), Gamxa, then updates Gamx_rhs etc.
|
||||
* Input arrays gxxx..gzzz here hold fdderivs(beta) results,
|
||||
@@ -3088,6 +3178,126 @@ void kern_phase9_christoffel_contract(
|
||||
}
|
||||
}
|
||||
|
||||
/* Phase 8+9 fused: update Gamma rhs, contract Gamma^a, and lower Christoffels in one pass.
 *
 * For every point index below d_gp.all this kernel
 *   1. contracts the 18 coefficients G^a_{jk} with the inverse metric,
 *        Ga_a = gup^{jk} * G^a_{jk},
 *      and stores the result in Gamxa_out / Gamya_out / Gamza_out;
 *   2. accumulates (+=) into Gamx_rhs / Gamy_rhs / Gamz_rhs
 *        2/3 * Ga_a * (betaxx + betayy + betazz)
 *        - Ga_x * beta{a}x - Ga_y * beta{a}y - Ga_z * beta{a}z
 *        + 1/3 * gup^{a j} * f_j        with f_j = bxx_xj + bxy_yj + bxz_zj
 *        + gup^{jk} * b{x a}_{jk};
 *   3. writes the 18 lowered products
 *        o_g{jk}{a} = g_{a x} * Gx{jk} + g_{a y} * Gy{jk} + g_{a z} * Gz{jk}.
 *
 * Inputs:
 *   gup**   - six independent components of the symmetric inverse metric
 *   bxx_**, bxy_**, bxz_** - three sets of six second-derivative components
 *             (presumably fdderivs of the three shift components - confirm
 *             against the caller)
 *   G***    - 18 connection-like coefficients, first index = upper index
 *   beta**  - nine values contracted with Ga_* (presumably first derivatives
 *             of the shift - confirm against the caller)
 *   g**     - six independent components of the symmetric metric
 * In/out:
 *   Gamx_rhs, Gamy_rhs, Gamz_rhs - read-modify-write (accumulated into)
 * Outputs:
 *   Gam*a_out, o_g*** - overwritten
 *
 * ALIASING: the launch sites pass the same 18 buffers for the b* inputs and
 * the o_g* outputs (bxx_xx == o_gxxx, bxx_xy == o_gxyx, ... in parameter
 * order). Those 36 parameters are therefore deliberately NOT declared
 * __restrict__: reading an object through one restrict pointer while it is
 * written through another is undefined behaviour, and in practice lets the
 * compiler route the loads through the non-coherent read-only cache or move
 * the o_g* stores ahead of later b* loads. All b* values are loaded into
 * locals before the first store so each thread sees the pre-update contents
 * of its own element. Every thread touches only index i of each array, so
 * in-place operation is race-free across threads.
 *
 * Launch: 1-D grid-stride loop, so any 1-D grid/block shape is valid;
 * __launch_bounds__ caps the block at 128 threads. No shared memory is used.
 */
__global__ __launch_bounds__(128, 2)
void kern_phase8_9_gamma_rhs_contract_fused(
    const double* __restrict__ gupxx, const double* __restrict__ gupxy,
    const double* __restrict__ gupxz, const double* __restrict__ gupyy,
    const double* __restrict__ gupyz, const double* __restrict__ gupzz,
    const double* bxx_xx, const double* bxx_xy,
    const double* bxx_xz, const double* bxx_yy,
    const double* bxx_yz, const double* bxx_zz,
    const double* bxy_xx, const double* bxy_xy,
    const double* bxy_xz, const double* bxy_yy,
    const double* bxy_yz, const double* bxy_zz,
    const double* bxz_xx, const double* bxz_xy,
    const double* bxz_xz, const double* bxz_yy,
    const double* bxz_yz, const double* bxz_zz,
    const double* __restrict__ Gxxx, const double* __restrict__ Gxxy,
    const double* __restrict__ Gxxz, const double* __restrict__ Gxyy,
    const double* __restrict__ Gxyz, const double* __restrict__ Gxzz,
    const double* __restrict__ Gyxx, const double* __restrict__ Gyxy,
    const double* __restrict__ Gyxz, const double* __restrict__ Gyyy,
    const double* __restrict__ Gyyz, const double* __restrict__ Gyzz,
    const double* __restrict__ Gzxx, const double* __restrict__ Gzxy,
    const double* __restrict__ Gzxz, const double* __restrict__ Gzyy,
    const double* __restrict__ Gzyz, const double* __restrict__ Gzzz,
    const double* __restrict__ betaxx, const double* __restrict__ betaxy,
    const double* __restrict__ betaxz, const double* __restrict__ betayx,
    const double* __restrict__ betayy, const double* __restrict__ betayz,
    const double* __restrict__ betazx, const double* __restrict__ betazy,
    const double* __restrict__ betazz,
    const double* __restrict__ gxx, const double* __restrict__ gxy,
    const double* __restrict__ gxz, const double* __restrict__ gyy,
    const double* __restrict__ gyz, const double* __restrict__ gzz,
    double* __restrict__ Gamx_rhs, double* __restrict__ Gamy_rhs,
    double* __restrict__ Gamz_rhs,
    double* __restrict__ Gamxa_out, double* __restrict__ Gamya_out,
    double* __restrict__ Gamza_out,
    double* o_gxxx, double* o_gxyx,
    double* o_gxzx, double* o_gyyx,
    double* o_gyzx, double* o_gzzx,
    double* o_gxxy, double* o_gxyy,
    double* o_gxzy, double* o_gyyy,
    double* o_gyzy, double* o_gzzy,
    double* o_gxxz, double* o_gxyz,
    double* o_gxzz, double* o_gyyz,
    double* o_gyzz, double* o_gzzz)
{
    const double TWO = 2.0, F2o3 = 2.0 / 3.0, F1o3 = 1.0 / 3.0;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < d_gp.all; i += blockDim.x * gridDim.x) {
        const double uxx = gupxx[i], uxy = gupxy[i], uxz = gupxz[i];
        const double uyy = gupyy[i], uyz = gupyz[i], uzz = gupzz[i];

        const double Gxxx_v = Gxxx[i], Gxxy_v = Gxxy[i], Gxxz_v = Gxxz[i];
        const double Gxyy_v = Gxyy[i], Gxyz_v = Gxyz[i], Gxzz_v = Gxzz[i];
        const double Gyxx_v = Gyxx[i], Gyxy_v = Gyxy[i], Gyxz_v = Gyxz[i];
        const double Gyyy_v = Gyyy[i], Gyyz_v = Gyyz[i], Gyzz_v = Gyzz[i];
        const double Gzxx_v = Gzxx[i], Gzxy_v = Gzxy[i], Gzxz_v = Gzxz[i];
        const double Gzyy_v = Gzyy[i], Gzyz_v = Gzyz[i], Gzzz_v = Gzzz[i];

        /* Snapshot every b* value BEFORE any store: the o_g* outputs written
         * at the end of this iteration may be the very same memory. */
        const double bxx_xx_v = bxx_xx[i], bxx_xy_v = bxx_xy[i], bxx_xz_v = bxx_xz[i];
        const double bxx_yy_v = bxx_yy[i], bxx_yz_v = bxx_yz[i], bxx_zz_v = bxx_zz[i];
        const double bxy_xx_v = bxy_xx[i], bxy_xy_v = bxy_xy[i], bxy_xz_v = bxy_xz[i];
        const double bxy_yy_v = bxy_yy[i], bxy_yz_v = bxy_yz[i], bxy_zz_v = bxy_zz[i];
        const double bxz_xx_v = bxz_xx[i], bxz_xy_v = bxz_xy[i], bxz_xz_v = bxz_xz[i];
        const double bxz_yy_v = bxz_yy[i], bxz_yz_v = bxz_yz[i], bxz_zz_v = bxz_zz[i];

        /* f_j = bxx_xj + bxy_yj + bxz_zj */
        const double fxx_v = bxx_xx_v + bxy_xy_v + bxz_xz_v;
        const double fxy_v = bxx_xy_v + bxy_yy_v + bxz_yz_v;
        const double fxz_v = bxx_xz_v + bxy_yz_v + bxz_zz_v;

        /* Ga_a = gup^{jk} * G^a_{jk} */
        const double Ga_x = uxx * Gxxx_v + uyy * Gxyy_v + uzz * Gxzz_v
                          + TWO * (uxy * Gxxy_v + uxz * Gxxz_v + uyz * Gxyz_v);
        const double Ga_y = uxx * Gyxx_v + uyy * Gyyy_v + uzz * Gyzz_v
                          + TWO * (uxy * Gyxy_v + uxz * Gyxz_v + uyz * Gyyz_v);
        const double Ga_z = uxx * Gzxx_v + uyy * Gzyy_v + uzz * Gzzz_v
                          + TWO * (uxy * Gzxy_v + uxz * Gzxz_v + uyz * Gzyz_v);

        Gamxa_out[i] = Ga_x;
        Gamya_out[i] = Ga_y;
        Gamza_out[i] = Ga_z;

        const double betaxx_v = betaxx[i], betaxy_v = betaxy[i], betaxz_v = betaxz[i];
        const double betayx_v = betayx[i], betayy_v = betayy[i], betayz_v = betayz[i];
        const double betazx_v = betazx[i], betazy_v = betazy[i], betazz_v = betazz[i];
        const double db = betaxx_v + betayy_v + betazz_v;

        Gamx_rhs[i] += F2o3 * Ga_x * db
                     - Ga_x * betaxx_v - Ga_y * betaxy_v - Ga_z * betaxz_v
                     + F1o3 * (uxx * fxx_v + uxy * fxy_v + uxz * fxz_v)
                     + uxx * bxx_xx_v + uyy * bxx_yy_v + uzz * bxx_zz_v
                     + TWO * (uxy * bxx_xy_v + uxz * bxx_xz_v + uyz * bxx_yz_v);
        Gamy_rhs[i] += F2o3 * Ga_y * db
                     - Ga_x * betayx_v - Ga_y * betayy_v - Ga_z * betayz_v
                     + F1o3 * (uxy * fxx_v + uyy * fxy_v + uyz * fxz_v)
                     + uxx * bxy_xx_v + uyy * bxy_yy_v + uzz * bxy_zz_v
                     + TWO * (uxy * bxy_xy_v + uxz * bxy_xz_v + uyz * bxy_yz_v);
        Gamz_rhs[i] += F2o3 * Ga_z * db
                     - Ga_x * betazx_v - Ga_y * betazy_v - Ga_z * betazz_v
                     + F1o3 * (uxz * fxx_v + uyz * fxy_v + uzz * fxz_v)
                     + uxx * bxz_xx_v + uyy * bxz_yy_v + uzz * bxz_zz_v
                     + TWO * (uxy * bxz_xy_v + uxz * bxz_xz_v + uyz * bxz_yz_v);

        const double g11 = gxx[i], g12 = gxy[i], g13 = gxz[i];
        const double g22 = gyy[i], g23 = gyz[i], g33 = gzz[i];

        /* Lowered products. From here on the b* buffers may be overwritten;
         * every b* value needed above is already held in a local. */
        o_gxxx[i] = g11 * Gxxx_v + g12 * Gyxx_v + g13 * Gzxx_v;
        o_gxyx[i] = g11 * Gxxy_v + g12 * Gyxy_v + g13 * Gzxy_v;
        o_gxzx[i] = g11 * Gxxz_v + g12 * Gyxz_v + g13 * Gzxz_v;
        o_gyyx[i] = g11 * Gxyy_v + g12 * Gyyy_v + g13 * Gzyy_v;
        o_gyzx[i] = g11 * Gxyz_v + g12 * Gyyz_v + g13 * Gzyz_v;
        o_gzzx[i] = g11 * Gxzz_v + g12 * Gyzz_v + g13 * Gzzz_v;

        o_gxxy[i] = g12 * Gxxx_v + g22 * Gyxx_v + g23 * Gzxx_v;
        o_gxyy[i] = g12 * Gxxy_v + g22 * Gyxy_v + g23 * Gzxy_v;
        o_gxzy[i] = g12 * Gxxz_v + g22 * Gyxz_v + g23 * Gzxz_v;
        o_gyyy[i] = g12 * Gxyy_v + g22 * Gyyy_v + g23 * Gzyy_v;
        o_gyzy[i] = g12 * Gxyz_v + g22 * Gyyz_v + g23 * Gzyz_v;
        o_gzzy[i] = g12 * Gxzz_v + g22 * Gyzz_v + g23 * Gzzz_v;

        o_gxxz[i] = g13 * Gxxx_v + g23 * Gyxx_v + g33 * Gzxx_v;
        o_gxyz[i] = g13 * Gxxy_v + g23 * Gyxy_v + g33 * Gzxy_v;
        o_gxzz[i] = g13 * Gxxz_v + g23 * Gyxz_v + g33 * Gzxz_v;
        o_gyyz[i] = g13 * Gxyy_v + g23 * Gyyy_v + g33 * Gzyy_v;
        o_gyzz[i] = g13 * Gxyz_v + g23 * Gyyz_v + g33 * Gzyz_v;
        o_gzzz[i] = g13 * Gxzz_v + g23 * Gyzz_v + g33 * Gzzz_v;
    }
}
|
||||
|
||||
/* Phase 10: After fdderivs of a metric component, contract with gup^{ij}
|
||||
* R_comp = gup^xx*fxx + gup^yy*fyy + gup^zz*fzz + 2*(gup^xy*fxy + gup^xz*fxz + gup^yz*fyz)
|
||||
*/
|
||||
@@ -4566,22 +4776,15 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz),
|
||||
D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz));
|
||||
|
||||
kern_phase5_raise_A<<<grid(all),BLK>>>(
|
||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
|
||||
D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz));
|
||||
|
||||
kern_phase6_gamma_rhs_part1<<<grid(all),BLK>>>(
|
||||
kern_phase5_6_gamma_rhs_part1_fused<<<grid(all),BLK>>>(
|
||||
D(S_Lapx), D(S_Lapy), D(S_Lapz),
|
||||
D(S_alpn1), D(S_chin1),
|
||||
D(S_chix), D(S_chiy), D(S_chiz),
|
||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
|
||||
D(S_Kx), D(S_Ky), D(S_Kz),
|
||||
D(S_Sx), D(S_Sy), D(S_Sz),
|
||||
D(S_Rxx), D(S_Rxy), D(S_Rxz),
|
||||
D(S_Ryy), D(S_Ryz), D(S_Rzz),
|
||||
D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz),
|
||||
D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz),
|
||||
D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz),
|
||||
@@ -4623,15 +4826,12 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
soa_signs, all);
|
||||
}
|
||||
|
||||
kern_phase8_gamma_rhs_part2<<<grid(all),BLK>>>(
|
||||
kern_phase8_9_gamma_rhs_contract_fused<<<grid(all),BLK>>>(
|
||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||
D(S_Gamxx),D(S_Gamxy),D(S_Gamxz),
|
||||
D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t),
|
||||
D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t),
|
||||
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
|
||||
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
|
||||
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
|
||||
@@ -4641,17 +4841,9 @@ static void launch_rhs_pipeline(int all, double eps, int co)
|
||||
D(S_betaxx),D(S_betaxy),D(S_betaxz),
|
||||
D(S_betayx),D(S_betayy),D(S_betayz),
|
||||
D(S_betazx),D(S_betazy),D(S_betazz),
|
||||
D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza));
|
||||
|
||||
kern_phase9_christoffel_contract<<<grid(all),BLK>>>(
|
||||
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
|
||||
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
|
||||
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
|
||||
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
|
||||
D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz),
|
||||
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz),
|
||||
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz),
|
||||
D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz));
|
||||
@@ -5192,24 +5384,16 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
D(S_Gamzxx), D(S_Gamzxy), D(S_Gamzxz),
|
||||
D(S_Gamzyy), D(S_Gamzyz), D(S_Gamzzz));
|
||||
|
||||
/* Phase 5: Raise A index (stored in Rxx..Rzz temporarily) */
|
||||
kern_phase5_raise_A<<<grid(all),BLK>>>(
|
||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
|
||||
D(S_Rxx), D(S_Rxy), D(S_Rxz), D(S_Ryy), D(S_Ryz), D(S_Rzz));
|
||||
|
||||
/* Phase 6: Gamma_rhs part 1 */
|
||||
kern_phase6_gamma_rhs_part1<<<grid(all),BLK>>>(
|
||||
/* Phase 5+6: raise A in registers, then build Gamma_rhs part 1 */
|
||||
kern_phase5_6_gamma_rhs_part1_fused<<<grid(all),BLK>>>(
|
||||
D(S_Lapx), D(S_Lapy), D(S_Lapz),
|
||||
D(S_alpn1), D(S_chin1),
|
||||
D(S_chix), D(S_chiy), D(S_chiz),
|
||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||
D(S_Axx), D(S_Axy), D(S_Axz), D(S_Ayy), D(S_Ayz), D(S_Azz),
|
||||
D(S_Kx), D(S_Ky), D(S_Kz),
|
||||
D(S_Sx), D(S_Sy), D(S_Sz),
|
||||
D(S_Rxx), D(S_Rxy), D(S_Rxz),
|
||||
D(S_Ryy), D(S_Ryz), D(S_Rzz),
|
||||
D(S_Gamxxx), D(S_Gamxxy), D(S_Gamxxz),
|
||||
D(S_Gamxyy), D(S_Gamxyz), D(S_Gamxzz),
|
||||
D(S_Gamyxx), D(S_Gamyxy), D(S_Gamyxz),
|
||||
@@ -5230,15 +5414,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
gpu_fderivs(D(S_Gamz), D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t), SYM,SYM,ANTI, all);
|
||||
|
||||
/* Phase 8: Gamma_rhs part 2 */
|
||||
kern_phase8_gamma_rhs_part2<<<grid(all),BLK>>>(
|
||||
kern_phase8_9_gamma_rhs_contract_fused<<<grid(all),BLK>>>(
|
||||
D(S_gupxx), D(S_gupxy), D(S_gupxz),
|
||||
D(S_gupyy), D(S_gupyz), D(S_gupzz),
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz),
|
||||
D(S_Gamxx),D(S_Gamxy),D(S_Gamxz),
|
||||
D(S_Gamyx),D(S_Gamyy_t),D(S_Gamyz_t),
|
||||
D(S_Gamzx),D(S_Gamzy),D(S_Gamzz_t),
|
||||
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
|
||||
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
|
||||
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
|
||||
@@ -5248,18 +5429,9 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
||||
D(S_betaxx),D(S_betaxy),D(S_betaxz),
|
||||
D(S_betayx),D(S_betayy),D(S_betayz),
|
||||
D(S_betazx),D(S_betazy),D(S_betazz),
|
||||
D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza));
|
||||
|
||||
/* Phase 9: Christoffel contract (lowered products for Ricci) */
|
||||
kern_phase9_christoffel_contract<<<grid(all),BLK>>>(
|
||||
D(S_gxx),D(S_gxy),D(S_gxz),D(S_gyy),D(S_gyz),D(S_gzz),
|
||||
D(S_Gamxxx),D(S_Gamxxy),D(S_Gamxxz),
|
||||
D(S_Gamxyy),D(S_Gamxyz),D(S_Gamxzz),
|
||||
D(S_Gamyxx),D(S_Gamyxy),D(S_Gamyxz),
|
||||
D(S_Gamyyy),D(S_Gamyyz),D(S_Gamyzz),
|
||||
D(S_Gamzxx),D(S_Gamzxy),D(S_Gamzxz),
|
||||
D(S_Gamzyy),D(S_Gamzyz),D(S_Gamzzz),
|
||||
D(S_Gamx_rhs),D(S_Gamy_rhs),D(S_Gamz_rhs),
|
||||
D(S_Gamxa),D(S_Gamya),D(S_Gamza),
|
||||
D(S_gxxx),D(S_gxyx),D(S_gxzx),D(S_gyyx),D(S_gyzx),D(S_gzzx),
|
||||
D(S_gxxy),D(S_gxyy),D(S_gxzy),D(S_gyyy),D(S_gyzy),D(S_gzzy),
|
||||
D(S_gxxz),D(S_gxyz),D(S_gxzz),D(S_gyyz),D(S_gyzz),D(S_gzzz));
|
||||
|
||||
Reference in New Issue
Block a user