// includes, system #include #include #include #include #include #include #include #include //#include "cutil.h" #include #include using namespace std; //includes, bssn #include "gpu_mem.h" #include "bssn_gpu.h" #ifdef RESULT_CHECK #include #endif void destroy_meta(Meta *meta); void compare_result_gpu(int ftag1,double * datac,int data_num){ #ifdef RESULT_CHECK double * data = (double*)malloc(sizeof(double)*data_num); cudaMemcpy(data, datac, data_num * sizeof(double), cudaMemcpyDeviceToHost); compare_result(ftag1,data,data_num); free(data); #else (void)ftag1; (void)datac; (void)data_num; #endif } namespace { int read_local_rank_from_env() { const char *keys[] = { "AMSS_NCKU_CUDA_LOCAL_RANK", "I_MPI_LOCAL_RANK", "OMPI_COMM_WORLD_LOCAL_RANK", "MPI_LOCALRANKID", "PMI_LOCAL_RANK", "SLURM_LOCALID" }; for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { const char *value = getenv(keys[i]); if (value && *value) return atoi(value); } return -1; } int read_forced_device_from_env() { const char *value = getenv("AMSS_NCKU_CUDA_DEVICE"); if (value && *value) return atoi(value); return -1; } int read_positive_env_value(const char *key) { const char *value = getenv(key); if (!value || !*value) return -1; const int parsed = atoi(value); return parsed > 0 ? parsed : -1; } int clamp_launch_block_dim(int requested, int max_threads_per_block) { if (requested <= 0) return 0; if (max_threads_per_block > 0 && requested > max_threads_per_block) requested = max_threads_per_block; requested = (requested / 32) * 32; if (requested <= 0) requested = max_threads_per_block >= 32 ? 32 : max_threads_per_block; return requested > 0 ? requested : BLOCK_DIM; } int select_cuda_device_for_process(int mpi_rank) { static int cached_device = -2; if (cached_device >= -1) return cached_device; int device_count = 0; cudaError_t err = cudaGetDeviceCount(&device_count); if (err != cudaSuccess || device_count <= 0) { cached_device = -1; return cached_device; } int device = read_forced_device_from_env(); if (device < 0) { int local_rank = read_local_rank_from_env(); if (local_rank < 0) local_rank = mpi_rank; device = local_rank % device_count; } if (device < 0) device = 0; if (device >= device_count) device %= device_count; err = cudaSetDevice(device); if (err != cudaSuccess) { cerr << "cudaSetDevice(" << device << ") failed: " << cudaGetErrorString(err) << endl; cached_device = -1; return cached_device; } cached_device = device; return cached_device; } struct BufferSpec { double **slot; size_t count; }; struct CopySpec { double *dst; const double *src; size_t count; }; struct ZeroSpec { double *ptr; size_t count; }; struct GpuRhsCache { Meta meta{}; int ex[3] = {0, 0, 0}; int matrix_size = 0; int device = -1; bool allocated = false; const double *last_x = nullptr; const double *last_y = nullptr; const double *last_z = nullptr; bool meta_uploaded = false; static const int max_mapped_buffers = 512; const double *host_buffers[max_mapped_buffers] = {nullptr}; const double *device_buffers[max_mapped_buffers] = {nullptr}; int mapped_buffer_count = 0; }; struct GpuRhsLaunchConfig { int device = -1; int sm_count = 0; int max_threads_per_block = 1024; int grid_dim = GRID_DIM; int block_dim = BLOCK_DIM; int step_size = GRID_DIM * BLOCK_DIM; int env_grid_dim = -1; int env_block_dim = -1; bool env_loaded = false; }; struct ExternalBufferRegistry { static const int max_mapped_buffers = 4096; const double *host_buffers[max_mapped_buffers] = {nullptr}; const double *device_buffers[max_mapped_buffers] = {nullptr}; int mapped_buffer_count = 0; }; struct OwnedBufferRegistry { static const int max_mapped_buffers = 4096; const double *host_buffers[max_mapped_buffers] = {nullptr}; double *device_buffers[max_mapped_buffers] = {nullptr}; size_t capacities[max_mapped_buffers] = {0}; bool valid[max_mapped_buffers] = {false}; int mapped_buffer_count = 0; }; struct PinnedHostRegistry { static const int max_buffers = 512; const double *host_buffers[max_buffers] = {nullptr}; size_t capacities[max_buffers] = {0}; bool registered[max_buffers] = {false}; bool failed[max_buffers] = {false}; int buffer_count = 0; }; GpuRhsCache &gpu_rhs_cache() { static GpuRhsCache cache; return cache; } GpuRhsLaunchConfig &gpu_rhs_launch_config() { static GpuRhsLaunchConfig config; return config; } ExternalBufferRegistry &external_buffer_registry() { static thread_local ExternalBufferRegistry registry; return registry; } OwnedBufferRegistry &owned_buffer_registry() { static thread_local OwnedBufferRegistry registry; return registry; } PinnedHostRegistry &pinned_host_registry() { static thread_local PinnedHostRegistry registry; return registry; } void reset_meta(Meta *meta) { memset(meta, 0, sizeof(Meta)); } int gpu_rhs_grid_dim() { return gpu_rhs_launch_config().grid_dim; } int gpu_rhs_block_dim() { return gpu_rhs_launch_config().block_dim; } void reset_buffer_map(GpuRhsCache &cache) { cache.mapped_buffer_count = 0; for (int i = 0; i < GpuRhsCache::max_mapped_buffers; ++i) { cache.host_buffers[i] = nullptr; cache.device_buffers[i] = nullptr; } } void map_buffer(GpuRhsCache &cache, const double *host_ptr, const double *device_ptr) { if (!host_ptr || !device_ptr) return; for (int i = 0; i < cache.mapped_buffer_count; ++i) { if (cache.host_buffers[i] == host_ptr) { cache.device_buffers[i] = device_ptr; return; } } if (cache.mapped_buffer_count >= GpuRhsCache::max_mapped_buffers) { cerr << "gpu RHS buffer registry exhausted at " << GpuRhsCache::max_mapped_buffers << " entries" << endl; return; } cache.host_buffers[cache.mapped_buffer_count] = host_ptr; cache.device_buffers[cache.mapped_buffer_count] = device_ptr; cache.mapped_buffer_count++; } void reset_external_buffer_map(ExternalBufferRegistry ®istry) { registry.mapped_buffer_count = 0; for (int i = 0; i < ExternalBufferRegistry::max_mapped_buffers; ++i) { registry.host_buffers[i] = nullptr; registry.device_buffers[i] = nullptr; } } void map_external_buffer(ExternalBufferRegistry ®istry, const double *host_ptr, const double *device_ptr) { if (!host_ptr || !device_ptr) return; for (int i = 0; i < registry.mapped_buffer_count; ++i) { if (registry.host_buffers[i] == host_ptr) { registry.device_buffers[i] = device_ptr; return; } } if (registry.mapped_buffer_count >= ExternalBufferRegistry::max_mapped_buffers) { cerr << "external CUDA buffer registry exhausted at " << ExternalBufferRegistry::max_mapped_buffers << " entries" << endl; return; } registry.host_buffers[registry.mapped_buffer_count] = host_ptr; registry.device_buffers[registry.mapped_buffer_count] = device_ptr; registry.mapped_buffer_count++; } void invalidate_owned_buffer_map(OwnedBufferRegistry ®istry) { for (int i = 0; i < registry.mapped_buffer_count; ++i) registry.valid[i] = false; } const double *find_owned_device_buffer(const OwnedBufferRegistry ®istry, const double *host_ptr) { if (!host_ptr) return nullptr; for (int i = 0; i < registry.mapped_buffer_count; ++i) { if (registry.valid[i] && registry.host_buffers[i] == host_ptr) return registry.device_buffers[i]; } return nullptr; } int find_owned_buffer_slot(OwnedBufferRegistry ®istry, const double *host_ptr) { int reusable_slot = -1; for (int i = 0; i < registry.mapped_buffer_count; ++i) { if (registry.host_buffers[i] == host_ptr) return i; if (!registry.valid[i] && reusable_slot < 0) reusable_slot = i; } if (reusable_slot >= 0) { registry.host_buffers[reusable_slot] = host_ptr; return reusable_slot; } if (registry.mapped_buffer_count >= OwnedBufferRegistry::max_mapped_buffers) return -1; const int slot = registry.mapped_buffer_count++; registry.host_buffers[slot] = host_ptr; registry.device_buffers[slot] = nullptr; registry.capacities[slot] = 0; registry.valid[slot] = false; return slot; } bool ensure_owned_buffer_capacity(OwnedBufferRegistry ®istry, int slot, size_t bytes) { if (slot < 0) return false; if (registry.device_buffers[slot] && registry.capacities[slot] >= bytes) return true; if (registry.device_buffers[slot]) { cudaError_t free_err = cudaFree(registry.device_buffers[slot]); if (free_err != cudaSuccess) { cerr << "cudaFree failed: " << cudaGetErrorString(free_err) << endl; return false; } registry.device_buffers[slot] = nullptr; registry.capacities[slot] = 0; } cudaError_t err = cudaMalloc((void **)®istry.device_buffers[slot], bytes); if (err != cudaSuccess) { cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << endl; return false; } registry.capacities[slot] = bytes; return true; } bool prepare_owned_buffer(const double *host_ptr, size_t count, bool zero_fill) { if (!host_ptr || count == 0) return false; OwnedBufferRegistry ®istry = owned_buffer_registry(); const int slot = find_owned_buffer_slot(registry, host_ptr); if (slot < 0) { cerr << "owned CUDA buffer registry exhausted" << endl; return false; } const size_t bytes = count * sizeof(double); if (!ensure_owned_buffer_capacity(registry, slot, bytes)) return false; cudaError_t err = zero_fill ? cudaMemset(registry.device_buffers[slot], 0, bytes) : cudaMemcpy(registry.device_buffers[slot], host_ptr, bytes, cudaMemcpyHostToDevice); if (err != cudaSuccess) { cerr << (zero_fill ? "cudaMemset" : "cudaMemcpy(H2D)") << " failed: " << cudaGetErrorString(err) << endl; return false; } registry.valid[slot] = true; return true; } int find_pinned_host_slot(PinnedHostRegistry ®istry, const double *host_ptr) { for (int i = 0; i < registry.buffer_count; ++i) { if (registry.host_buffers[i] == host_ptr) return i; } if (registry.buffer_count >= PinnedHostRegistry::max_buffers) return -1; const int slot = registry.buffer_count++; registry.host_buffers[slot] = host_ptr; registry.capacities[slot] = 0; registry.registered[slot] = false; registry.failed[slot] = false; return slot; } void ensure_host_buffer_registered(const double *host_ptr, size_t bytes) { if (!host_ptr || bytes == 0) return; PinnedHostRegistry ®istry = pinned_host_registry(); const int slot = find_pinned_host_slot(registry, host_ptr); if (slot < 0) return; if (registry.registered[slot] && registry.capacities[slot] >= bytes) return; if (registry.failed[slot] && registry.capacities[slot] >= bytes) return; if (registry.registered[slot]) { cudaError_t unreg_err = cudaHostUnregister(const_cast(registry.host_buffers[slot])); if (unreg_err != cudaSuccess && unreg_err != cudaErrorHostMemoryNotRegistered) cerr << "cudaHostUnregister failed: " << cudaGetErrorString(unreg_err) << endl; registry.registered[slot] = false; } cudaError_t err = cudaHostRegister(const_cast(host_ptr), bytes, cudaHostRegisterPortable); if (err == cudaSuccess || err == cudaErrorHostMemoryAlreadyRegistered) { registry.registered[slot] = true; registry.failed[slot] = false; registry.capacities[slot] = bytes; return; } cerr << "cudaHostRegister failed: " << cudaGetErrorString(err) << endl; registry.failed[slot] = true; registry.capacities[slot] = bytes; } bool ensure_device_buffer(double **ptr, size_t count) { if (*ptr) return true; cudaError_t err = cudaMalloc((void **)ptr, count * sizeof(double)); if (err != cudaSuccess) { cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << endl; return false; } return true; } bool allocate_buffers(const BufferSpec *specs, size_t count) { for (size_t i = 0; i < count; ++i) { if (!ensure_device_buffer(specs[i].slot, specs[i].count)) return false; } return true; } bool copy_buffers_to_device(const CopySpec *specs, size_t count) { for (size_t i = 0; i < count; ++i) { cudaError_t err = cudaMemcpy(specs[i].dst, specs[i].src, specs[i].count * sizeof(double), cudaMemcpyHostToDevice); if (err != cudaSuccess) { cerr << "cudaMemcpy(H2D) failed: " << cudaGetErrorString(err) << endl; return false; } } return true; } bool copy_buffer_to_device(double *dst, const double *src, size_t count) { cudaError_t err = cudaMemcpy(dst, src, count * sizeof(double), cudaMemcpyHostToDevice); if (err != cudaSuccess) { cerr << "cudaMemcpy(H2D) failed: " << cudaGetErrorString(err) << endl; return false; } return true; } const double *find_external_device_buffer(const ExternalBufferRegistry ®istry, const double *host_ptr) { if (!host_ptr) return nullptr; for (int i = 0; i < registry.mapped_buffer_count; ++i) { if (registry.host_buffers[i] == host_ptr) return registry.device_buffers[i]; } return nullptr; } bool copy_buffers_to_device_preferring_device(const CopySpec *specs, size_t count) { for (size_t i = 0; i < count; ++i) { const double *device_src = bssn_gpu_find_device_buffer(specs[i].src); cudaMemcpyKind kind = cudaMemcpyHostToDevice; const void *copy_src = specs[i].src; if (device_src) { copy_src = device_src; kind = cudaMemcpyDeviceToDevice; } cudaError_t err = cudaMemcpy(specs[i].dst, copy_src, specs[i].count * sizeof(double), kind); if (err != cudaSuccess) { cerr << "cudaMemcpy(" << (kind == cudaMemcpyDeviceToDevice ? "D2D" : "H2D") << ") failed: " << cudaGetErrorString(err) << endl; return false; } } return true; } bool zero_buffers(const ZeroSpec *specs, size_t count) { for (size_t i = 0; i < count; ++i) { cudaError_t err = cudaMemset(specs[i].ptr, 0, specs[i].count * sizeof(double)); if (err != cudaSuccess) { cerr << "cudaMemset failed: " << cudaGetErrorString(err) << endl; return false; } } return true; } void cleanup_gpu_rhs_cache() { GpuRhsCache &cache = gpu_rhs_cache(); OwnedBufferRegistry &owned = owned_buffer_registry(); PinnedHostRegistry &pinned = pinned_host_registry(); if (!cache.allocated) { for (int i = 0; i < owned.mapped_buffer_count; ++i) { if (owned.device_buffers[i]) { cudaError_t free_err = cudaFree(owned.device_buffers[i]); if (free_err != cudaSuccess) cerr << "cudaFree failed: " << cudaGetErrorString(free_err) << endl; } owned.device_buffers[i] = nullptr; owned.capacities[i] = 0; owned.valid[i] = false; owned.host_buffers[i] = nullptr; } owned.mapped_buffer_count = 0; for (int i = 0; i < pinned.buffer_count; ++i) { if (pinned.registered[i] && pinned.host_buffers[i]) { cudaError_t unreg_err = cudaHostUnregister(const_cast(pinned.host_buffers[i])); if (unreg_err != cudaSuccess && unreg_err != cudaErrorHostMemoryNotRegistered) cerr << "cudaHostUnregister failed: " << cudaGetErrorString(unreg_err) << endl; } pinned.host_buffers[i] = nullptr; pinned.capacities[i] = 0; pinned.registered[i] = false; pinned.failed[i] = false; } pinned.buffer_count = 0; return; } if (cache.device >= 0) cudaSetDevice(cache.device); destroy_meta(&cache.meta); reset_meta(&cache.meta); cache.ex[0] = cache.ex[1] = cache.ex[2] = 0; cache.matrix_size = 0; cache.device = -1; cache.allocated = false; cache.last_x = nullptr; cache.last_y = nullptr; cache.last_z = nullptr; reset_buffer_map(cache); reset_external_buffer_map(external_buffer_registry()); for (int i = 0; i < owned.mapped_buffer_count; ++i) { if (owned.device_buffers[i]) { cudaError_t free_err = cudaFree(owned.device_buffers[i]); if (free_err != cudaSuccess) cerr << "cudaFree failed: " << cudaGetErrorString(free_err) << endl; } owned.device_buffers[i] = nullptr; owned.capacities[i] = 0; owned.valid[i] = false; owned.host_buffers[i] = nullptr; } owned.mapped_buffer_count = 0; for (int i = 0; i < pinned.buffer_count; ++i) { if (pinned.registered[i] && pinned.host_buffers[i]) { cudaError_t unreg_err = cudaHostUnregister(const_cast(pinned.host_buffers[i])); if (unreg_err != cudaSuccess && unreg_err != cudaErrorHostMemoryNotRegistered) cerr << "cudaHostUnregister failed: " << cudaGetErrorString(unreg_err) << endl; } pinned.host_buffers[i] = nullptr; pinned.capacities[i] = 0; pinned.registered[i] = false; pinned.failed[i] = false; } pinned.buffer_count = 0; } bool register_gpu_rhs_cleanup() { static bool registered = false; if (!registered) { atexit(cleanup_gpu_rhs_cache); registered = true; } return true; } void ensure_gpu_rhs_invariant_symbols() { static bool initialized = false; if (initialized) return; double F1o3h = 1.0 / 3.0; double F2o3h = 2.0 / 3.0; double F1o6h = 1.0 / 6.0; double PIh = M_PI; int step = GRID_DIM * BLOCK_DIM; cudaMemcpyToSymbol(F1o3, &F1o3h, sizeof(double)); cudaMemcpyToSymbol(F2o3, &F2o3h, sizeof(double)); cudaMemcpyToSymbol(F1o6, &F1o6h, sizeof(double)); cudaMemcpyToSymbol(PI, &PIh, sizeof(double)); cudaMemcpyToSymbol(STEP_SIZE, &step, sizeof(int)); initialized = true; } bool ensure_gpu_rhs_launch_symbols(int device, int matrix_size) { GpuRhsLaunchConfig &config = gpu_rhs_launch_config(); if (!config.env_loaded) { config.env_grid_dim = read_positive_env_value("AMSS_GPU_GRID_DIM"); config.env_block_dim = read_positive_env_value("AMSS_GPU_BLOCK_DIM"); config.env_loaded = true; } if (config.device != device || config.sm_count <= 0) { cudaDeviceProp prop; cudaError_t err = cudaGetDeviceProperties(&prop, device); if (err != cudaSuccess) { cerr << "cudaGetDeviceProperties(" << device << ") failed: " << cudaGetErrorString(err) << endl; return false; } config.device = device; config.sm_count = prop.multiProcessorCount; config.max_threads_per_block = prop.maxThreadsPerBlock; } int block_dim = clamp_launch_block_dim(config.env_block_dim > 0 ? config.env_block_dim : 256, config.max_threads_per_block); if (block_dim <= 0) block_dim = BLOCK_DIM; int grid_dim = 1; if (config.env_grid_dim > 0) { grid_dim = config.env_grid_dim; } else { int needed_blocks = (matrix_size + block_dim - 1) / block_dim; int grid_cap = config.sm_count > 0 ? config.sm_count * 4 : GRID_DIM; if (grid_cap < 64) grid_cap = 64; if (grid_cap > 512) grid_cap = 512; grid_dim = needed_blocks < grid_cap ? needed_blocks : grid_cap; } if (grid_dim <= 0) grid_dim = 1; const int step_size = grid_dim * block_dim; if (config.step_size != step_size) { cudaError_t err = cudaMemcpyToSymbol(STEP_SIZE, &step_size, sizeof(int)); if (err != cudaSuccess) { cerr << "cudaMemcpyToSymbol(STEP_SIZE) failed: " << cudaGetErrorString(err) << endl; return false; } config.step_size = step_size; } config.grid_dim = grid_dim; config.block_dim = block_dim; return true; } bool prepare_gpu_rhs_cache(GpuRhsCache &cache, int device, int *ex) { register_gpu_rhs_cleanup(); ensure_gpu_rhs_invariant_symbols(); const bool shape_changed = !cache.allocated || cache.device != device || cache.ex[0] != ex[0] || cache.ex[1] != ex[1] || cache.ex[2] != ex[2]; if (!shape_changed) return true; if (cache.allocated) { if (cache.device >= 0) cudaSetDevice(cache.device); destroy_meta(&cache.meta); reset_meta(&cache.meta); } cache.device = device; cache.ex[0] = ex[0]; cache.ex[1] = ex[1]; cache.ex[2] = ex[2]; cache.matrix_size = ex[0] * ex[1] * ex[2]; cache.last_x = nullptr; cache.last_y = nullptr; cache.last_z = nullptr; cache.meta_uploaded = false; reset_buffer_map(cache); Meta *meta = &cache.meta; const int matrix_size = cache.matrix_size; const size_t fh_size = static_cast(ex[0] + 2) * static_cast(ex[1] + 2) * static_cast(ex[2] + 2); const size_t fh2_size = static_cast(ex[0] + 3) * static_cast(ex[1] + 3) * static_cast(ex[2] + 3); const BufferSpec buffers[] = { {&meta->X, static_cast(ex[0])}, {&meta->Y, static_cast(ex[1])}, {&meta->Z, static_cast(ex[2])}, {&meta->chi, static_cast(matrix_size)}, {&meta->dxx, static_cast(matrix_size)}, {&meta->dyy, static_cast(matrix_size)}, {&meta->dzz, static_cast(matrix_size)}, {&meta->trK, static_cast(matrix_size)}, {&meta->gxy, static_cast(matrix_size)}, {&meta->gxz, static_cast(matrix_size)}, {&meta->gyz, static_cast(matrix_size)}, {&meta->Axx, static_cast(matrix_size)}, {&meta->Axy, static_cast(matrix_size)}, {&meta->Axz, static_cast(matrix_size)}, {&meta->Ayz, static_cast(matrix_size)}, {&meta->Ayy, static_cast(matrix_size)}, {&meta->Azz, static_cast(matrix_size)}, {&meta->Gamx, static_cast(matrix_size)}, {&meta->Gamy, static_cast(matrix_size)}, {&meta->Gamz, static_cast(matrix_size)}, {&meta->Lap, static_cast(matrix_size)}, {&meta->betax, static_cast(matrix_size)}, {&meta->betay, static_cast(matrix_size)}, {&meta->betaz, static_cast(matrix_size)}, {&meta->dtSfx, static_cast(matrix_size)}, {&meta->dtSfy, static_cast(matrix_size)}, {&meta->dtSfz, static_cast(matrix_size)}, {&meta->chi_rhs, static_cast(matrix_size)}, {&meta->trK_rhs, static_cast(matrix_size)}, {&meta->gxx_rhs, static_cast(matrix_size)}, {&meta->gxy_rhs, static_cast(matrix_size)}, {&meta->gxz_rhs, static_cast(matrix_size)}, {&meta->gyy_rhs, static_cast(matrix_size)}, {&meta->gyz_rhs, static_cast(matrix_size)}, {&meta->gzz_rhs, static_cast(matrix_size)}, {&meta->Axx_rhs, static_cast(matrix_size)}, {&meta->Axy_rhs, static_cast(matrix_size)}, {&meta->Axz_rhs, static_cast(matrix_size)}, {&meta->Ayy_rhs, static_cast(matrix_size)}, {&meta->Ayz_rhs, static_cast(matrix_size)}, {&meta->Azz_rhs, static_cast(matrix_size)}, {&meta->Gamx_rhs, static_cast(matrix_size)}, {&meta->Gamy_rhs, static_cast(matrix_size)}, {&meta->Gamz_rhs, static_cast(matrix_size)}, {&meta->Lap_rhs, static_cast(matrix_size)}, {&meta->betax_rhs, static_cast(matrix_size)}, {&meta->betay_rhs, static_cast(matrix_size)}, {&meta->betaz_rhs, static_cast(matrix_size)}, {&meta->dtSfx_rhs, static_cast(matrix_size)}, {&meta->dtSfy_rhs, static_cast(matrix_size)}, {&meta->dtSfz_rhs, static_cast(matrix_size)}, {&meta->rho, static_cast(matrix_size)}, {&meta->Sx, static_cast(matrix_size)}, {&meta->Sy, static_cast(matrix_size)}, {&meta->Sz, static_cast(matrix_size)}, {&meta->Sxx, static_cast(matrix_size)}, {&meta->Sxy, static_cast(matrix_size)}, {&meta->Sxz, static_cast(matrix_size)}, {&meta->Syy, static_cast(matrix_size)}, {&meta->Syz, static_cast(matrix_size)}, {&meta->Szz, static_cast(matrix_size)}, {&meta->Gamxxx, static_cast(matrix_size)}, {&meta->Gamxxy, static_cast(matrix_size)}, {&meta->Gamxxz, static_cast(matrix_size)}, {&meta->Gamxyy, static_cast(matrix_size)}, {&meta->Gamxyz, static_cast(matrix_size)}, {&meta->Gamxzz, static_cast(matrix_size)}, {&meta->Gamyxx, static_cast(matrix_size)}, {&meta->Gamyxy, static_cast(matrix_size)}, {&meta->Gamyxz, static_cast(matrix_size)}, {&meta->Gamyyy, static_cast(matrix_size)}, {&meta->Gamyyz, static_cast(matrix_size)}, {&meta->Gamyzz, static_cast(matrix_size)}, {&meta->Gamzxx, static_cast(matrix_size)}, {&meta->Gamzxy, static_cast(matrix_size)}, {&meta->Gamzxz, static_cast(matrix_size)}, {&meta->Gamzyy, static_cast(matrix_size)}, {&meta->Gamzyz, static_cast(matrix_size)}, {&meta->Gamzzz, static_cast(matrix_size)}, {&meta->Rxx, static_cast(matrix_size)}, {&meta->Rxy, static_cast(matrix_size)}, {&meta->Rxz, static_cast(matrix_size)}, {&meta->Ryy, static_cast(matrix_size)}, {&meta->Ryz, static_cast(matrix_size)}, {&meta->Rzz, static_cast(matrix_size)}, {&meta->ham_Res, static_cast(matrix_size)}, {&meta->movx_Res, static_cast(matrix_size)}, {&meta->movy_Res, static_cast(matrix_size)}, {&meta->movz_Res, static_cast(matrix_size)}, {&meta->Gmx_Res, static_cast(matrix_size)}, {&meta->Gmy_Res, static_cast(matrix_size)}, {&meta->Gmz_Res, static_cast(matrix_size)}, {&meta->gxx, static_cast(matrix_size)}, {&meta->gyy, static_cast(matrix_size)}, {&meta->gzz, static_cast(matrix_size)}, {&meta->chix, static_cast(matrix_size)}, {&meta->chiy, static_cast(matrix_size)}, {&meta->chiz, static_cast(matrix_size)}, {&meta->gxxx, static_cast(matrix_size)}, {&meta->gxyx, static_cast(matrix_size)}, {&meta->gxzx, static_cast(matrix_size)}, {&meta->gyyx, static_cast(matrix_size)}, {&meta->gyzx, static_cast(matrix_size)}, {&meta->gzzx, static_cast(matrix_size)}, {&meta->gxxy, static_cast(matrix_size)}, {&meta->gxyy, static_cast(matrix_size)}, {&meta->gxzy, static_cast(matrix_size)}, {&meta->gyyy, static_cast(matrix_size)}, {&meta->gyzy, static_cast(matrix_size)}, {&meta->gzzy, static_cast(matrix_size)}, {&meta->gxxz, static_cast(matrix_size)}, {&meta->gxyz, static_cast(matrix_size)}, {&meta->gxzz, static_cast(matrix_size)}, {&meta->gyyz, static_cast(matrix_size)}, {&meta->gyzz, static_cast(matrix_size)}, {&meta->gzzz, static_cast(matrix_size)}, {&meta->Lapx, static_cast(matrix_size)}, {&meta->Lapy, static_cast(matrix_size)}, {&meta->Lapz, static_cast(matrix_size)}, {&meta->betaxx, static_cast(matrix_size)}, {&meta->betaxy, static_cast(matrix_size)}, {&meta->betaxz, static_cast(matrix_size)}, {&meta->betayy, static_cast(matrix_size)}, {&meta->betayz, static_cast(matrix_size)}, {&meta->betazz, static_cast(matrix_size)}, {&meta->betayx, static_cast(matrix_size)}, {&meta->betazy, static_cast(matrix_size)}, {&meta->betazx, static_cast(matrix_size)}, {&meta->Kx, static_cast(matrix_size)}, {&meta->Ky, static_cast(matrix_size)}, {&meta->Kz, static_cast(matrix_size)}, {&meta->Gamxx, static_cast(matrix_size)}, {&meta->Gamxy, static_cast(matrix_size)}, {&meta->Gamxz, static_cast(matrix_size)}, {&meta->Gamyy, static_cast(matrix_size)}, {&meta->Gamyz, static_cast(matrix_size)}, {&meta->Gamzz, static_cast(matrix_size)}, {&meta->Gamyx, static_cast(matrix_size)}, {&meta->Gamzy, static_cast(matrix_size)}, {&meta->Gamzx, static_cast(matrix_size)}, {&meta->div_beta, static_cast(matrix_size)}, {&meta->S, static_cast(matrix_size)}, {&meta->f, static_cast(matrix_size)}, {&meta->fxx, static_cast(matrix_size)}, {&meta->fxy, static_cast(matrix_size)}, {&meta->fxz, static_cast(matrix_size)}, {&meta->fyy, static_cast(matrix_size)}, {&meta->fyz, static_cast(matrix_size)}, {&meta->fzz, static_cast(matrix_size)}, {&meta->gupxx, static_cast(matrix_size)}, {&meta->gupxy, static_cast(matrix_size)}, {&meta->gupxz, static_cast(matrix_size)}, {&meta->gupyy, static_cast(matrix_size)}, {&meta->gupyz, static_cast(matrix_size)}, {&meta->gupzz, static_cast(matrix_size)}, {&meta->Gamxa, static_cast(matrix_size)}, {&meta->Gamya, static_cast(matrix_size)}, {&meta->Gamza, static_cast(matrix_size)}, {&meta->alpn1, static_cast(matrix_size)}, {&meta->chin1, static_cast(matrix_size)}, {&meta->fh, fh_size}, {&meta->fh2, fh2_size}, #if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) {&meta->reta, static_cast(matrix_size)}, #endif }; if (!allocate_buffers(buffers, sizeof(buffers) / sizeof(buffers[0]))) { destroy_meta(meta); reset_meta(meta); cache.allocated = false; cache.last_x = nullptr; cache.last_y = nullptr; cache.last_z = nullptr; reset_buffer_map(cache); return false; } cudaMemcpyToSymbol(metac, meta, sizeof(Meta)); int _1d_size[4]; int _2d_size[4]; int _3d_size[4]; for (int i = 0; i < 4; ++i) { _1d_size[i] = ex[0] + i; _2d_size[i] = _1d_size[i] * (ex[1] + i); _3d_size[i] = _2d_size[i] * (ex[2] + i); } cudaMemcpyToSymbol(ex_c, ex, 3 * sizeof(int)); cudaMemcpyToSymbol(_1D_SIZE, _1d_size, 4 * sizeof(int)); cudaMemcpyToSymbol(_2D_SIZE, _2d_size, 4 * sizeof(int)); cudaMemcpyToSymbol(_3D_SIZE, _3d_size, 4 * sizeof(int)); cache.allocated = true; cache.meta_uploaded = true; return true; } const double *find_mapped_device_buffer(const GpuRhsCache &cache, const double *host_ptr) { if (!host_ptr) return nullptr; for (int i = 0; i < cache.mapped_buffer_count; ++i) { if (cache.host_buffers[i] == host_ptr) return cache.device_buffers[i]; } return nullptr; } } // namespace const double *bssn_gpu_find_device_buffer(const double *host_ptr) { const double *device_ptr = find_external_device_buffer(external_buffer_registry(), host_ptr); if (device_ptr) return device_ptr; device_ptr = find_owned_device_buffer(owned_buffer_registry(), host_ptr); if (device_ptr) return device_ptr; return find_mapped_device_buffer(gpu_rhs_cache(), host_ptr); } int bssn_gpu_bind_process_device(int mpi_rank) { const int device = select_cuda_device_for_process(mpi_rank); if (device < 0) return 1; cudaError_t err = cudaSetDevice(device); if (err != cudaSuccess) { cerr << "cudaSetDevice(" << device << ") failed: " << cudaGetErrorString(err) << endl; return 1; } return 0; } void bssn_gpu_clear_cached_device_buffers() { reset_external_buffer_map(external_buffer_registry()); reset_buffer_map(gpu_rhs_cache()); invalidate_owned_buffer_map(owned_buffer_registry()); } void bssn_gpu_release_pinned_host_buffers() { PinnedHostRegistry &pinned = pinned_host_registry(); for (int i = 0; i < pinned.buffer_count; ++i) { if (pinned.registered[i] && pinned.host_buffers[i]) { cudaError_t unreg_err = cudaHostUnregister(const_cast(pinned.host_buffers[i])); if (unreg_err != cudaSuccess && unreg_err != cudaErrorHostMemoryNotRegistered) cerr << "cudaHostUnregister failed: " << cudaGetErrorString(unreg_err) << endl; } pinned.host_buffers[i] = nullptr; pinned.capacities[i] = 0; pinned.registered[i] = false; pinned.failed[i] = false; } pinned.buffer_count = 0; } void bssn_gpu_register_device_buffer(const double *host_ptr, const double *device_ptr) { map_external_buffer(external_buffer_registry(), host_ptr, device_ptr); } void bssn_gpu_prepare_host_buffer(const double *host_ptr, int count) { if (count > 0) ensure_host_buffer_registered(host_ptr, static_cast(count) * sizeof(double)); } int bssn_gpu_stage_upload_buffer(const double *host_ptr, int count) { bssn_gpu_prepare_host_buffer(host_ptr, count); return prepare_owned_buffer(host_ptr, static_cast(count), false) ? 0 : 1; } int bssn_gpu_stage_zero_buffer(const double *host_ptr, int count) { bssn_gpu_prepare_host_buffer(host_ptr, count); return prepare_owned_buffer(host_ptr, static_cast(count), true) ? 0 : 1; } int bssn_gpu_stage_upload_region(const double *host_ptr, const int *full_shape, const double *full_llb, const double *full_uub, const int *region_shape, const double *region_llb) { if (!host_ptr || !full_shape || !full_llb || !full_uub || !region_shape || !region_llb) return 1; const double *device_ptr = bssn_gpu_find_device_buffer(host_ptr); if (!device_ptr) return 1; int full_count = 1; for (int i = 0; i < 3; ++i) full_count *= full_shape[i]; bssn_gpu_prepare_host_buffer(host_ptr, full_count); int start[3] = {0, 0, 0}; for (int i = 0; i < 3; ++i) { if (full_shape[i] <= 0 || region_shape[i] <= 0) return 1; #ifdef Vertex #ifdef Cell #error Both Cell and Vertex are defined #endif const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i] - 1); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #ifdef Cell const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i]); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #error Not define Vertex nor Cell #endif #endif if (start[i] < 0 || start[i] + region_shape[i] > full_shape[i]) return 1; } cudaMemcpy3DParms parms = {}; parms.srcPtr = make_cudaPitchedPtr(const_cast(host_ptr), static_cast(full_shape[0]) * sizeof(double), static_cast(full_shape[0]), static_cast(full_shape[1])); parms.dstPtr = make_cudaPitchedPtr(const_cast(device_ptr), static_cast(full_shape[0]) * sizeof(double), static_cast(full_shape[0]), static_cast(full_shape[1])); parms.srcPos = make_cudaPos(static_cast(start[0]) * sizeof(double), static_cast(start[1]), static_cast(start[2])); parms.dstPos = parms.srcPos; parms.extent = make_cudaExtent(static_cast(region_shape[0]) * sizeof(double), static_cast(region_shape[1]), static_cast(region_shape[2])); parms.kind = cudaMemcpyHostToDevice; cudaError_t err = cudaMemcpy3D(&parms); if (err != cudaSuccess) { cerr << "cudaMemcpy3D(H2D region) failed: " << cudaGetErrorString(err) << endl; return 1; } return 0; } int bssn_gpu_stage_download_region(double *host_ptr, const int *full_shape, const double *full_llb, const double *full_uub, const int *region_shape, const double *region_llb) { if (!host_ptr || !full_shape || !full_llb || !full_uub || !region_shape || !region_llb) return 1; const double *device_ptr = bssn_gpu_find_device_buffer(host_ptr); if (!device_ptr) return 1; int full_count = 1; for (int i = 0; i < 3; ++i) full_count *= full_shape[i]; bssn_gpu_prepare_host_buffer(host_ptr, full_count); int start[3] = {0, 0, 0}; for (int i = 0; i < 3; ++i) { if (full_shape[i] <= 0 || region_shape[i] <= 0) return 1; #ifdef Vertex #ifdef Cell #error Both Cell and Vertex are defined #endif const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i] - 1); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #ifdef Cell const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i]); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #error Not define Vertex nor Cell #endif #endif if (start[i] < 0 || start[i] + region_shape[i] > full_shape[i]) return 1; } cudaMemcpy3DParms parms = {}; parms.srcPtr = make_cudaPitchedPtr(const_cast(device_ptr), static_cast(full_shape[0]) * sizeof(double), static_cast(full_shape[0]), static_cast(full_shape[1])); parms.dstPtr = make_cudaPitchedPtr(host_ptr, static_cast(full_shape[0]) * sizeof(double), static_cast(full_shape[0]), static_cast(full_shape[1])); parms.srcPos = make_cudaPos(static_cast(start[0]) * sizeof(double), static_cast(start[1]), static_cast(start[2])); parms.dstPos = parms.srcPos; parms.extent = make_cudaExtent(static_cast(region_shape[0]) * sizeof(double), static_cast(region_shape[1]), static_cast(region_shape[2])); parms.kind = cudaMemcpyDeviceToHost; cudaError_t err = cudaMemcpy3D(&parms); if (err != cudaSuccess) { cerr << "cudaMemcpy3D(D2H region) failed: " << cudaGetErrorString(err) << endl; return 1; } return 0; } int bssn_gpu_stage_download_region_to_buffer(const double *host_src_ptr, const int *full_shape, const double *full_llb, const double *full_uub, const int *region_shape, const double *region_llb, double *host_dst_ptr) { if (!host_src_ptr || !host_dst_ptr || !full_shape || !full_llb || !full_uub || !region_shape || !region_llb) return 1; const double *device_ptr = bssn_gpu_find_device_buffer(host_src_ptr); if (!device_ptr) return 1; int start[3] = {0, 0, 0}; for (int i = 0; i < 3; ++i) { if (full_shape[i] <= 0 || region_shape[i] <= 0) return 1; #ifdef Vertex #ifdef Cell #error Both Cell and Vertex are defined #endif const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i] - 1); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #ifdef Cell const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i]); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #error Not define Vertex nor Cell #endif #endif if (start[i] < 0 || start[i] + region_shape[i] > full_shape[i]) return 1; } cudaMemcpy3DParms parms = {}; parms.srcPtr = make_cudaPitchedPtr(const_cast(device_ptr), static_cast(full_shape[0]) * sizeof(double), static_cast(full_shape[0]), static_cast(full_shape[1])); parms.dstPtr = make_cudaPitchedPtr(host_dst_ptr, static_cast(region_shape[0]) * sizeof(double), static_cast(region_shape[0]), static_cast(region_shape[1])); parms.srcPos = make_cudaPos(static_cast(start[0]) * sizeof(double), static_cast(start[1]), static_cast(start[2])); parms.dstPos = make_cudaPos(0, 0, 0); parms.extent = make_cudaExtent(static_cast(region_shape[0]) * sizeof(double), static_cast(region_shape[1]), static_cast(region_shape[2])); parms.kind = cudaMemcpyDeviceToHost; cudaError_t err = cudaMemcpy3D(&parms); if (err != cudaSuccess) { cerr << "cudaMemcpy3D(D2H region->buffer) failed: " << cudaGetErrorString(err) << endl; return 1; } return 0; } int bssn_gpu_stage_upload_buffer_to_region(const double *host_src_ptr, double *host_dst_ptr, const int *full_shape, const double *full_llb, const double *full_uub, const int *region_shape, const double *region_llb) { if (!host_src_ptr || !host_dst_ptr || !full_shape || !full_llb || !full_uub || !region_shape || !region_llb) return 1; const double *device_ptr = bssn_gpu_find_device_buffer(host_dst_ptr); if (!device_ptr) return 1; int start[3] = {0, 0, 0}; for (int i = 0; i < 3; ++i) { if (full_shape[i] <= 0 || region_shape[i] <= 0) return 1; #ifdef Vertex #ifdef Cell #error Both Cell and Vertex are defined #endif const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i] - 1); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #ifdef Cell const double dx = (full_uub[i] - full_llb[i]) / static_cast(full_shape[i]); start[i] = static_cast((region_llb[i] - full_llb[i]) / dx + 0.4); #else #error Not define Vertex nor Cell #endif #endif if (start[i] < 0 || start[i] + region_shape[i] > full_shape[i]) return 1; } cudaMemcpy3DParms parms = {}; parms.srcPtr = make_cudaPitchedPtr(const_cast(host_src_ptr), static_cast(region_shape[0]) * sizeof(double), static_cast(region_shape[0]), static_cast(region_shape[1])); parms.dstPtr = make_cudaPitchedPtr(const_cast(device_ptr), static_cast(full_shape[0]) * sizeof(double), static_cast(full_shape[0]), static_cast(full_shape[1])); parms.srcPos = make_cudaPos(0, 0, 0); parms.dstPos = make_cudaPos(static_cast(start[0]) * sizeof(double), static_cast(start[1]), static_cast(start[2])); parms.extent = make_cudaExtent(static_cast(region_shape[0]) * sizeof(double), static_cast(region_shape[1]), static_cast(region_shape[2])); parms.kind = cudaMemcpyHostToDevice; cudaError_t err = cudaMemcpy3D(&parms); if (err != cudaSuccess) { cerr << "cudaMemcpy3D(H2D buffer->region) failed: " << cudaGetErrorString(err) << endl; return 1; } return 0; } __global__ void test_const_address(double * testd){ int _t = blockIdx.x*blockDim.x+threadIdx.x; if(_t == 0) testd[0] = F1o3; } __global__ void enforce_ga(double * trA){ int _t = blockIdx.x*blockDim.x+threadIdx.x; //int ps; //TOTRY: i,j,k; double value; while(_t < _3D_SIZE[0]) { M_ gxx[_t] = M_ dxx[_t] + 1; M_ gyy[_t] = M_ dyy[_t] + 1; M_ gzz[_t] = M_ dzz[_t] + 1; // for M_ g; M_ gupzz[_t] = M_ gxx[_t] * M_ gyy[_t] * M_ gzz[_t] + M_ gxy[_t] * M_ gyz[_t] * M_ gxz[_t] + M_ gxz[_t] * M_ gxy[_t] * M_ gyz[_t] - M_ gxz[_t] * M_ gyy[_t] * M_ gxz[_t] - M_ gxy[_t] * M_ gxy[_t] * M_ gzz[_t] - M_ gxx[_t] * M_ gyz[_t] * M_ gyz[_t]; M_ gupzz[_t] = 1.0 / pow( M_ gupzz[_t] , F1o3 ) ; M_ gxx[_t] = M_ gxx[_t] * M_ gupzz[_t]; M_ gxy[_t] = M_ gxy[_t] * M_ gupzz[_t]; M_ gxz[_t] = M_ gxz[_t] * M_ gupzz[_t]; M_ gyy[_t] = M_ gyy[_t] * M_ gupzz[_t]; M_ gyz[_t] = M_ gyz[_t] * M_ gupzz[_t]; M_ gzz[_t] = M_ gzz[_t] * M_ gupzz[_t]; M_ dxx[_t] = M_ gxx[_t] - 1; M_ dyy[_t] = M_ gyy[_t] - 1; M_ dzz[_t] = M_ gzz[_t] - 1; // for A ; M_ gupxx[_t] = ( M_ gyy[_t] * M_ gzz[_t] - M_ gyz[_t] * M_ gyz[_t] ); M_ gupxy[_t] = - ( M_ gxy[_t] * M_ gzz[_t] - M_ gyz[_t] * M_ gxz[_t] ); M_ gupxz[_t] = ( M_ gxy[_t] * M_ gyz[_t] - M_ gyy[_t] * M_ gxz[_t] ); M_ gupyy[_t] = ( M_ gxx[_t] * M_ gzz[_t] - M_ gxz[_t] * M_ gxz[_t] ); M_ gupyz[_t] = - ( M_ gxx[_t] * M_ gyz[_t] - M_ gxy[_t] * M_ gxz[_t] ); M_ gupzz[_t] = ( M_ gxx[_t] * M_ gyy[_t] - M_ gxy[_t] * M_ gxy[_t] ); trA[_t] = M_ gupxx[_t] *M_ Axx[_t] + M_ gupyy[_t] * M_ Ayy[_t] + M_ gupzz[_t] * M_ Azz[_t] + 2 * (M_ gupxy[_t] *M_ Axy[_t] + M_ gupxz[_t] *M_ Axz[_t] + M_ gupyz[_t] * M_ Ayz[_t]); M_ Axx[_t] = M_ Axx[_t] - F1o3 * M_ gxx[_t] * trA[_t]; M_ Axy[_t] = M_ Axy[_t] - F1o3 * M_ gxy[_t] * trA[_t]; M_ Axz[_t] = M_ Axz[_t] - F1o3 * M_ gxz[_t] * trA[_t]; M_ Ayy[_t] = M_ Ayy[_t] - F1o3 * M_ gyy[_t] * trA[_t]; M_ Ayz[_t] = M_ Ayz[_t] - F1o3 * M_ gyz[_t] * trA[_t]; M_ Azz[_t] = M_ Azz[_t] - F1o3 * M_ gzz[_t] * trA[_t]; //------------------- _t += STEP_SIZE; } } inline void sub_enforce_ga(double *trA, int matrix_size){ enforce_ga<<>>(trA); cudaMemset(trA,0,matrix_size * sizeof(double)); //cudaMemset(Mh_ gupxx,0,matrix_size * sizeof(double)); //trA gxx,gyy,gzz gupxx,gupxy,gupxz,gupyy,gupyz,gupzz } __device__ volatile unsigned int global_count = 0; __global__ void test_init_matrix(){ int tid = blockIdx.x*blockDim.x+threadIdx.x; int curr = tid; while(curr < _3D_SIZE[2]) { metac.fh[curr] = 0; curr += STEP_SIZE; } curr = tid; while(curr < _3D_SIZE[0]) { metac.betaxx[curr] = 0; metac.betaxy[curr] = 0; metac.betaxz[curr] = 0; curr += STEP_SIZE; } } __global__ void init_matrix(double * mat){ int tid = blockIdx.x*blockDim.x+threadIdx.x; int curr = tid; while(curr < _3D_SIZE[0]) { mat[curr] = 0; curr += STEP_SIZE; } } __global__ void init_3_matrixs(double * mat1,double* mat2,double *mat3){ int tid = blockIdx.x*blockDim.x+threadIdx.x; int curr = tid; while(curr < _3D_SIZE[0]) { mat1[curr] = 0; mat2[curr] = 0; mat3[curr] = 0; curr += STEP_SIZE; } } __global__ void init_matrix_fh(double * mat){ int tid = blockIdx.x*blockDim.x+threadIdx.x; int curr = tid; while(curr < _3D_SIZE[2]) { mat[curr] = 0; curr += STEP_SIZE; } } __global__ void sub_symmetry_bd_partF(int ord, double * func, double *funcc) { int curr = blockIdx.x*blockDim.x+threadIdx.x; int ps; //TOTRY: i,j,k; double value; while(curr < _3D_SIZE[0]) { int k = curr / _2D_SIZE[0]; ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); //= ps % ex_c[0]; funcc[i+ ord + (ord +j)* _1D_SIZE[ord] + (k + ord) * _2D_SIZE[ord]] = func[curr]; curr += STEP_SIZE; } } #ifdef Vertex __global__ void sub_symmetry_bd_partI(int ord, double * func, double * funcc,double S1){ //for i int curr = blockIdx.x*blockDim.x+threadIdx.x; int ps; int m; while(curr < (ex_c[1]+ord)*(ex_c[2]+ord) ){ m = ord * 2; ps = curr * _1D_SIZE[ord]; for(int i = 0;i < ord; ++i){ funcc[ps] = funcc [ps + m] * S1; ps ++; m -= 2; } curr+= STEP_SIZE; } __syncthreads(); } __global__ void sub_symmetry_bd_partJ(int ord,double * func, double * funcc,double S2){ //for j int curr = blockIdx.x*blockDim.x+threadIdx.x; int ps; int m; while(curr < (ex_c[0]+ord)*(ex_c[2]+ord)) { m = 2 * ord; ps = (curr/_1D_SIZE[ord])*_2D_SIZE[ord] + (curr % _1D_SIZE[ord]); for(int i = 0;i>>(ord,func,funcc); sub_symmetry_bd_partI<<>>(ord,func,funcc,SoA[0]); sub_symmetry_bd_partJ<<>>(ord,func,funcc,SoA[1]); sub_symmetry_bd_partK<<>>(ord,func,funcc,SoA[2]); } __global__ void sub_fdderivs_part1(double * f,double *fh,double *fxx,double *fxy,double *fxz,double *fyy,double *fyz,double *fzz) { int curr = blockIdx.x*blockDim.x+threadIdx.x; int ps; //TOTRY: i,j,k; double value; while(curr < _3D_SIZE[0]) { int k = curr / _2D_SIZE[0]; ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); if(k == ex_c[2]-1 || i == ex_c[0]-1 || j == ex_c[1]-1){ curr += STEP_SIZE; continue; } else { //xx if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0]){ fxx[curr] = Fdxdx*(-_FH2_(i,(j+2),(k+2))+16*_FH2_((i+1),(j+2),(k+2))-30*_FH2_((i+2),(j+2),(k+2)) -_FH2_((i+4),(j+2),(k+2))+16*_FH2_((i+3),(j+2),(k+2)) ); } else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0]){ fxx[curr] = Sdxdx*(_FH2_((i+1),(j+2),(k+2))-2*_FH2_((i+2),(j+2),(k+2)) +_FH2_(i+3,(j+2),(k+2)) ); } //zz-- if(k+2 <= ijk_max[2] && k-2 >= ijk_min[2]){ fzz[curr] = Fdzdz * (-_FH2_((i+2),(j+2),k) + 16 *_FH2_((i+2),(j+2),(k+1))- 30*_FH2_((i+2),(j+2),(k+2)) -_FH2_((i+2),(j+2),(k+4))+ 16*_FH2_((i+2),(j+2),(k+3)) ); } else if(k+1 <= ijk_max[2] && k-1 >= ijk_min[2]){ fzz[curr] = Sdzdz*(_FH2_((i+2),(j+2),(k+1))- 2 * _FH2_((i+2),(j+2),(k+2)) + _FH2_((i+2),(j+2),(k+3)) ); } //yy-- if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1]){ fyy[curr] = Fdydy*(-_FH2_((i+2),j,(k+2))+16*_FH2_((i+2),(j+1),(k+2))-30*_FH2_((i+2),(j+2),(k+2)) -_FH2_((i+2),(j+4),(k+2))+16*_FH2_((i+2),(j+3),(k+2)) ); } else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1]){ fyy[curr] = Sdydy*(_FH2_((i+2),(j+1),(k+2))-2*_FH2_((i+2),(j+2),(k+2)) +_FH2_((i+2),(j+3),(k+2)) ); } //xy if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0] && j+2 <= ijk_max[1] && j-2 >= ijk_min[1]) fxy[curr] = Fdxdy*((_FH2_(i,j,(k+2))-8*_FH2_((i+1),j,(k+2))+8*_FH2_((i+3),j,(k+2))-_FH2_((i+4),j,(k+2))) -8 *(_FH2_(i,(j+1),(k+2))-8*_FH2_((i+1),(j+1),(k+2))+8*_FH2_((i+3),(j+1),(k+2))-_FH2_((i+4),(j+1),(k+2))) +8 *(_FH2_(i,(j+3),(k+2))-8*_FH2_((i+1),(j+3),(k+2))+8*_FH2_((i+3),(j+3),(k+2))-_FH2_((i+4),(j+3),(k+2))) - (_FH2_(i,(j+4),(k+2))-8*_FH2_((i+1),(j+4),(k+2))+8*_FH2_((i+3),(j+4),(k+2))-_FH2_((i+4),(j+4),(k+2)))); else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0] && j+1 <= ijk_max[1] && j-1 >= ijk_min[1]) fxy[curr] = Sdxdy*(_FH2_((i+1),(j+1),(k+2))-_FH2_((i+3),(j+1),(k+2))-_FH2_((i+1),(j+3),(k+2))+_FH2_((i+3),(j+3),(k+2))); //xz if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0] && k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) fxz[curr] = Fdxdz*( (_FH2_(i,(j+2),k)-8*_FH2_((i+1),(j+2),k)+8*_FH2_((i+3),(j+2),k)-_FH2_((i+4),(j+2),k)) -8 *(_FH2_(i,(j+2),(k+1))-8*_FH2_((i+1),(j+2),(k+1))+8*_FH2_((i+3),(j+2),(k+1))-_FH2_((i+4),(j+2),(k+1))) +8 *(_FH2_(i,(j+2),(k+3))-8*_FH2_((i+1),(j+2),(k+3))+8*_FH2_((i+3),(j+2),(k+3))-_FH2_((i+4),(j+2),(k+3))) - (_FH2_(i,(j+2),(k+4))-8*_FH2_((i+1),(j+2),(k+4))+8*_FH2_((i+3),(j+2),(k+4))-_FH2_((i+4),(j+2),(k+4)))); else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0] && k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) fxz[curr] = Sdxdz*(_FH2_((i+1),(j+2),(k+1))-_FH2_((i+3),(j+2),(k+1))-_FH2_((i+1),(j+2),(k+3))+_FH2_((i+3),(j+2),(k+3))); //yz if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1] && k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) fyz[curr] = Fdydz*( (_FH2_((i+2),j,k)-8*_FH2_((i+2),(j+1),k)+8*_FH2_((i+2),(j+3),k)-_FH2_((i+2),(j+4),k)) -8 *(_FH2_((i+2),j,(k+1))-8*_FH2_((i+2),(j+1),(k+1))+8*_FH2_((i+2),(j+3),(k+1))-_FH2_((i+2),(j+4),(k+1))) +8 *(_FH2_((i+2),j,(k+3))-8*_FH2_((i+2),(j+1),(k+3))+8*_FH2_((i+2),(j+3),(k+3))-_FH2_((i+2),(j+4),(k+3))) - (_FH2_((i+2),j,(k+4))-8*_FH2_((i+2),(j+1),(k+4))+8*_FH2_((i+2),(j+3),(k+4))-_FH2_((i+2),(j+4),(k+4)))); else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1] && k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) fyz[curr] = Sdydz*(_FH2_((i+2),(j+1),(k+1))-_FH2_((i+2),(j+3),(k+1))-_FH2_((i+2),(j+1),(k+3))+_FH2_((i+2),(j+3),(k+3))); curr += STEP_SIZE; } } __syncthreads(); } inline void sub_fdderivs(double * f,double *fh,double *fxx,double *fxy,double *fxz,double *fyy,double *fyz,double *fzz,double* SoA) { sub_symmetry_bd(2,f,fh,SoA); cudaMemset(fxx,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fxy,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fxz,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fyy,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fyz,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fzz,0,_3D_SIZE[0] * sizeof(double)); sub_fdderivs_part1<<>>(f,fh,fxx,fxy,fxz,fyy,fyz,fzz); } __global__ void sub_fderivs_part1(double * f,double * fh,double *fx,double *fy,double *fz ) { int curr = blockIdx.x*blockDim.x+threadIdx.x; int ps; //TOTRY: i,j,k; double value; while(curr < _3D_SIZE[0]) { int k = curr / _2D_SIZE[0]; ps = curr - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); if(k == ex_c[2]-1 || i == ex_c[0]-1 || j == ex_c[1]-1){ curr += STEP_SIZE; continue; } //X-- if(i+2 <= ijk_max[0] && i-2 >= ijk_min[0]) fx[curr] = d12dxyz[0]*(fh[i+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] - 8*fh[i+1+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + 8*fh[i+3+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] - fh[i+4+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] ); else if(i+1 <= ijk_max[0] && i-1 >= ijk_min[0]) fx[curr] = d2dxyz[0]*(-fh[i+1+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + fh[i+3+(j+2)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] ); //Y-- if(j+2 <= ijk_max[1] && j-2 >= ijk_min[1]) fy[curr]=d12dxyz[1]*(fh[i+2+j*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]- 8*fh[i+2+(j+1)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + 8*fh[i+2+(j+3)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] - fh[i+2+(j+4)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]); else if(j+1 <= ijk_max[1] && j-1 >= ijk_min[1]) fy[curr]=d2dxyz[1]*(-fh[i+2+(j+1)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]] + fh[i+2+(j+3)*_1D_SIZE[2]+(k+2)*_2D_SIZE[2]]); //Z-- if(k+2 <= ijk_max[2] && k-2 >= ijk_min[2]) fz[curr]=d12dxyz[2]*( fh[i+2+(j+2)*_1D_SIZE[2]+k *_2D_SIZE[2]] - 8* fh[i+2+(j+2)*_1D_SIZE[2]+(k+1)*_2D_SIZE[2]] + 8* fh[i+2+(j+2)*_1D_SIZE[2]+(k+3)*_2D_SIZE[2]] - fh[i+2+(j+2)*_1D_SIZE[2]+(k+4)*_2D_SIZE[2]]); else if(k+1 <= ijk_max[2] && k-1 >= ijk_min[2]) fz[curr]=d2dxyz[2]*(-fh[i+2+(j+2)*_1D_SIZE[2]+(k+1)*_2D_SIZE[2]]+ fh[i+2+(j+2)*_1D_SIZE[2]+(k+3)*_2D_SIZE[2]]); curr += STEP_SIZE; } } inline void sub_fderivs(double * f,double * fh,double *fx,double *fy,double *fz,double * SoA) { sub_symmetry_bd(2,f,fh,SoA); cudaMemset(fx,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fy,0,_3D_SIZE[0] * sizeof(double)); cudaMemset(fz,0,_3D_SIZE[0] * sizeof(double)); sub_fderivs_part1<<>>(f,fh,fx,fy,fz); } __global__ void computeRicci_part1(double * dst) { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { dst[_t] = M_ gupxx [_t]* M_ fxx [_t]+ M_ gupyy[_t]* M_ fyy[_t]+ M_ gupzz[_t]* M_ fzz[_t]+ ( M_ gupxy[_t]* M_ fxy[_t]+ M_ gupxz[_t]* M_ fxz[_t]+ M_ gupyz[_t]* M_ fyz[_t]) * 2; _t += STEP_SIZE; } } inline void computeRicci(double * src,double* dst,double * SoA, Meta* meta) { sub_fdderivs(src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,SoA); computeRicci_part1<<>>(dst); }/*Exception*/ __global__ void sub_kodis_part1(double *f,double *fh,double *f_rhs) { int _t = blockIdx.x*blockDim.x+threadIdx.x; int ps; //TOTRY: i,j,k; double value; double inc_f_rhs; while(_t < _3D_SIZE[0]) { int k = _t / _2D_SIZE[0]; ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); if(k == ex_c[2]-1 && i == ex_c[0]-1 && j == ex_c[1]-1){ _t += STEP_SIZE; continue; } if(i-3 >= ijk_min3[0] && i+3 <= ijk_max[0] && j-3 >= ijk_min3[1] && j+3 <= ijk_max[1] && k-3 >= ijk_min3[2] && k+3 <= ijk_max[2]) { // x direction inc_f_rhs = ( (_FH3_(i,(j+3),(k+3))+_FH3_((i+6),(j+3),(k+3))) - 6*(_FH3_((i+1),(j+3),(k+3))+_FH3_((i+5),(j+3),(k+3))) + 15*(_FH3_((i+2),(j+3),(k+3))+_FH3_((i+4),(j+3),(k+3))) - 20* _FH3_((i+3),(j+3),(k+3)) ) /dX; // y direction inc_f_rhs += ( (_FH3_((i+3),j,(k+3))+_FH3_((i+3),(j+6),(k+3))) - 6*(_FH3_((i+3),(j+1),(k+3))+_FH3_((i+3),(j+5),(k+3))) + 15*(_FH3_((i+3),(j+2),(k+3))+_FH3_((i+3),(j+4),(k+3))) - 20* _FH3_((i+3),(j+3),(k+3)) )/dY; // z direction inc_f_rhs += ( (_FH3_((i+3),(j+3),k)+_FH3_((i+3),(j+3),(k+6))) - 6*(_FH3_((i+3),(j+3),(k+1))+_FH3_((i+3),(j+3),(k+5))) + 15*(_FH3_((i+3),(j+3),(k+2))+_FH3_((i+3),(j+3),(k+4))) - 20* _FH3_((i+3),(j+3),(k+3)) )/dZ; inc_f_rhs *= eps_c; inc_f_rhs /= 64; f_rhs[_t] += inc_f_rhs; //be careful the mark is "+=" not "==" ! } _t += STEP_SIZE; } } inline void sub_kodis(double *f,double *fh,double *f_rhs,double *SoA) { sub_symmetry_bd(3,f,fh,SoA); sub_kodis_part1<<>>(f,fh,f_rhs); } __global__ void sub_lopsided_part1(double *f,double* fh,double *f_rhs,double *Sfx,double *Sfy,double *Sfz) { int _t = blockIdx.x*blockDim.x+threadIdx.x; int ps; //TOTRY: i,j,k; double value; while(_t < _3D_SIZE[0]) { int k = _t / _2D_SIZE[0]; ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); if(k < ex_c[2]-1 && i < ex_c[0]-1 && j < ex_c[1]-1){ // x direction if(Sfx[_t] >= 0 && i+3 <= ijk_max[0] && i-1 >= ijk_min2[0]) f_rhs[_t]=f_rhs[_t]+ Sfx[_t]*d12dxyz[0]*(-3*_FH3_((i+2),(j+3),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+4),(j+3),(k+3)) -6*_FH3_((i+5),(j+3),(k+3))+ _FH3_((i+6),(j+3),(k+3))); else if(Sfx[_t] <= 0 && i-3 >= ijk_min2[0] && i+1 <= ijk_max[0]) f_rhs[_t]=f_rhs[_t]- Sfx[_t]*d12dxyz[0]*(-3*_FH3_((i+4),(j+3),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+2),(j+3),(k+3)) -6*_FH3_((i+1),(j+3),(k+3))+ _FH3_(i,(j+3),(k+3))); else if(i+2 <= ijk_max[0] && i-2 >= ijk_min2[0]) f_rhs[_t]=f_rhs[_t]+ Sfx[_t]*d12dxyz[0]*(_FH3_((i+1),(j+3),(k+3))-8*_FH3_((i+2),(j+3),(k+3))+8*_FH3_((i+4),(j+3),(k+3))-_FH3_((i+5),(j+3),(k+3))); else if(i+1 <= ijk_max[0] && i-1 >= ijk_min2[0]) f_rhs[_t]=f_rhs[_t] + Sfx[_t]*d2dxyz[0]*(-_FH3_((i+2),(j+3),(k+3))+_FH3_((i+4),(j+3),(k+3))); // y direction if(Sfy[_t] >= 0 && j+3 <= ijk_max[1] && j-1 >= ijk_min2[1]) f_rhs[_t]=f_rhs[_t]+ Sfy[_t]*d12dxyz[1]*(-3*_FH3_((i+3),(j+2),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+4),(k+3)) -6*_FH3_((i+3),(j+5),(k+3))+ _FH3_((i+3),(j+6),(k+3))); else if(Sfy[_t] <= 0 && j-3 >= ijk_min2[1] && j+1 <= ijk_max[1]) f_rhs[_t]=f_rhs[_t]- Sfy[_t]*d12dxyz[1]*(-3*_FH3_((i+3),(j+4),(k+3))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+2),(k+3)) -6*_FH3_((i+3),(j+1),(k+3))+ _FH3_((i+3),j,(k+3))); else if(j+2 <= ijk_max[1] && j-2 >= ijk_min2[1]) f_rhs[_t]=f_rhs[_t]+ Sfy[_t]*d12dxyz[1]*(_FH3_((i+3),(j+1),(k+3))-8*_FH3_((i+3),(j+2),(k+3))+8*_FH3_((i+3),(j+4),(k+3))-_FH3_((i+3),(j+5),(k+3))); else if(j+1 <= ijk_max[1] && j-1 >= ijk_min2[1]) f_rhs[_t]=f_rhs[_t] + Sfy[_t]*d2dxyz[1]*(-_FH3_((i+3),(j+2),(k+3))+_FH3_((i+3),(j+4),(k+3))); // z direction if(Sfz[_t] >= 0 && k+3 <= ijk_max[2] && k-1 >= ijk_min2[2]) // v // D f = ------[ - 3f - 10f + 18f - 6f + f ] // i 12dx i-v i i+v i+2v i+3v f_rhs[_t]=f_rhs[_t]+ Sfz[_t]*d12dxyz[2]*(-3*_FH3_((i+3),(j+3),(k+2))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+3),(k+4)) -6*_FH3_((i+3),(j+3),(k+5))+ _FH3_((i+3),(j+3),(k+6))); else if(Sfz[_t] <= 0 && k-3 >= ijk_min2[2] && k+1 <= ijk_max[2]) f_rhs[_t]=f_rhs[_t]- Sfz[_t]*d12dxyz[2]*(-3*_FH3_((i+3),(j+3),(k+4))-10*_FH3_((i+3),(j+3),(k+3))+18*_FH3_((i+3),(j+3),(k+2)) -6*_FH3_((i+3),(j+3),(k+1))+ _FH3_((i+3),(j+3),k)); else if(k+2 <= ijk_max[2] && k-2 >= ijk_min2[2]) f_rhs[_t]=f_rhs[_t]+ Sfz[_t]*d12dxyz[2]*(_FH3_((i+3),(j+3),(k+1))-8*_FH3_((i+3),(j+3),(k+2))+8*_FH3_((i+3),(j+3),(k+4))-_FH3_((i+3),(j+3),(k+5))); else if(k+1 <= ijk_max[2] && k-1 >= ijk_min2[2]) f_rhs[_t]=f_rhs[_t]+Sfz[_t]*d2dxyz[2]*(-_FH3_((i+3),(j+3),(k+2))+_FH3_((i+3),(j+3),(k+4))); } //------------------- _t += STEP_SIZE; } } inline void sub_lopsided(double *f,double*fh,double *f_rhs,double *Sfx,double *Sfy,double *Sfz,double *SoA){ sub_symmetry_bd(3,f,fh,SoA); sub_lopsided_part1<<>>(f,fh,f_rhs,Sfx,Sfy,Sfz); } __global__ void compute_rhs_bssn_part1() { int tid = blockIdx.x*blockDim.x+threadIdx.x; int curr = tid; while(curr < _3D_SIZE[0]) { metac.alpn1[curr] = metac.Lap[curr] + 1; metac.chin1[curr] = metac.chi[curr] + 1; metac.gxx[curr] = metac.dxx[curr] + 1; metac.gyy[curr] = metac.dyy[curr] + 1; metac.gzz[curr] = metac.dzz[curr] + 1; curr += STEP_SIZE; } } __global__ void compute_rhs_bssn_part2() { //__shared__ int judge = 1; int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ div_beta[_t] = M_ betaxx[_t] + M_ betayy[_t] + M_ betazz[_t]; M_ chi_rhs[_t] = F2o3 *M_ chin1[_t]*( M_ alpn1[_t] * M_ trK[_t] - M_ div_beta[_t] ); //rhs[_t] for M_ chi M_ gxx_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axx[_t] - F2o3 * M_ gxx[_t]* M_ div_beta[_t] + 2 *( M_ gxx[_t]* M_ betaxx[_t]+ M_ gxy[_t]* M_ betayx[_t]+ M_ gxz[_t]* M_ betazx[_t]); M_ gyy_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Ayy[_t] - F2o3 * M_ gyy[_t]* M_ div_beta[_t] + 2 *( M_ gxy[_t]* M_ betaxy[_t]+ M_ gyy[_t]* M_ betayy[_t]+ M_ gyz[_t]* M_ betazy[_t]); M_ gzz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Azz[_t] - F2o3 * M_ gzz[_t]* M_ div_beta[_t] + 2 *( M_ gxz[_t]* M_ betaxz[_t]+ M_ gyz[_t]* M_ betayz[_t]+ M_ gzz[_t]* M_ betazz[_t]); M_ gxy_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axy[_t] + F1o3 * M_ gxy[_t] * M_ div_beta[_t] + M_ gxx[_t]* M_ betaxy[_t] + M_ gxz[_t]* M_ betazy[_t]+ M_ gyy[_t]* M_ betayx[_t]+ M_ gyz[_t]* M_ betazx[_t] - M_ gxy[_t]* M_ betazz[_t]; M_ gyz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Ayz[_t] + F1o3 * M_ gyz[_t] * M_ div_beta[_t] + M_ gxy[_t]* M_ betaxz[_t]+ M_ gyy[_t]* M_ betayz[_t] + M_ gxz[_t]* M_ betaxy[_t] + M_ gzz[_t]* M_ betazy[_t] - M_ gyz[_t]* M_ betaxx[_t]; M_ gxz_rhs[_t] = - 2 * M_ alpn1[_t] * M_ Axz[_t] + F1o3 * M_ gxz[_t] * M_ div_beta[_t] + M_ gxx[_t]* M_ betaxz[_t]+ M_ gxy[_t]* M_ betayz[_t] + M_ gyz[_t]* M_ betayx[_t]+ M_ gzz[_t]* M_ betazx[_t] - M_ gxz[_t]* M_ betayy[_t]; //rhs[_t] for gij // invert tilted metric M_ gupzz[_t]= M_ gxx[_t]* M_ gyy[_t]* M_ gzz[_t]+ M_ gxy[_t]* M_ gyz[_t]* M_ gxz[_t]+ M_ gxz[_t]* M_ gxy[_t]* M_ gyz[_t]- M_ gxz[_t]* M_ gyy[_t]* M_ gxz[_t]- M_ gxy[_t]* M_ gxy[_t]* M_ gzz[_t]- M_ gxx[_t]* M_ gyz[_t]* M_ gyz[_t]; M_ gupxx[_t]= ( M_ gyy[_t]* M_ gzz[_t]- M_ gyz[_t]* M_ gyz[_t]) / M_ gupzz[_t]; M_ gupxy[_t]= - ( M_ gxy[_t]* M_ gzz[_t]- M_ gyz[_t]* M_ gxz[_t]) / M_ gupzz[_t]; M_ gupxz[_t]= ( M_ gxy[_t]* M_ gyz[_t]- M_ gyy[_t]* M_ gxz[_t]) / M_ gupzz[_t]; M_ gupyy[_t]= ( M_ gxx[_t]* M_ gzz[_t]- M_ gxz[_t]* M_ gxz[_t]) / M_ gupzz[_t]; M_ gupyz[_t]= - ( M_ gxx[_t]* M_ gyz[_t]- M_ gxy[_t]* M_ gxz[_t]) / M_ gupzz[_t]; M_ gupzz[_t]= ( M_ gxx[_t]* M_ gyy[_t]- M_ gxy[_t]* M_ gxy[_t]) / M_ gupzz[_t]; //if(threadIdx.x == 0){ // judge = co_c; //} //__syncthreads(); if(co_c == 0) { // M_ Gam^i_Res = M_ Gam^i + M_ gup^ij_,j M_ Gmx_Res[_t] = M_ Gamx[_t] - (M_ gupxx[_t]*(M_ gupxx[_t]*M_ gxxx[_t]+M_ gupxy[_t]*M_ gxyx[_t]+M_ gupxz[_t]*M_ gxzx[_t]) +M_ gupxy[_t]*(M_ gupxx[_t]*M_ gxyx[_t]+M_ gupxy[_t]*M_ gyyx[_t]+M_ gupxz[_t]*M_ gyzx[_t]) +M_ gupxz[_t]*(M_ gupxx[_t]*M_ gxzx[_t]+M_ gupxy[_t]*M_ gyzx[_t]+M_ gupxz[_t]*M_ gzzx[_t]) +M_ gupxx[_t]*(M_ gupxy[_t]*M_ gxxy[_t]+M_ gupyy[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gxzy[_t]) +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxyy[_t]+M_ gupyy[_t]*M_ gyyy[_t]+M_ gupyz[_t]*M_ gyzy[_t]) +M_ gupxz[_t]*(M_ gupxy[_t]*M_ gxzy[_t]+M_ gupyy[_t]*M_ gyzy[_t]+M_ gupyz[_t]*M_ gzzy[_t]) +M_ gupxx[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); M_ Gmy_Res[_t] = M_ Gamy[_t] - (M_ gupxx[_t]*(M_ gupxy[_t]*M_ gxxx[_t]+M_ gupyy[_t]*M_ gxyx[_t]+M_ gupyz[_t]*M_ gxzx[_t]) +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxyx[_t]+M_ gupyy[_t]*M_ gyyx[_t]+M_ gupyz[_t]*M_ gyzx[_t]) +M_ gupxz[_t]*(M_ gupxy[_t]*M_ gxzx[_t]+M_ gupyy[_t]*M_ gyzx[_t]+M_ gupyz[_t]*M_ gzzx[_t]) +M_ gupxy[_t]*(M_ gupxy[_t]*M_ gxxy[_t]+M_ gupyy[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gxzy[_t]) +M_ gupyy[_t]*(M_ gupxy[_t]*M_ gxyy[_t]+M_ gupyy[_t]*M_ gyyy[_t]+M_ gupyz[_t]*M_ gyzy[_t]) +M_ gupyz[_t]*(M_ gupxy[_t]*M_ gxzy[_t]+M_ gupyy[_t]*M_ gyzy[_t]+M_ gupyz[_t]*M_ gzzy[_t]) +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) +M_ gupyy[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); M_ Gmz_Res[_t] = M_ Gamz[_t] - (M_ gupxx[_t]*(M_ gupxz[_t]*M_ gxxx[_t]+M_ gupyz[_t]*M_ gxyx[_t]+M_ gupzz[_t]*M_ gxzx[_t]) +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxyx[_t]+M_ gupyz[_t]*M_ gyyx[_t]+M_ gupzz[_t]*M_ gyzx[_t]) +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxzx[_t]+M_ gupyz[_t]*M_ gyzx[_t]+M_ gupzz[_t]*M_ gzzx[_t]) +M_ gupxy[_t]*(M_ gupxz[_t]*M_ gxxy[_t]+M_ gupyz[_t]*M_ gxyy[_t]+M_ gupzz[_t]*M_ gxzy[_t]) +M_ gupyy[_t]*(M_ gupxz[_t]*M_ gxyy[_t]+M_ gupyz[_t]*M_ gyyy[_t]+M_ gupzz[_t]*M_ gyzy[_t]) +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxzy[_t]+M_ gupyz[_t]*M_ gyzy[_t]+M_ gupzz[_t]*M_ gzzy[_t]) +M_ gupxz[_t]*(M_ gupxz[_t]*M_ gxxz[_t]+M_ gupyz[_t]*M_ gxyz[_t]+M_ gupzz[_t]*M_ gxzz[_t]) +M_ gupyz[_t]*(M_ gupxz[_t]*M_ gxyz[_t]+M_ gupyz[_t]*M_ gyyz[_t]+M_ gupzz[_t]*M_ gyzz[_t]) +M_ gupzz[_t]*(M_ gupxz[_t]*M_ gxzz[_t]+M_ gupyz[_t]*M_ gyzz[_t]+M_ gupzz[_t]*M_ gzzz[_t])); }//if(co == 0) // second kind of connection M_ Gamxxx[_t]=HALF*( M_ gupxx[_t]*M_ gxxx[_t]+ M_ gupxy[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupxz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); M_ Gamyxx[_t]=HALF*( M_ gupxy[_t]*M_ gxxx[_t]+ M_ gupyy[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupyz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); M_ Gamzxx[_t]=HALF*( M_ gupxz[_t]*M_ gxxx[_t]+ M_ gupyz[_t]*(2*M_ gxyx[_t]- M_ gxxy[_t]) + M_ gupzz[_t]*(2*M_ gxzx[_t]- M_ gxxz[_t])); M_ Gamxyy[_t]=HALF*( M_ gupxx[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupxy[_t]*M_ gyyy[_t]+ M_ gupxz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); M_ Gamyyy[_t]=HALF*( M_ gupxy[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupyy[_t]*M_ gyyy[_t]+ M_ gupyz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); M_ Gamzyy[_t]=HALF*( M_ gupxz[_t]*(2*M_ gxyy[_t]- M_ gyyx[_t]) + M_ gupyz[_t]*M_ gyyy[_t]+ M_ gupzz[_t]*(2*M_ gyzy[_t]- M_ gyyz[_t])); M_ Gamxzz[_t]=HALF*( M_ gupxx[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupxy[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupxz[_t]*M_ gzzz[_t]); M_ Gamyzz[_t]=HALF*( M_ gupxy[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupyy[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupyz[_t]*M_ gzzz[_t]); M_ Gamzzz[_t]=HALF*( M_ gupxz[_t]*(2*M_ gxzz[_t]- M_ gzzx[_t]) + M_ gupyz[_t]*(2*M_ gyzz[_t]- M_ gzzy[_t]) + M_ gupzz[_t]*M_ gzzz[_t]); M_ Gamxxy[_t]=HALF*( M_ gupxx[_t]*M_ gxxy[_t]+ M_ gupxy[_t]*M_ gyyx[_t]+ M_ gupxz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); M_ Gamyxy[_t]=HALF*( M_ gupxy[_t]*M_ gxxy[_t]+ M_ gupyy[_t]*M_ gyyx[_t]+ M_ gupyz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); M_ Gamzxy[_t]=HALF*( M_ gupxz[_t]*M_ gxxy[_t]+ M_ gupyz[_t]*M_ gyyx[_t]+ M_ gupzz[_t]*( M_ gxzy[_t]+ M_ gyzx[_t]- M_ gxyz[_t]) ); M_ Gamxxz[_t]=HALF*( M_ gupxx[_t]*M_ gxxz[_t]+ M_ gupxy[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupxz[_t]*M_ gzzx[_t]); M_ Gamyxz[_t]=HALF*( M_ gupxy[_t]*M_ gxxz[_t]+ M_ gupyy[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupyz[_t]*M_ gzzx[_t]); M_ Gamzxz[_t]=HALF*( M_ gupxz[_t]*M_ gxxz[_t]+ M_ gupyz[_t]*( M_ gxyz[_t]+ M_ gyzx[_t]- M_ gxzy[_t]) + M_ gupzz[_t]*M_ gzzx[_t]); M_ Gamxyz[_t]=HALF*( M_ gupxx[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupxy[_t]*M_ gyyz[_t]+ M_ gupxz[_t]*M_ gzzy[_t]); M_ Gamyyz[_t]=HALF*( M_ gupxy[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupyy[_t]*M_ gyyz[_t]+ M_ gupyz[_t]*M_ gzzy[_t]); M_ Gamzyz[_t]=HALF*( M_ gupxz[_t]*( M_ gxyz[_t]+ M_ gxzy[_t]- M_ gyzx[_t]) + M_ gupyz[_t]*M_ gyyz[_t]+ M_ gupzz[_t]*M_ gzzy[_t]); // Raise indices of \tilde A_{ij} and store in R_ij M_ Rxx[_t]= M_ gupxx[_t]* M_ gupxx[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupxy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupxz[_t]* M_ Azz[_t]+ 2*(M_ gupxx[_t]* M_ gupxy[_t]* M_ Axy[_t]+ M_ gupxx[_t]* M_ gupxz[_t]* M_ Axz[_t]+ M_ gupxy[_t]* M_ gupxz[_t]* M_ Ayz[_t]); M_ Ryy[_t]= M_ gupxy[_t]* M_ gupxy[_t]* M_ Axx[_t]+ M_ gupyy[_t]* M_ gupyy[_t]* M_ Ayy[_t]+ M_ gupyz[_t]* M_ gupyz[_t]* M_ Azz[_t]+ 2*(M_ gupxy[_t]* M_ gupyy[_t]* M_ Axy[_t]+ M_ gupxy[_t]* M_ gupyz[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ gupyz[_t]* M_ Ayz[_t]); M_ Rzz[_t]= M_ gupxz[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupyz[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ 2*(M_ gupxz[_t]* M_ gupyz[_t]* M_ Axy[_t]+ M_ gupxz[_t]* M_ gupzz[_t]* M_ Axz[_t]+ M_ gupyz[_t]* M_ gupzz[_t]* M_ Ayz[_t]); M_ Rxy[_t]= M_ gupxx[_t]* M_ gupxy[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupyy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupyz[_t]* M_ Azz[_t]+ (M_ gupxx[_t]* M_ gupyy[_t] + M_ gupxy[_t]* M_ gupxy[_t])* M_ Axy[_t] + (M_ gupxx[_t]* M_ gupyz[_t] + M_ gupxz[_t]* M_ gupxy[_t])* M_ Axz[_t] + (M_ gupxy[_t]* M_ gupyz[_t] + M_ gupxz[_t]* M_ gupyy[_t])* M_ Ayz[_t]; M_ Rxz[_t]= M_ gupxx[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupxy[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ (M_ gupxx[_t]* M_ gupyz[_t] + M_ gupxy[_t]* M_ gupxz[_t])* M_ Axy[_t] + (M_ gupxx[_t]* M_ gupzz[_t] + M_ gupxz[_t]* M_ gupxz[_t])* M_ Axz[_t] + (M_ gupxy[_t]* M_ gupzz[_t] + M_ gupxz[_t]* M_ gupyz[_t])* M_ Ayz[_t]; M_ Ryz[_t]= M_ gupxy[_t]* M_ gupxz[_t]* M_ Axx[_t]+ M_ gupyy[_t]* M_ gupyz[_t]* M_ Ayy[_t]+ M_ gupyz[_t]* M_ gupzz[_t]* M_ Azz[_t]+ (M_ gupxy[_t]* M_ gupyz[_t] + M_ gupyy[_t]* M_ gupxz[_t])* M_ Axy[_t] + (M_ gupxy[_t]* M_ gupzz[_t] + M_ gupyz[_t]* M_ gupxz[_t])* M_ Axz[_t] + (M_ gupyy[_t]* M_ gupzz[_t] + M_ gupyz[_t]* M_ gupyz[_t])* M_ Ayz[_t]; // Right hand side for M_ Gam^i without shift terms... M_ Gamx_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxx[_t]+ M_ Lapy[_t] * M_ Rxy[_t]+ M_ Lapz[_t] * M_ Rxz[_t]) + 2 * M_ alpn1[_t] * ( -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxx[_t]+ M_ chiy[_t] * M_ Rxy[_t]+ M_ chiz[_t] * M_ Rxz[_t]) - M_ gupxx[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - M_ gupxy[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - M_ gupxz[_t]* ( F2o3 * M_ Kz[_t] + 8 * PI * M_ Sz[_t] ) + M_ Gamxxx[_t]* M_ Rxx[_t]+ M_ Gamxyy[_t]* M_ Ryy[_t]+ M_ Gamxzz[_t]* M_ Rzz[_t] + 2 * ( M_ Gamxxy[_t]* M_ Rxy[_t]+ M_ Gamxxz[_t]* M_ Rxz[_t]+ M_ Gamxyz[_t]* M_ Ryz[_t]) ); M_ Gamy_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxy[_t]+ M_ Lapy[_t] * M_ Ryy[_t]+ M_ Lapz[_t] * M_ Ryz[_t]) + 2 * M_ alpn1[_t] * ( -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxy[_t]+ M_ chiy[_t] * M_ Ryy[_t]+ M_ chiz[_t] * M_ Ryz[_t]) - M_ gupxy[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - M_ gupyy[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - M_ gupyz[_t]* ( F2o3 * M_ Kz [_t] + 8 * PI * M_ Sz[_t] ) + M_ Gamyxx[_t]* M_ Rxx[_t]+ M_ Gamyyy[_t]* M_ Ryy[_t]+ M_ Gamyzz[_t]* M_ Rzz[_t] + 2 * ( M_ Gamyxy[_t]* M_ Rxy[_t]+ M_ Gamyxz[_t]* M_ Rxz[_t]+ M_ Gamyyz[_t]* M_ Ryz[_t]) ); M_ Gamz_rhs[_t] = - 2 * ( M_ Lapx[_t] * M_ Rxz[_t]+ M_ Lapy[_t] * M_ Ryz[_t]+ M_ Lapz[_t] * M_ Rzz[_t]) + 2 * M_ alpn1[_t] * ( -F3o2/M_ chin1[_t] * ( M_ chix[_t] * M_ Rxz[_t]+ M_ chiy[_t] * M_ Ryz[_t]+ M_ chiz[_t] * M_ Rzz[_t]) - M_ gupxz[_t]* ( F2o3 * M_ Kx[_t] + 8 * PI * M_ Sx[_t] ) - M_ gupyz[_t]* ( F2o3 * M_ Ky[_t] + 8 * PI * M_ Sy[_t] ) - M_ gupzz[_t]* ( F2o3 * M_ Kz[_t] + 8 * PI * M_ Sz[_t] ) + M_ Gamzxx[_t]* M_ Rxx[_t]+ M_ Gamzyy[_t]* M_ Ryy[_t]+ M_ Gamzzz[_t]* M_ Rzz[_t] + 2 * ( M_ Gamzxy[_t]* M_ Rxy[_t]+ M_ Gamzxz[_t]* M_ Rxz[_t]+ M_ Gamzyz[_t]* M_ Ryz[_t]) ); _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part3() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ fxx [_t]= M_ gxxx[_t]+ M_ gxyy[_t]+ M_ gxzz[_t]; M_ fxy[_t]= M_ gxyx[_t]+ M_ gyyy[_t]+ M_ gyzz[_t]; M_ fxz[_t]= M_ gxzx[_t]+ M_ gyzy[_t]+ M_ gzzz[_t]; M_ Gamxa[_t]= M_ gupxx [_t]* M_ Gamxxx [_t]+ M_ gupyy[_t]* M_ Gamxyy[_t]+ M_ gupzz[_t]* M_ Gamxzz[_t]+ 2*( M_ gupxy[_t]* M_ Gamxxy[_t]+ M_ gupxz[_t]* M_ Gamxxz[_t]+ M_ gupyz[_t]* M_ Gamxyz[_t]); M_ Gamya[_t]= M_ gupxx [_t]* M_ Gamyxx [_t]+ M_ gupyy[_t]* M_ Gamyyy[_t]+ M_ gupzz[_t]* M_ Gamyzz[_t]+ 2*( M_ gupxy[_t]* M_ Gamyxy[_t]+ M_ gupxz[_t]* M_ Gamyxz[_t]+ M_ gupyz[_t]* M_ Gamyyz[_t]); M_ Gamza[_t]= M_ gupxx [_t]* M_ Gamzxx [_t]+ M_ gupyy[_t]* M_ Gamzyy[_t]+ M_ gupzz[_t]* M_ Gamzzz[_t]+ 2*( M_ gupxy[_t]* M_ Gamzxy[_t]+ M_ gupxz[_t]* M_ Gamzxz[_t]+ M_ gupyz[_t]* M_ Gamzyz[_t]); M_ Gamx_rhs[_t] = M_ Gamx_rhs[_t] + F2o3 * M_ Gamxa[_t]* M_ div_beta[_t] - M_ Gamxa[_t]* M_ betaxx [_t]- M_ Gamya[_t]* M_ betaxy[_t]- M_ Gamza[_t]* M_ betaxz[_t] + F1o3 * (M_ gupxx [_t]* M_ fxx [_t] + M_ gupxy[_t]* M_ fxy[_t] + M_ gupxz[_t]* M_ fxz[_t] ) + M_ gupxx [_t]* M_ gxxx [_t] + M_ gupyy[_t]* M_ gyyx [_t] + M_ gupzz[_t]* M_ gzzx [_t] + 2 * (M_ gupxy[_t]* M_ gxyx [_t] + M_ gupxz[_t]* M_ gxzx [_t] + M_ gupyz[_t]* M_ gyzx [_t] ); M_ Gamy_rhs[_t] = M_ Gamy_rhs[_t] + F2o3 * M_ Gamya[_t]* M_ div_beta[_t] - M_ Gamxa[_t]* M_ betayx [_t]- M_ Gamya[_t]* M_ betayy[_t]- M_ Gamza[_t]* M_ betayz[_t] + F1o3 * (M_ gupxy[_t]* M_ fxx [_t] + M_ gupyy[_t]* M_ fxy[_t] + M_ gupyz[_t]* M_ fxz[_t] ) + M_ gupxx [_t]* M_ gxxy[_t] + M_ gupyy[_t]* M_ gyyy[_t] + M_ gupzz[_t]* M_ gzzy[_t] + 2 * (M_ gupxy[_t]* M_ gxyy[_t] + M_ gupxz[_t]* M_ gxzy[_t] + M_ gupyz[_t]* M_ gyzy[_t] ); M_ Gamz_rhs[_t] = M_ Gamz_rhs[_t] + F2o3 * M_ Gamza[_t]* M_ div_beta[_t] - M_ Gamxa[_t]* M_ betazx [_t]- M_ Gamya[_t]* M_ betazy[_t]- M_ Gamza[_t]* M_ betazz[_t] + F1o3 * (M_ gupxz[_t]* M_ fxx [_t] + M_ gupyz[_t]* M_ fxy[_t] + M_ gupzz[_t]* M_ fxz[_t] ) + M_ gupxx [_t]* M_ gxxz[_t] + M_ gupyy[_t]* M_ gyyz[_t] + M_ gupzz[_t]* M_ gzzz[_t] + 2 * (M_ gupxy[_t]* M_ gxyz[_t] + M_ gupxz[_t]* M_ gxzz[_t] + M_ gupyz[_t]* M_ gyzz[_t] ) ; //rhs M_ for M_ Gam^i //first kind of connection stored in M_ gij,k M_ gxxx [_t]= M_ gxx [_t]* M_ Gamxxx [_t]+ M_ gxy[_t]* M_ Gamyxx [_t]+ M_ gxz[_t]* M_ Gamzxx[_t]; M_ gxyx [_t]= M_ gxx [_t]* M_ Gamxxy[_t]+ M_ gxy[_t]* M_ Gamyxy[_t]+ M_ gxz[_t]* M_ Gamzxy[_t]; M_ gxzx [_t]= M_ gxx [_t]* M_ Gamxxz[_t]+ M_ gxy[_t]* M_ Gamyxz[_t]+ M_ gxz[_t]* M_ Gamzxz[_t]; M_ gyyx [_t]= M_ gxx [_t]* M_ Gamxyy[_t]+ M_ gxy[_t]* M_ Gamyyy[_t]+ M_ gxz[_t]* M_ Gamzyy[_t]; M_ gyzx [_t]= M_ gxx [_t]* M_ Gamxyz[_t]+ M_ gxy[_t]* M_ Gamyyz[_t]+ M_ gxz[_t]* M_ Gamzyz[_t]; M_ gzzx [_t]= M_ gxx [_t]* M_ Gamxzz[_t]+ M_ gxy[_t]* M_ Gamyzz[_t]+ M_ gxz[_t]* M_ Gamzzz[_t]; M_ gxxy[_t]= M_ gxy[_t]* M_ Gamxxx [_t]+ M_ gyy[_t]* M_ Gamyxx [_t]+ M_ gyz[_t]* M_ Gamzxx[_t]; M_ gxyy[_t]= M_ gxy[_t]* M_ Gamxxy[_t]+ M_ gyy[_t]* M_ Gamyxy[_t]+ M_ gyz[_t]* M_ Gamzxy[_t]; M_ gxzy[_t]= M_ gxy[_t]* M_ Gamxxz[_t]+ M_ gyy[_t]* M_ Gamyxz[_t]+ M_ gyz[_t]* M_ Gamzxz[_t]; M_ gyyy[_t]= M_ gxy[_t]* M_ Gamxyy[_t]+ M_ gyy[_t]* M_ Gamyyy[_t]+ M_ gyz[_t]* M_ Gamzyy[_t]; M_ gyzy[_t]= M_ gxy[_t]* M_ Gamxyz[_t]+ M_ gyy[_t]* M_ Gamyyz[_t]+ M_ gyz[_t]* M_ Gamzyz[_t]; M_ gzzy[_t]= M_ gxy[_t]* M_ Gamxzz[_t]+ M_ gyy[_t]* M_ Gamyzz[_t]+ M_ gyz[_t]* M_ Gamzzz[_t]; M_ gxxz[_t]= M_ gxz[_t]* M_ Gamxxx [_t]+ M_ gyz[_t]* M_ Gamyxx [_t]+ M_ gzz[_t]* M_ Gamzxx[_t]; M_ gxyz[_t]= M_ gxz[_t]* M_ Gamxxy[_t]+ M_ gyz[_t]* M_ Gamyxy[_t]+ M_ gzz[_t]* M_ Gamzxy[_t]; M_ gxzz[_t]= M_ gxz[_t]* M_ Gamxxz[_t]+ M_ gyz[_t]* M_ Gamyxz[_t]+ M_ gzz[_t]* M_ Gamzxz[_t]; M_ gyyz[_t]= M_ gxz[_t]* M_ Gamxyy[_t]+ M_ gyz[_t]* M_ Gamyyy[_t]+ M_ gzz[_t]* M_ Gamzyy[_t]; M_ gyzz[_t]= M_ gxz[_t]* M_ Gamxyz[_t]+ M_ gyz[_t]* M_ Gamyyz[_t]+ M_ gzz[_t]* M_ Gamzyz[_t]; M_ gzzz[_t]= M_ gxz[_t]* M_ Gamxzz[_t]+ M_ gyz[_t]* M_ Gamyzz[_t]+ M_ gzz[_t]* M_ Gamzzz[_t]; _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part4() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ Rxx [_t]= - HALF *M_ Rxx [_t] + M_ gxx [_t]* M_ Gamxx[_t] +M_ gxy[_t]* M_ Gamyx [_t] + M_ gxz[_t]* M_ Gamzx [_t]+ M_ Gamxa[_t]*M_ gxxx [_t]+ M_ Gamya[_t]*M_ gxyx [_t]+ M_ Gamza[_t]*M_ gxzx [_t] + M_ gupxx [_t]*( 2*(M_ Gamxxx [_t]*M_ gxxx [_t]+ M_ Gamyxx [_t]*M_ gxyx [_t]+ M_ Gamzxx [_t]*M_ gxzx[_t]) + M_ Gamxxx [_t]*M_ gxxx [_t]+ M_ Gamyxx [_t]*M_ gxxy[_t]+ M_ Gamzxx [_t]*M_ gxxz[_t])+ M_ gupxy[_t]*( 2*(M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gyyx [_t]+ M_ Gamzxx [_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxyx [_t]+ M_ Gamzxy[_t]*M_ gxzx[_t]) + M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxxy[_t]+ M_ Gamzxy[_t]*M_ gxxz[_t] + M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxyz[_t])+ M_ gupxz[_t]*( 2*(M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gyzx [_t]+ M_ Gamzxx [_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxyx [_t]+ M_ Gamzxz[_t]*M_ gxzx[_t]) + M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxxy[_t]+ M_ Gamzxz[_t]*M_ gxxz[_t] + M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gxzy[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t])+ M_ gupyy[_t]*( 2*(M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gyyx [_t]+ M_ Gamzxy[_t]*M_ gyzx[_t]) + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t])+ M_ gupyz[_t]*( 2*(M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gyzx [_t]+ M_ Gamzxy[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gyyx [_t]+ M_ Gamzxz[_t]*M_ gyzx[_t]) + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxyz[_t] + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t])+ M_ gupzz[_t]*( 2*(M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gyzx [_t]+ M_ Gamzxz[_t]*M_ gzzx[_t]) + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t]); M_ Ryy[_t]= - HALF *M_ Ryy[_t] + M_ gxy[_t]* M_ Gamxy[_t]+ M_ gyy[_t]* M_ Gamyy[_t] + M_ gyz[_t]* M_ Gamzy[_t] + M_ Gamxa[_t]*M_ gxyy[_t]+ M_ Gamya[_t]*M_ gyyy[_t]+ M_ Gamza[_t]*M_ gyzy[_t] + M_ gupxx [_t]*( 2*(M_ Gamxxy[_t]*M_ gxxy[_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxzy[_t]) + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t])+ M_ gupxy[_t]*( 2*(M_ Gamxxy[_t]*M_ gxyy[_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyzy[_t] + M_ Gamxyy[_t]*M_ gxxy[_t]+ M_ Gamyyy[_t]*M_ gxyy[_t]+ M_ Gamzyy[_t]*M_ gxzy[_t]) + M_ Gamxyy[_t]*M_ gxyx [_t]+ M_ Gamyyy[_t]*M_ gxyy[_t]+ M_ Gamzyy[_t]*M_ gxyz[_t] + M_ Gamxxy[_t]*M_ gyyx [_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyyz[_t])+ M_ gupxz[_t]*( 2*(M_ Gamxxy[_t]*M_ gxzy[_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gzzy[_t] + M_ Gamxyz[_t]*M_ gxxy[_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxzy[_t]) + M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxyz[_t] + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ M_ gupyy[_t]*( 2*(M_ Gamxyy[_t]*M_ gxyy[_t]+ M_ Gamyyy[_t]*M_ gyyy[_t]+ M_ Gamzyy[_t]*M_ gyzy[_t]) + M_ Gamxyy[_t]*M_ gyyx [_t]+ M_ Gamyyy[_t]*M_ gyyy[_t]+ M_ Gamzyy[_t]*M_ gyyz[_t])+ M_ gupyz[_t]*( 2*(M_ Gamxyy[_t]*M_ gxzy[_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gzzy[_t] + M_ Gamxyz[_t]*M_ gxyy[_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyzy[_t]) + M_ Gamxyz[_t]*M_ gyyx [_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyyz[_t] + M_ Gamxyy[_t]*M_ gyzx [_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t])+ M_ gupzz[_t]*( 2*(M_ Gamxyz[_t]*M_ gxzy[_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gzzy[_t]) + M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t]); M_ Rzz[_t]= - HALF *M_ Rzz[_t] + M_ gxz[_t]* M_ Gamxz[_t] +M_ gyz[_t]* M_ Gamyz[_t] + M_ gzz[_t]* M_ Gamzz[_t] + M_ Gamxa[_t]*M_ gxzz[_t]+ M_ Gamya[_t]*M_ gyzz[_t]+ M_ Gamza[_t]*M_ gzzz[_t] + M_ gupxx [_t]*( 2*(M_ Gamxxz[_t]*M_ gxxz[_t]+ M_ Gamyxz[_t]*M_ gxyz[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t]) + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t])+ M_ gupxy[_t]*( 2*(M_ Gamxxz[_t]*M_ gxyz[_t]+ M_ Gamyxz[_t]*M_ gyyz[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + M_ Gamxyz[_t]*M_ gxxz[_t]+ M_ Gamyyz[_t]*M_ gxyz[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t]) + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gxzy[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t])+ M_ gupxz[_t]*( 2*(M_ Gamxxz[_t]*M_ gxzz[_t]+ M_ Gamyxz[_t]*M_ gyzz[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t] + M_ Gamxzz[_t]*M_ gxxz[_t]+ M_ Gamyzz[_t]*M_ gxyz[_t]+ M_ Gamzzz[_t]*M_ gxzz[_t]) + M_ Gamxzz[_t]*M_ gxzx [_t]+ M_ Gamyzz[_t]*M_ gxzy[_t]+ M_ Gamzzz[_t]*M_ gxzz[_t] + M_ Gamxxz[_t]*M_ gzzx [_t]+ M_ Gamyxz[_t]*M_ gzzy[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t])+ M_ gupyy[_t]*( 2*(M_ Gamxyz[_t]*M_ gxyz[_t]+ M_ Gamyyz[_t]*M_ gyyz[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t]) + M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t])+ M_ gupyz[_t]*( 2*(M_ Gamxyz[_t]*M_ gxzz[_t]+ M_ Gamyyz[_t]*M_ gyzz[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t] + M_ Gamxzz[_t]*M_ gxyz[_t]+ M_ Gamyzz[_t]*M_ gyyz[_t]+ M_ Gamzzz[_t]*M_ gyzz[_t]) + M_ Gamxzz[_t]*M_ gyzx [_t]+ M_ Gamyzz[_t]*M_ gyzy[_t]+ M_ Gamzzz[_t]*M_ gyzz[_t] + M_ Gamxyz[_t]*M_ gzzx [_t]+ M_ Gamyyz[_t]*M_ gzzy[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t])+ M_ gupzz[_t]*( 2*(M_ Gamxzz[_t]*M_ gxzz[_t]+ M_ Gamyzz[_t]*M_ gyzz[_t]+ M_ Gamzzz[_t]*M_ gzzz[_t]) + M_ Gamxzz[_t]*M_ gzzx [_t]+ M_ Gamyzz[_t]*M_ gzzy[_t]+ M_ Gamzzz[_t]*M_ gzzz[_t]); M_ Rxy[_t]= HALF*( -M_ Rxy[_t] + M_ gxx [_t]* M_ Gamxy[_t]+ M_ gxy[_t]* M_ Gamyy[_t]+M_ gxz[_t]* M_ Gamzy[_t] + M_ gxy[_t]* M_ Gamxx [_t]+ M_ gyy[_t]* M_ Gamyx [_t]+M_ gyz[_t]* M_ Gamzx [_t] + M_ Gamxa[_t]*M_ gxyx [_t]+ M_ Gamya[_t]*M_ gyyx [_t]+ M_ Gamza[_t]*M_ gyzx [_t] + M_ Gamxa[_t]*M_ gxxy[_t]+ M_ Gamya[_t]*M_ gxyy[_t]+ M_ Gamza[_t]*M_ gxzy[_t])+ M_ gupxx [_t]*( M_ Gamxxx [_t]*M_ gxxy[_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxzy[_t] + M_ Gamxxy[_t]*M_ gxxx [_t]+ M_ Gamyxy[_t]*M_ gxyx [_t]+ M_ Gamzxy[_t]*M_ gxzx [_t] + M_ Gamxxx [_t]*M_ gxyx [_t]+ M_ Gamyxx [_t]*M_ gxyy[_t]+ M_ Gamzxx [_t]*M_ gxyz[_t])+ M_ gupxy[_t]*( M_ Gamxxx [_t]*M_ gxyy[_t]+ M_ Gamyxx [_t]*M_ gyyy[_t]+ M_ Gamzxx [_t]*M_ gyzy[_t] + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gyyx [_t]+ M_ Gamzxy[_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gxyx [_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxyz[_t] + M_ Gamxxy[_t]*M_ gxxy[_t]+ M_ Gamyxy[_t]*M_ gxyy[_t]+ M_ Gamzxy[_t]*M_ gxzy[_t] + M_ Gamxyy[_t]*M_ gxxx [_t]+ M_ Gamyyy[_t]*M_ gxyx [_t]+ M_ Gamzyy[_t]*M_ gxzx [_t] + M_ Gamxxx [_t]*M_ gyyx [_t]+ M_ Gamyxx [_t]*M_ gyyy[_t]+ M_ Gamzxx [_t]*M_ gyyz[_t])+ M_ gupxz[_t]*( M_ Gamxxx [_t]*M_ gxzy[_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gzzy[_t] + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gyzx [_t]+ M_ Gamzxy[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxyz[_t] + M_ Gamxxz[_t]*M_ gxxy[_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxzy[_t] + M_ Gamxyz[_t]*M_ gxxx [_t]+ M_ Gamyyz[_t]*M_ gxyx [_t]+ M_ Gamzyz[_t]*M_ gxzx [_t] + M_ Gamxxx [_t]*M_ gyzx [_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t])+ M_ gupyy[_t]*( M_ Gamxxy[_t]*M_ gxyy[_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyzy[_t] + M_ Gamxyy[_t]*M_ gxyx [_t]+ M_ Gamyyy[_t]*M_ gyyx [_t]+ M_ Gamzyy[_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gyyx [_t]+ M_ Gamyxy[_t]*M_ gyyy[_t]+ M_ Gamzxy[_t]*M_ gyyz[_t])+ M_ gupyz[_t]*( M_ Gamxxy[_t]*M_ gxzy[_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gzzy[_t] + M_ Gamxyy[_t]*M_ gxzx [_t]+ M_ Gamyyy[_t]*M_ gyzx [_t]+ M_ Gamzyy[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gyyx [_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyyz[_t] + M_ Gamxxz[_t]*M_ gxyy[_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyzy[_t] + M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gyyx [_t]+ M_ Gamzyz[_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ M_ gupzz[_t]*( M_ Gamxxz[_t]*M_ gxzy[_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gzzy[_t] + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gyzx [_t]+ M_ Gamzyz[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t]); M_ Rxz[_t]= HALF*( -M_ Rxz[_t] + M_ gxx [_t]* M_ Gamxz[_t]+ M_ gxy[_t]* M_ Gamyz[_t]+M_ gxz[_t]* M_ Gamzz[_t] + M_ gxz[_t]* M_ Gamxx [_t]+ M_ gyz[_t]* M_ Gamyx [_t]+M_ gzz[_t]* M_ Gamzx [_t] + M_ Gamxa[_t]*M_ gxzx [_t]+ M_ Gamya[_t]*M_ gyzx [_t]+ M_ Gamza[_t]*M_ gzzx [_t] + M_ Gamxa[_t]*M_ gxxz[_t]+ M_ Gamya[_t]*M_ gxyz[_t]+ M_ Gamza[_t]*M_ gxzz[_t])+ M_ gupxx [_t]*( M_ Gamxxx [_t]*M_ gxxz[_t]+ M_ Gamyxx [_t]*M_ gxyz[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t] + M_ Gamxxz[_t]*M_ gxxx [_t]+ M_ Gamyxz[_t]*M_ gxyx [_t]+ M_ Gamzxz[_t]*M_ gxzx [_t] + M_ Gamxxx [_t]*M_ gxzx [_t]+ M_ Gamyxx [_t]*M_ gxzy[_t]+ M_ Gamzxx [_t]*M_ gxzz[_t])+ M_ gupxy[_t]*( M_ Gamxxx [_t]*M_ gxyz[_t]+ M_ Gamyxx [_t]*M_ gyyz[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t] + M_ Gamxxz[_t]*M_ gxyx [_t]+ M_ Gamyxz[_t]*M_ gyyx [_t]+ M_ Gamzxz[_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + M_ Gamxxy[_t]*M_ gxxz[_t]+ M_ Gamyxy[_t]*M_ gxyz[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + M_ Gamxyz[_t]*M_ gxxx [_t]+ M_ Gamyyz[_t]*M_ gxyx [_t]+ M_ Gamzyz[_t]*M_ gxzx [_t] + M_ Gamxxx [_t]*M_ gyzx [_t]+ M_ Gamyxx [_t]*M_ gyzy[_t]+ M_ Gamzxx [_t]*M_ gyzz[_t])+ M_ gupxz[_t]*( M_ Gamxxx [_t]*M_ gxzz[_t]+ M_ Gamyxx [_t]*M_ gyzz[_t]+ M_ Gamzxx [_t]*M_ gzzz[_t] + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gyzx [_t]+ M_ Gamzxz[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gxzx [_t]+ M_ Gamyxz[_t]*M_ gxzy[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t] + M_ Gamxxz[_t]*M_ gxxz[_t]+ M_ Gamyxz[_t]*M_ gxyz[_t]+ M_ Gamzxz[_t]*M_ gxzz[_t] + M_ Gamxzz[_t]*M_ gxxx [_t]+ M_ Gamyzz[_t]*M_ gxyx [_t]+ M_ Gamzzz[_t]*M_ gxzx [_t] + M_ Gamxxx [_t]*M_ gzzx [_t]+ M_ Gamyxx [_t]*M_ gzzy[_t]+ M_ Gamzxx [_t]*M_ gzzz[_t])+ M_ gupyy[_t]*( M_ Gamxxy[_t]*M_ gxyz[_t]+ M_ Gamyxy[_t]*M_ gyyz[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t] + M_ Gamxyz[_t]*M_ gxyx [_t]+ M_ Gamyyz[_t]*M_ gyyx [_t]+ M_ Gamzyz[_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ M_ gupyz[_t]*( M_ Gamxxy[_t]*M_ gxzz[_t]+ M_ Gamyxy[_t]*M_ gyzz[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t] + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gyzx [_t]+ M_ Gamzyz[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gyzx [_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + M_ Gamxxz[_t]*M_ gxyz[_t]+ M_ Gamyxz[_t]*M_ gyyz[_t]+ M_ Gamzxz[_t]*M_ gyzz[_t] + M_ Gamxzz[_t]*M_ gxyx [_t]+ M_ Gamyzz[_t]*M_ gyyx [_t]+ M_ Gamzzz[_t]*M_ gyzx [_t] + M_ Gamxxy[_t]*M_ gzzx [_t]+ M_ Gamyxy[_t]*M_ gzzy[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t])+ M_ gupzz[_t]*( M_ Gamxxz[_t]*M_ gxzz[_t]+ M_ Gamyxz[_t]*M_ gyzz[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t] + M_ Gamxzz[_t]*M_ gxzx [_t]+ M_ Gamyzz[_t]*M_ gyzx [_t]+ M_ Gamzzz[_t]*M_ gzzx [_t] + M_ Gamxxz[_t]*M_ gzzx [_t]+ M_ Gamyxz[_t]*M_ gzzy[_t]+ M_ Gamzxz[_t]*M_ gzzz[_t]); M_ Ryz[_t]= HALF*( -M_ Ryz[_t] + M_ gxy[_t]* M_ Gamxz[_t]+M_ gyy[_t]* M_ Gamyz[_t]+M_ gyz[_t]* M_ Gamzz[_t] + M_ gxz[_t]* M_ Gamxy[_t]+M_ gyz[_t]* M_ Gamyy[_t]+M_ gzz[_t]* M_ Gamzy[_t] + M_ Gamxa[_t]*M_ gxzy[_t]+ M_ Gamya[_t]*M_ gyzy[_t]+ M_ Gamza[_t]*M_ gzzy[_t] + M_ Gamxa[_t]*M_ gxyz[_t]+ M_ Gamya[_t]*M_ gyyz[_t]+ M_ Gamza[_t]*M_ gyzz[_t])+ M_ gupxx [_t]*( M_ Gamxxy[_t]*M_ gxxz[_t]+ M_ Gamyxy[_t]*M_ gxyz[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t] + M_ Gamxxz[_t]*M_ gxxy[_t]+ M_ Gamyxz[_t]*M_ gxyy[_t]+ M_ Gamzxz[_t]*M_ gxzy[_t] + M_ Gamxxy[_t]*M_ gxzx [_t]+ M_ Gamyxy[_t]*M_ gxzy[_t]+ M_ Gamzxy[_t]*M_ gxzz[_t])+ M_ gupxy[_t]*( M_ Gamxxy[_t]*M_ gxyz[_t]+ M_ Gamyxy[_t]*M_ gyyz[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t] + M_ Gamxxz[_t]*M_ gxyy[_t]+ M_ Gamyxz[_t]*M_ gyyy[_t]+ M_ Gamzxz[_t]*M_ gyzy[_t] + M_ Gamxyy[_t]*M_ gxzx [_t]+ M_ Gamyyy[_t]*M_ gxzy[_t]+ M_ Gamzyy[_t]*M_ gxzz[_t] + M_ Gamxyy[_t]*M_ gxxz[_t]+ M_ Gamyyy[_t]*M_ gxyz[_t]+ M_ Gamzyy[_t]*M_ gxzz[_t] + M_ Gamxyz[_t]*M_ gxxy[_t]+ M_ Gamyyz[_t]*M_ gxyy[_t]+ M_ Gamzyz[_t]*M_ gxzy[_t] + M_ Gamxxy[_t]*M_ gyzx [_t]+ M_ Gamyxy[_t]*M_ gyzy[_t]+ M_ Gamzxy[_t]*M_ gyzz[_t])+ M_ gupxz[_t]*( M_ Gamxxy[_t]*M_ gxzz[_t]+ M_ Gamyxy[_t]*M_ gyzz[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t] + M_ Gamxxz[_t]*M_ gxzy[_t]+ M_ Gamyxz[_t]*M_ gyzy[_t]+ M_ Gamzxz[_t]*M_ gzzy[_t] + M_ Gamxyz[_t]*M_ gxzx [_t]+ M_ Gamyyz[_t]*M_ gxzy[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + M_ Gamxyz[_t]*M_ gxxz[_t]+ M_ Gamyyz[_t]*M_ gxyz[_t]+ M_ Gamzyz[_t]*M_ gxzz[_t] + M_ Gamxzz[_t]*M_ gxxy[_t]+ M_ Gamyzz[_t]*M_ gxyy[_t]+ M_ Gamzzz[_t]*M_ gxzy[_t] + M_ Gamxxy[_t]*M_ gzzx [_t]+ M_ Gamyxy[_t]*M_ gzzy[_t]+ M_ Gamzxy[_t]*M_ gzzz[_t])+ M_ gupyy[_t]*( M_ Gamxyy[_t]*M_ gxyz[_t]+ M_ Gamyyy[_t]*M_ gyyz[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t] + M_ Gamxyz[_t]*M_ gxyy[_t]+ M_ Gamyyz[_t]*M_ gyyy[_t]+ M_ Gamzyz[_t]*M_ gyzy[_t] + M_ Gamxyy[_t]*M_ gyzx [_t]+ M_ Gamyyy[_t]*M_ gyzy[_t]+ M_ Gamzyy[_t]*M_ gyzz[_t])+ M_ gupyz[_t]*( M_ Gamxyy[_t]*M_ gxzz[_t]+ M_ Gamyyy[_t]*M_ gyzz[_t]+ M_ Gamzyy[_t]*M_ gzzz[_t] + M_ Gamxyz[_t]*M_ gxzy[_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gzzy[_t] + M_ Gamxyz[_t]*M_ gyzx [_t]+ M_ Gamyyz[_t]*M_ gyzy[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t] + M_ Gamxyz[_t]*M_ gxyz[_t]+ M_ Gamyyz[_t]*M_ gyyz[_t]+ M_ Gamzyz[_t]*M_ gyzz[_t] + M_ Gamxzz[_t]*M_ gxyy[_t]+ M_ Gamyzz[_t]*M_ gyyy[_t]+ M_ Gamzzz[_t]*M_ gyzy[_t] + M_ Gamxyy[_t]*M_ gzzx [_t]+ M_ Gamyyy[_t]*M_ gzzy[_t]+ M_ Gamzyy[_t]*M_ gzzz[_t])+ M_ gupzz[_t]*( M_ Gamxyz[_t]*M_ gxzz[_t]+ M_ Gamyyz[_t]*M_ gyzz[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t] + M_ Gamxzz[_t]*M_ gxzy[_t]+ M_ Gamyzz[_t]*M_ gyzy[_t]+ M_ Gamzzz[_t]*M_ gzzy[_t] + M_ Gamxyz[_t]*M_ gzzx [_t]+ M_ Gamyyz[_t]*M_ gzzy[_t]+ M_ Gamzyz[_t]*M_ gzzz[_t]); _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part5() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ fxx [_t]=M_ fxx [_t]- M_ Gamxxx [_t]* M_ chix [_t]- M_ Gamyxx [_t]* M_ chiy[_t]- M_ Gamzxx [_t]* M_ chiz[_t]; M_ fxy[_t]=M_ fxy[_t]- M_ Gamxxy[_t]* M_ chix [_t]- M_ Gamyxy[_t]* M_ chiy[_t]- M_ Gamzxy[_t]* M_ chiz[_t]; M_ fxz[_t]=M_ fxz[_t]- M_ Gamxxz[_t]* M_ chix [_t]- M_ Gamyxz[_t]* M_ chiy[_t]- M_ Gamzxz[_t]* M_ chiz[_t]; M_ fyy[_t]=M_ fyy[_t]- M_ Gamxyy[_t]* M_ chix [_t]- M_ Gamyyy[_t]* M_ chiy[_t]- M_ Gamzyy[_t]* M_ chiz[_t]; M_ fyz[_t]=M_ fyz[_t]- M_ Gamxyz[_t]* M_ chix [_t]- M_ Gamyyz[_t]* M_ chiy[_t]- M_ Gamzyz[_t]* M_ chiz[_t]; M_ fzz[_t]=M_ fzz[_t]- M_ Gamxzz[_t]* M_ chix [_t]- M_ Gamyzz[_t]* M_ chiy[_t]- M_ Gamzzz[_t]* M_ chiz[_t]; // M_ Store D^l D_l M_ chi - 3/(2*M_ chi) D^l M_ chi D_l M_ chi inM_ f[_t] M_ f[_t] = M_ gupxx [_t]* (M_ fxx [_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chix [_t]) + M_ gupyy[_t]* (M_ fyy[_t]- F3o2/M_ chin1[_t] * M_ chiy[_t]* M_ chiy[_t]) + M_ gupzz[_t]* (M_ fzz[_t]- F3o2/M_ chin1[_t] * M_ chiz[_t]* M_ chiz[_t]) + 2 *M_ gupxy[_t]* (M_ fxy[_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chiy[_t]) + 2 *M_ gupxz[_t]* (M_ fxz[_t]- F3o2/M_ chin1[_t] * M_ chix [_t]* M_ chiz[_t]) + 2 *M_ gupyz[_t]* (M_ fyz[_t]- F3o2/M_ chin1[_t] * M_ chiy[_t]* M_ chiz[_t]); // M_ Add M_ chi part toM_ Ricci tensor: M_ Rxx [_t]=M_ Rxx [_t]+ (M_ fxx [_t]- M_ chix[_t]*M_ chix[_t]/M_ chin1[_t]/2 +M_ gxx [_t]*M_ f[_t])/M_ chin1[_t]/2; M_ Ryy[_t]=M_ Ryy[_t]+ (M_ fyy[_t]- M_ chiy[_t]*M_ chiy[_t]/M_ chin1[_t]/2 +M_ gyy[_t]*M_ f[_t])/M_ chin1[_t]/2; M_ Rzz[_t]=M_ Rzz[_t]+ (M_ fzz[_t]- M_ chiz[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gzz[_t]*M_ f[_t])/M_ chin1[_t]/2; M_ Rxy[_t]=M_ Rxy[_t]+ (M_ fxy[_t]- M_ chix[_t]*M_ chiy[_t]/M_ chin1[_t]/2 +M_ gxy[_t]*M_ f[_t])/M_ chin1[_t]/2; M_ Rxz[_t]=M_ Rxz[_t]+ (M_ fxz[_t]- M_ chix[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gxz[_t]*M_ f[_t])/M_ chin1[_t]/2; M_ Ryz[_t]=M_ Ryz[_t]+ (M_ fyz[_t]- M_ chiy[_t]*M_ chiz[_t]/M_ chin1[_t]/2 +M_ gyz[_t]*M_ f[_t])/M_ chin1[_t]/2; _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part6() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ gxxx [_t]= (M_ gupxx [_t]* M_ chix [_t]+M_ gupxy[_t]* M_ chiy[_t]+M_ gupxz[_t]* M_ chiz[_t])/M_ chin1[_t]; M_ gxxy[_t]= (M_ gupxy[_t]* M_ chix [_t]+M_ gupyy[_t]* M_ chiy[_t]+M_ gupyz[_t]* M_ chiz[_t])/M_ chin1[_t]; M_ gxxz[_t]= (M_ gupxz[_t]* M_ chix [_t]+M_ gupyz[_t]* M_ chiy[_t]+M_ gupzz[_t]* M_ chiz[_t])/M_ chin1[_t]; // nowM_ get physical second kind of connection M_ Gamxxx [_t]= M_ Gamxxx [_t]- ( (M_ chix [_t]+ M_ chix[_t])/M_ chin1[_t] -M_ gxx [_t]*M_ gxxx [_t])*HALF; M_ Gamyxx [_t]= M_ Gamyxx [_t]- ( -M_ gxx [_t]*M_ gxxy[_t])*HALF; M_ Gamzxx [_t]= M_ Gamzxx [_t]- ( -M_ gxx [_t]*M_ gxxz[_t])*HALF; M_ Gamxyy[_t]= M_ Gamxyy[_t]- ( -M_ gyy[_t]*M_ gxxx [_t])*HALF; M_ Gamyyy[_t]= M_ Gamyyy[_t]- ( (M_ chiy[_t]+ M_ chiy[_t])/M_ chin1[_t] -M_ gyy[_t]*M_ gxxy[_t])*HALF; M_ Gamzyy[_t]= M_ Gamzyy[_t]- ( -M_ gyy[_t]*M_ gxxz[_t])*HALF; M_ Gamxzz[_t]= M_ Gamxzz[_t]- ( -M_ gzz[_t]*M_ gxxx [_t])*HALF; M_ Gamyzz[_t]= M_ Gamyzz[_t]- ( -M_ gzz[_t]*M_ gxxy[_t])*HALF; M_ Gamzzz[_t]= M_ Gamzzz[_t]- ( (M_ chiz[_t]+ M_ chiz[_t])/M_ chin1[_t] -M_ gzz[_t]*M_ gxxz[_t])*HALF; M_ Gamxxy[_t]= M_ Gamxxy[_t]- ( M_ chiy[_t] /M_ chin1[_t] -M_ gxy[_t]*M_ gxxx [_t])*HALF; M_ Gamyxy[_t]= M_ Gamyxy[_t]- ( M_ chix [_t]/M_ chin1[_t] -M_ gxy[_t]*M_ gxxy[_t])*HALF; M_ Gamzxy[_t]= M_ Gamzxy[_t]- ( -M_ gxy[_t]*M_ gxxz[_t])*HALF; M_ Gamxxz[_t]= M_ Gamxxz[_t]- ( M_ chiz[_t] /M_ chin1[_t] -M_ gxz[_t]*M_ gxxx [_t])*HALF; M_ Gamyxz[_t]= M_ Gamyxz[_t]- ( -M_ gxz[_t]*M_ gxxy[_t])*HALF; M_ Gamzxz[_t]= M_ Gamzxz[_t]- ( M_ chix [_t]/M_ chin1[_t] -M_ gxz[_t]*M_ gxxz[_t])*HALF; M_ Gamxyz[_t]= M_ Gamxyz[_t]- ( -M_ gyz[_t]*M_ gxxx [_t])*HALF; M_ Gamyyz[_t]= M_ Gamyyz[_t]- ( M_ chiz[_t] /M_ chin1[_t] -M_ gyz[_t]*M_ gxxy[_t])*HALF; M_ Gamzyz[_t]= M_ Gamzyz[_t]- ( M_ chiy[_t]/M_ chin1[_t] -M_ gyz[_t]*M_ gxxz[_t])*HALF; M_ fxx [_t]=M_ fxx [_t]- M_ Gamxxx[_t]*M_ Lapx [_t]- M_ Gamyxx[_t]*M_ Lapy[_t]- M_ Gamzxx[_t]*M_ Lapz[_t]; M_ fyy[_t]=M_ fyy[_t]- M_ Gamxyy[_t]*M_ Lapx [_t]- M_ Gamyyy[_t]*M_ Lapy[_t]- M_ Gamzyy[_t]*M_ Lapz[_t]; M_ fzz[_t]=M_ fzz[_t]- M_ Gamxzz[_t]*M_ Lapx [_t]- M_ Gamyzz[_t]*M_ Lapy[_t]- M_ Gamzzz[_t]*M_ Lapz[_t]; M_ fxy[_t]=M_ fxy[_t]- M_ Gamxxy[_t]*M_ Lapx [_t]- M_ Gamyxy[_t]*M_ Lapy[_t]- M_ Gamzxy[_t]*M_ Lapz[_t]; M_ fxz[_t]=M_ fxz[_t]- M_ Gamxxz[_t]*M_ Lapx [_t]- M_ Gamyxz[_t]*M_ Lapy[_t]- M_ Gamzxz[_t]*M_ Lapz[_t]; M_ fyz[_t]=M_ fyz[_t]- M_ Gamxyz[_t]*M_ Lapx [_t]- M_ Gamyyz[_t]*M_ Lapy[_t]- M_ Gamzyz[_t]*M_ Lapz[_t]; // store D^i D_i Lap in M_ trK_rhs[_t] upto M_ chi M_ trK_rhs[_t] = M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t]+ 2* (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]); // M_ Add lapse and M_ S_ij parts toM_ Ricci tensor: //follow bam code M_ S[_t] = M_ chin1[_t] * ( M_ gupxx[_t] * M_ Sxx[_t] + M_ gupyy[_t] * M_ Syy[_t] + M_ gupzz[_t] * M_ Szz[_t] + 2 * ( M_ gupxy[_t] * M_ Sxy[_t] + M_ gupxz[_t] * M_ Sxz[_t] + M_ gupyz[_t] * M_ Syz[_t] ) ); M_ f[_t] = F2o3 * M_ trK[_t] * M_ trK[_t] -( M_ gupxx[_t] * ( M_ gupxx[_t] * M_ Axx[_t] * M_ Axx[_t] + M_ gupyy[_t] * M_ Axy[_t] * M_ Axy[_t] + M_ gupzz[_t] * M_ Axz[_t] * M_ Axz[_t] + 2 * (M_ gupxy[_t] * M_ Axx[_t] * M_ Axy[_t] + M_ gupxz[_t] * M_ Axx[_t] * M_ Axz[_t] + M_ gupyz[_t] * M_ Axy[_t] * M_ Axz[_t]) ) + M_ gupyy[_t] * ( M_ gupxx[_t] * M_ Axy[_t] * M_ Axy[_t] + M_ gupyy[_t] * M_ Ayy[_t] * M_ Ayy[_t] + M_ gupzz[_t] * M_ Ayz[_t] * M_ Ayz[_t] + 2 * (M_ gupxy[_t] * M_ Axy[_t] * M_ Ayy[_t] + M_ gupxz[_t] * M_ Axy[_t] * M_ Ayz[_t] + M_ gupyz[_t] * M_ Ayy[_t] * M_ Ayz[_t]) ) + M_ gupzz[_t] * ( M_ gupxx[_t] * M_ Axz[_t] * M_ Axz[_t] + M_ gupyy[_t] * M_ Ayz[_t] * M_ Ayz[_t] + M_ gupzz[_t] * M_ Azz[_t] * M_ Azz[_t] + 2 * (M_ gupxy[_t] * M_ Axz[_t] * M_ Ayz[_t] + M_ gupxz[_t] * M_ Axz[_t] * M_ Azz[_t] + M_ gupyz[_t] * M_ Ayz[_t] * M_ Azz[_t]) ) + 2 * ( M_ gupxy[_t] * ( M_ gupxx[_t] * M_ Axx[_t] * M_ Axy[_t] + M_ gupyy[_t] * M_ Axy[_t] * M_ Ayy[_t] + M_ gupzz[_t] * M_ Axz[_t] * M_ Ayz[_t] + M_ gupxy[_t] * (M_ Axx[_t] * M_ Ayy[_t] + M_ Axy[_t] * M_ Axy[_t]) + M_ gupxz[_t] * (M_ Axx[_t] * M_ Ayz[_t] + M_ Axz[_t] * M_ Axy[_t]) + M_ gupyz[_t] * (M_ Axy[_t] * M_ Ayz[_t] + M_ Axz[_t] * M_ Ayy[_t]) ) + M_ gupxz[_t] * ( M_ gupxx[_t] * M_ Axx[_t] * M_ Axz[_t] + M_ gupyy[_t] * M_ Axy[_t] * M_ Ayz[_t] + M_ gupzz[_t] * M_ Axz[_t] * M_ Azz[_t] + M_ gupxy[_t] * (M_ Axx[_t] * M_ Ayz[_t] + M_ Axy[_t] * M_ Axz[_t]) + M_ gupxz[_t] * (M_ Axx[_t] * M_ Azz[_t] + M_ Axz[_t] * M_ Axz[_t]) + M_ gupyz[_t] * (M_ Axy[_t] * M_ Azz[_t] + M_ Axz[_t] * M_ Ayz[_t]) ) + M_ gupyz[_t] * ( M_ gupxx[_t] * M_ Axy[_t] * M_ Axz[_t] + M_ gupyy[_t] * M_ Ayy[_t] * M_ Ayz[_t] + M_ gupzz[_t] * M_ Ayz[_t] * M_ Azz[_t] + M_ gupxy[_t] * (M_ Axy[_t] * M_ Ayz[_t] + M_ Ayy[_t] * M_ Axz[_t]) + M_ gupxz[_t] * (M_ Axy[_t] * M_ Azz[_t] + M_ Ayz[_t] * M_ Axz[_t]) + M_ gupyz[_t] * (M_ Ayy[_t] * M_ Azz[_t] + M_ Ayz[_t] * M_ Ayz[_t]) ) )) -16 * PI * M_ rho[_t] + 8 * PI * M_ S[_t]; M_ f[_t] = - F1o3 *( M_ gupxx[_t] * M_ fxx[_t] + M_ gupyy[_t] * M_ fyy[_t] + M_ gupzz[_t] * M_ fzz[_t] + 2* ( M_ gupxy[_t] * M_ fxy[_t] + M_ gupxz[_t] * M_ fxz[_t] + M_ gupyz[_t] * M_ fyz[_t] ) + M_ alpn1[_t] / M_ chin1[_t] * M_ f[_t]); M_ fxx[_t] = M_ alpn1[_t] * (M_ Rxx[_t] - 8 * PI * M_ Sxx[_t]) - M_ fxx[_t]; M_ fxy[_t] = M_ alpn1[_t] * (M_ Rxy[_t] - 8 * PI * M_ Sxy[_t]) - M_ fxy[_t]; M_ fxz[_t] = M_ alpn1[_t] * (M_ Rxz[_t] - 8 * PI * M_ Sxz[_t]) - M_ fxz[_t]; M_ fyy[_t] = M_ alpn1[_t] * (M_ Ryy[_t] - 8 * PI * M_ Syy[_t]) - M_ fyy[_t]; M_ fyz[_t] = M_ alpn1[_t] * (M_ Ryz[_t] - 8 * PI * M_ Syz[_t]) - M_ fyz[_t]; M_ fzz[_t] = M_ alpn1[_t] * (M_ Rzz[_t] - 8 * PI * M_ Szz[_t]) - M_ fzz[_t]; /* M_ fxx [_t]= M_ alpn1[_t]* (M_ Rxx [_t]- 8 * PI * M_ Sxx[_t]) -M_ fxx[_t]; M_ fxy[_t]= M_ alpn1[_t]* (M_ Rxy[_t]- 8 * PI * M_ Sxy[_t]) -M_ fxy[_t]; M_ fxz[_t]= M_ alpn1[_t]* (M_ Rxz[_t]- 8 * PI * M_ Sxz[_t]) -M_ fxz[_t]; M_ fyy[_t]= M_ alpn1[_t]* (M_ Ryy[_t]- 8 * PI * M_ Syy[_t]) -M_ fyy[_t]; M_ fyz[_t]= M_ alpn1[_t]* (M_ Ryz[_t]- 8 * PI * M_ Syz[_t]) -M_ fyz[_t]; M_ fzz[_t]= M_ alpn1[_t]* (M_ Rzz[_t]- 8 * PI * M_ Szz[_t]) -M_ fzz[_t]; // Compute trace-free part (note: M_ chi^-1 and M_ chi cancel//): M_ f[_t] = F1o3 *( M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t]+ 2* (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]) ); */ M_ Axx_rhs[_t] =M_ fxx [_t]-M_ gxx [_t]*M_ f[_t]; M_ Ayy_rhs[_t] =M_ fyy[_t]-M_ gyy[_t]*M_ f[_t]; M_ Azz_rhs[_t] =M_ fzz[_t]-M_ gzz[_t]*M_ f[_t]; M_ Axy_rhs[_t] =M_ fxy[_t]-M_ gxy[_t]*M_ f[_t]; M_ Axz_rhs[_t] =M_ fxz[_t]-M_ gxz[_t]*M_ f[_t]; M_ Ayz_rhs[_t] =M_ fyz[_t]-M_ gyz[_t]*M_ f[_t]; // Now: store M_ A_il M_ A^l_j intoM_ fij: M_ fxx [_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axx [_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Axy[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Axz[_t]+ 2 * (M_ gupxy[_t]* M_ Axx [_t]* M_ Axy[_t]+M_ gupxz[_t]* M_ Axx [_t]* M_ Axz[_t]+M_ gupyz[_t]* M_ Axy[_t]* M_ Axz[_t]); M_ fyy[_t]= M_ gupxx [_t]* M_ Axy[_t]* M_ Axy[_t]+M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayy[_t]+M_ gupzz[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ 2 * (M_ gupxy[_t]* M_ Axy[_t]* M_ Ayy[_t]+M_ gupxz[_t]* M_ Axy[_t]* M_ Ayz[_t]+M_ gupyz[_t]* M_ Ayy[_t]* M_ Ayz[_t]); M_ fzz[_t]= M_ gupxx [_t]* M_ Axz[_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Ayz[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Azz[_t]* M_ Azz[_t]+ 2 * (M_ gupxy[_t]* M_ Axz[_t]* M_ Ayz[_t]+M_ gupxz[_t]* M_ Axz[_t]* M_ Azz[_t]+M_ gupyz[_t]* M_ Ayz[_t]* M_ Azz[_t]); M_ fxy[_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axy[_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Ayy[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Ayz[_t]+ M_ gupxy[_t]*(M_ Axx [_t]* M_ Ayy[_t]+ M_ Axy[_t]* M_ Axy[_t]) + M_ gupxz[_t]*(M_ Axx [_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Axy[_t]) + M_ gupyz[_t]*(M_ Axy[_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Ayy[_t]); M_ fxz[_t]= M_ gupxx [_t]* M_ Axx [_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Axy[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Axz[_t]* M_ Azz[_t]+ M_ gupxy[_t]*(M_ Axx [_t]* M_ Ayz[_t]+ M_ Axy[_t]* M_ Axz[_t]) + M_ gupxz[_t]*(M_ Axx [_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Axz[_t]) + M_ gupyz[_t]*(M_ Axy[_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Ayz[_t]); M_ fyz[_t]= M_ gupxx [_t]* M_ Axy[_t]* M_ Axz[_t]+M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayz[_t]+M_ gupzz[_t]* M_ Ayz[_t]* M_ Azz[_t]+ M_ gupxy[_t]*(M_ Axy[_t]* M_ Ayz[_t]+ M_ Ayy[_t]* M_ Axz[_t]) + M_ gupxz[_t]*(M_ Axy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Axz[_t]) + M_ gupyz[_t]*(M_ Ayy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Ayz[_t]); M_ f[_t] = M_ chin1[_t]; // store D^i D_i Lap in M_ trK_rhs[_t] M_ trK_rhs[_t] =M_ f[_t]*M_ trK_rhs[_t]; M_ Axx_rhs[_t] = M_ f[_t] * M_ Axx_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Axx [_t]- 2 *M_ fxx[_t]) + 2 * ( M_ Axx [_t]* M_ betaxx [_t]+ M_ Axy[_t]* M_ betayx [_t]+ M_ Axz[_t]* M_ betazx [_t])- F2o3 * M_ Axx [_t]* M_ div_beta[_t]; M_ Ayy_rhs[_t] = M_ f[_t] * M_ Ayy_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Ayy[_t]- 2 *M_ fyy[_t]) + 2 * ( M_ Axy[_t]* M_ betaxy[_t]+ M_ Ayy[_t]* M_ betayy[_t]+ M_ Ayz[_t]* M_ betazy[_t])- F2o3 * M_ Ayy[_t]* M_ div_beta[_t]; M_ Azz_rhs[_t] = M_ f[_t] * M_ Azz_rhs[_t]+ M_ alpn1[_t]* (M_ trK[_t]* M_ Azz[_t]- 2 *M_ fzz[_t]) + 2 * ( M_ Axz[_t]* M_ betaxz[_t]+ M_ Ayz[_t]* M_ betayz[_t]+ M_ Azz[_t]* M_ betazz[_t])- F2o3 * M_ Azz[_t]* M_ div_beta[_t]; M_ Axy_rhs[_t] = M_ f[_t] * M_ Axy_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Axy[_t] - 2 *M_ fxy[_t])+ M_ Axx [_t]* M_ betaxy[_t] + M_ Axz[_t]* M_ betazy[_t] + M_ Ayy[_t]* M_ betayx [_t]+ M_ Ayz[_t]* M_ betazx [_t] + F1o3 * M_ Axy[_t]* M_ div_beta[_t] - M_ Axy[_t]* M_ betazz[_t]; M_ Ayz_rhs[_t] = M_ f[_t] * M_ Ayz_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Ayz[_t] - 2 *M_ fyz[_t])+ M_ Axy[_t]* M_ betaxz[_t]+ M_ Ayy[_t]* M_ betayz[_t] + M_ Axz[_t]* M_ betaxy[_t] + M_ Azz[_t]* M_ betazy[_t] + F1o3 * M_ Ayz[_t]* M_ div_beta[_t] - M_ Ayz[_t]* M_ betaxx[_t]; M_ Axz_rhs[_t] = M_ f[_t] * M_ Axz_rhs[_t]+ M_ alpn1[_t]*( M_ trK[_t]* M_ Axz[_t] - 2 *M_ fxz[_t])+ M_ Axx [_t]* M_ betaxz[_t]+ M_ Axy[_t]* M_ betayz[_t] + M_ Ayz[_t]* M_ betayx [_t]+ M_ Azz[_t]* M_ betazx [_t] + F1o3 * M_ Axz[_t]* M_ div_beta[_t] - M_ Axz[_t]* M_ betayy[_t] ; //rhsM_ for M_ Aij // Compute trace of M_ S_ij M_ S[_t] = M_ f[_t] * (M_ gupxx [_t]* M_ Sxx [_t]+M_ gupyy[_t]* M_ Syy[_t]+M_ gupzz[_t]* M_ Szz[_t]+ 2 * (M_ gupxy[_t]* M_ Sxy[_t]+M_ gupxz[_t]* M_ Sxz[_t]+M_ gupyz[_t]* M_ Syz[_t]) ); M_ trK_rhs[_t] = - M_ trK_rhs[_t] + M_ alpn1[_t]*( F1o3 * M_ trK[_t]* M_ trK[_t] + M_ gupxx [_t]*M_ fxx [_t]+M_ gupyy[_t]*M_ fyy[_t]+M_ gupzz[_t]*M_ fzz[_t] + 2 * (M_ gupxy[_t]*M_ fxy[_t]+M_ gupxz[_t]*M_ fxz[_t]+M_ gupyz[_t]*M_ fyz[_t]) + 4 * PI * ( M_ rho[_t] + M_ S[_t] )) ; //rhsM_ for M_ trK[_t] ////////M_ gauge variable part M_ Lap_rhs[_t] = -2*M_ alpn1[_t] * M_ trK[_t]; #if (GAUGE == 0) M_ betax_rhs[_t] =0.75*M_ dtSfx[_t]; M_ betay_rhs[_t] =0.75*M_ dtSfy[_t]; M_ betaz_rhs[_t] =0.75*M_ dtSfz[_t]; M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] -2*M_ dtSfx[_t]; M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] -2*M_ dtSfy[_t]; M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] -2*M_ dtSfz[_t]; #elif (GAUGE == 1) M_ betax_rhs[_t] =M_ Gamx[_t] - 2 * M_ betax[_t] ; M_ betay_rhs[_t] =M_ Gamy[_t] - 2 * M_ betay[_t] ; M_ betaz_rhs[_t] =M_ Gamz[_t] - 2 * M_ betaz[_t] ; M_ dtSfx_rhs[_t] = 0; M_ dtSfy_rhs[_t] = 0; M_ dtSfz_rhs[_t] = 0; #elif (GAUGE == 2 || GAUGE == 3) M_ betax_rhs[_t] = 0.75* M_ dtSfx[_t]; M_ betay_rhs[_t] = 0.75* M_ dtSfy[_t]; M_ betaz_rhs[_t] = 0.75* M_ dtSfz[_t]; #elif (GAUGE == 6) if(BHN==2) { int k = _t / _2D_SIZE[0]; int ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); r1 = ( pow2((Porg[0]-X[i]))+ pow2((Porg[1]-Y[j]))+ pow2((Porg[2]-Z[k])) ) / ( pow2((Porg[0]-Porg[3]))+ pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); r2 = ( pow2((Porg[3]-X[i])) + pow2((Porg[4]-Y[j])) + pow2((Porg[5]-Z[k])) )/ ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); reta[i+ j*_1D_SIZE[0]+ k*_2D_SIZE[0] ] = A + C1/(1 + 12 * r1) + C2/(1 + 12 *r2); }//BHN == 2 M_ betax_rhs[_t] = 0.75*M_ dtSfx[_t]; M_ betay_rhs[_t] = 0.75*M_ dtSfy[_t]; M_ betaz_rhs[_t] = 0.75*M_ dtSfz[_t]; M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t] * M_ dtSfx[_t]; M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t] * M_ dtSfy[_t]; M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t] * M_ dtSfz[_t]; #elif (GAUGE == 7) if(BHN==2){ int k = _t / _2D_SIZE[0]; int ps = _t - (_2D_SIZE[0] * k); //TOTRY: = curr % _2D_SIZE[0]; int j = ps / ex_c[0]; int i = ps - (j * ex_c[0]); r1 = ( pow2((Porg[0]-X[i])) + pow2((Porg[1]-Y[j])) + pow2((Porg[2]-Z[k])) )/ ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); r2 = ( pow2((Porg[3]-X[i])) + pow2((Porg[4]-Y[j])) + pow2((Porg[5]-Z[k])) )/ ( pow2((Porg[0]-Porg[3])) + pow2((Porg[1]-Porg[4])) + pow2((Porg[2]-Porg[5])) ); M_ reta[_t][i+ j*_1D_SIZE[0]+ k*_2D_SIZE[0] ] = A + C1* exp(-12 *r1) + C2*exp(- 12*r2); }//BHN ==2 M_ betax_rhs[_t] = 0.75*M_ dtSfx[_t]; M_ betay_rhs[_t] = 0.75*M_ dtSfy[_t]; M_ betaz_rhs[_t] = 0.75*M_ dtSfz[_t]; M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]*M_ dtSfx[_t]; M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]*M_ dtSfy[_t]; M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]*M_ dtSfz[_t]; #endif //if (GAUGE == ?) _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part6_gauge() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { #if (GAUGE == 2) M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1[_t])/ pow2( ( 1-sqrt(M_ chin1[_t]) ) ); M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]* M_ dtSfx[_t]; M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]* M_ dtSfy[_t]; M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]* M_ dtSfz[_t]; #elif (GAUGE == 3) M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); M_ reta[_t] = 1.13/2 * sqrt( M_ reta[_t]/ M_ chin1[_t])/ pow2((1-M_ chin1[_t])); M_ dtSfx_rhs[_t] = M_ Gamx_rhs[_t] - M_ reta[_t]* M_ dtSfx[_t]; M_ dtSfy_rhs[_t] = M_ Gamy_rhs[_t] - M_ reta[_t]* M_ dtSfy[_t]; M_ dtSfz_rhs[_t] = M_ Gamz_rhs[_t] - M_ reta[_t]* M_ dtSfz[_t]; #elif (GAUGE == 4) M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1[_t])/ pow( (1-sqrt(M_ chin1[_t]))); M_ betax_rhs[_t] = 0.75* M_ Gamx[_t] - M_ reta[_t]*M_ betax[_t]; M_ betay_rhs[_t] = 0.75* M_ Gamy[_t] - M_ reta[_t]*M_ betay[_t]; M_ betaz_rhs[_t] = 0.75* M_ Gamz[_t] - M_ reta[_t]*M_ betaz[_t]; #elif (GAUGE == 5) M_ reta[_t] = M_ gupxx[_t] * M_ dtSfx_rhs[_t] * M_ dtSfx_rhs[_t] + M_ gupyy[_t] * M_ dtSfy_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupzz[_t] * M_ dtSfz_rhs[_t] * M_ dtSfz_rhs[_t] + 2 * ( M_ gupxy[_t] * M_ dtSfx_rhs[_t] * M_ dtSfy_rhs[_t] + M_ gupxz[_t] * M_ dtSfx_rhs[_t] * M_ dtSfz_rhs[_t] + M_ gupyz[_t] * M_ dtSfy_rhs[_t] * M_ dtSfz_rhs[_t]); M_ reta[_t] = 1.13 / 2 * sqrt( M_ reta[_t]/M_ chin1)/ pow( (1-M_ chin1[_t]) ); M_ betax_rhs[_t] = 0.75* M_ Gamx[_t] - M_ reta[_t]*M_ betax[_t]; M_ betay_rhs[_t] = 0.75* M_ Gamy[_t] - M_ reta[_t]*M_ betay[_t]; M_ betaz_rhs[_t] = 0.75* M_ Gamz[_t] - M_ reta[_t]*M_ betaz[_t]; M_ dtSfx_rhs[_t] = 0; M_ dtSfy_rhs[_t] = 0; M_ dtSfz_rhs[_t] = 0; #endif _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part7() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ ham_Res[_t] = M_ gupxx [_t]* M_ Rxx [_t]+ M_ gupyy[_t]* M_ Ryy[_t]+ M_ gupzz[_t]* M_ Rzz[_t]+ 2* ( M_ gupxy[_t]* M_ Rxy[_t]+ M_ gupxz[_t]* M_ Rxz[_t]+ M_ gupyz[_t]* M_ Ryz[_t]); M_ ham_Res[_t] = M_ chin1[_t]*M_ ham_Res[_t] + F2o3 * M_ trK[_t] * M_ trK[_t] -( M_ gupxx [_t]* ( M_ gupxx [_t]* M_ Axx [_t]* M_ Axx [_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Axy[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Axz[_t]+ 2 * (M_ gupxy[_t]* M_ Axx [_t]* M_ Axy[_t]+ M_ gupxz[_t]* M_ Axx [_t]* M_ Axz[_t]+ M_ gupyz[_t]* M_ Axy[_t]* M_ Axz[_t]) ) + M_ gupyy[_t]* ( M_ gupxx [_t]* M_ Axy[_t]* M_ Axy[_t]+ M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ 2 * (M_ gupxy[_t]* M_ Axy[_t]* M_ Ayy[_t]+ M_ gupxz[_t]* M_ Axy[_t]* M_ Ayz[_t]+ M_ gupyz[_t]* M_ Ayy[_t]* M_ Ayz[_t]) ) + M_ gupzz[_t]* ( M_ gupxx [_t]* M_ Axz[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Ayz[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Azz[_t]* M_ Azz[_t]+ 2 * (M_ gupxy[_t]* M_ Axz[_t]* M_ Ayz[_t]+ M_ gupxz[_t]* M_ Axz[_t]* M_ Azz[_t]+ M_ gupyz[_t]* M_ Ayz[_t]* M_ Azz[_t]) ) + 2 * ( M_ gupxy[_t]* ( M_ gupxx [_t]* M_ Axx [_t]* M_ Axy[_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Ayy[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Ayz[_t]+ M_ gupxy[_t]* (M_ Axx [_t]* M_ Ayy[_t]+ M_ Axy[_t]* M_ Axy[_t]) + M_ gupxz[_t]* (M_ Axx [_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Axy[_t]) + M_ gupyz[_t]* (M_ Axy[_t]* M_ Ayz[_t]+ M_ Axz[_t]* M_ Ayy[_t]) ) + M_ gupxz[_t]* ( M_ gupxx [_t]* M_ Axx [_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Axy[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Axz[_t]* M_ Azz[_t]+ M_ gupxy[_t]* (M_ Axx [_t]* M_ Ayz[_t]+ M_ Axy[_t]* M_ Axz[_t]) + M_ gupxz[_t]* (M_ Axx [_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Axz[_t]) + M_ gupyz[_t]* (M_ Axy[_t]* M_ Azz[_t]+ M_ Axz[_t]* M_ Ayz[_t]) ) + M_ gupyz[_t]* ( M_ gupxx [_t]* M_ Axy[_t]* M_ Axz[_t]+ M_ gupyy[_t]* M_ Ayy[_t]* M_ Ayz[_t]+ M_ gupzz[_t]* M_ Ayz[_t]* M_ Azz[_t]+ M_ gupxy[_t]* (M_ Axy[_t]* M_ Ayz[_t]+ M_ Ayy[_t]* M_ Axz[_t]) + M_ gupxz[_t]* (M_ Axy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Axz[_t]) + M_ gupyz[_t]* (M_ Ayy[_t]* M_ Azz[_t]+ M_ Ayz[_t]* M_ Ayz[_t]) ) ))- 16 * PI * M_ rho[_t]; _t += STEP_SIZE; } } __global__ void compute_rhs_bssn_part8() { int _t = blockIdx.x*blockDim.x+threadIdx.x; while(_t < _3D_SIZE[0]) { M_ gxxx [_t]= M_ gxxx [_t]- ( M_ Gamxxx [_t]* M_ Axx [_t]+ M_ Gamyxx [_t]* M_ Axy[_t]+ M_ Gamzxx [_t]* M_ Axz[_t] + M_ Gamxxx [_t]* M_ Axx [_t]+ M_ Gamyxx [_t]* M_ Axy[_t]+ M_ Gamzxx [_t]* M_ Axz[_t]) - M_ chix[_t]*M_ Axx[_t]/M_ chin1[_t]; M_ gxyx [_t]= M_ gxyx [_t]- ( M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t] + M_ Gamxxx [_t]* M_ Axy[_t]+ M_ Gamyxx [_t]* M_ Ayy[_t]+ M_ Gamzxx [_t]* M_ Ayz[_t]) - M_ chix[_t]*M_ Axy[_t]/M_ chin1[_t]; M_ gxzx [_t]= M_ gxzx [_t]- ( M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t] + M_ Gamxxx [_t]* M_ Axz[_t]+ M_ Gamyxx [_t]* M_ Ayz[_t]+ M_ Gamzxx [_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Axz[_t]/M_ chin1[_t]; M_ gyyx [_t]= M_ gyyx [_t]- ( M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t] + M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t]) - M_ chix[_t]*M_ Ayy[_t]/M_ chin1[_t]; M_ gyzx [_t]= M_ gyzx [_t]- ( M_ Gamxxz[_t]* M_ Axy[_t]+ M_ Gamyxz[_t]* M_ Ayy[_t]+ M_ Gamzxz[_t]* M_ Ayz[_t] + M_ Gamxxy[_t]* M_ Axz[_t]+ M_ Gamyxy[_t]* M_ Ayz[_t]+ M_ Gamzxy[_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Ayz[_t]/M_ chin1[_t]; M_ gzzx [_t]= M_ gzzx [_t]- ( M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t] + M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t]) - M_ chix[_t]*M_ Azz[_t]/M_ chin1[_t]; M_ gxxy[_t]= M_ gxxy[_t]- ( M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t] + M_ Gamxxy[_t]* M_ Axx [_t]+ M_ Gamyxy[_t]* M_ Axy[_t]+ M_ Gamzxy[_t]* M_ Axz[_t]) - M_ chiy[_t]*M_ Axx[_t]/M_ chin1[_t]; M_ gxyy[_t]= M_ gxyy[_t]- ( M_ Gamxyy[_t]* M_ Axx [_t]+ M_ Gamyyy[_t]* M_ Axy[_t]+ M_ Gamzyy[_t]* M_ Axz[_t] + M_ Gamxxy[_t]* M_ Axy[_t]+ M_ Gamyxy[_t]* M_ Ayy[_t]+ M_ Gamzxy[_t]* M_ Ayz[_t]) - M_ chiy[_t]*M_ Axy[_t]/M_ chin1[_t]; M_ gxzy[_t]= M_ gxzy[_t]- ( M_ Gamxyz[_t]* M_ Axx [_t]+ M_ Gamyyz[_t]* M_ Axy[_t]+ M_ Gamzyz[_t]* M_ Axz[_t] + M_ Gamxxy[_t]* M_ Axz[_t]+ M_ Gamyxy[_t]* M_ Ayz[_t]+ M_ Gamzxy[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Axz[_t]/M_ chin1[_t]; M_ gyyy[_t]= M_ gyyy[_t]- ( M_ Gamxyy[_t]* M_ Axy[_t]+ M_ Gamyyy[_t]* M_ Ayy[_t]+ M_ Gamzyy[_t]* M_ Ayz[_t] + M_ Gamxyy[_t]* M_ Axy[_t]+ M_ Gamyyy[_t]* M_ Ayy[_t]+ M_ Gamzyy[_t]* M_ Ayz[_t]) - M_ chiy[_t]*M_ Ayy[_t]/M_ chin1[_t]; M_ gyzy[_t]= M_ gyzy[_t]- ( M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t] + M_ Gamxyy[_t]* M_ Axz[_t]+ M_ Gamyyy[_t]* M_ Ayz[_t]+ M_ Gamzyy[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Ayz[_t]/M_ chin1[_t]; M_ gzzy[_t]= M_ gzzy[_t]- ( M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t] + M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t]) - M_ chiy[_t]*M_ Azz[_t]/M_ chin1[_t]; M_ gxxz[_t]= M_ gxxz[_t]- ( M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t] + M_ Gamxxz[_t]* M_ Axx [_t]+ M_ Gamyxz[_t]* M_ Axy[_t]+ M_ Gamzxz[_t]* M_ Axz[_t]) - M_ chiz[_t]*M_ Axx[_t]/M_ chin1[_t]; M_ gxyz[_t]= M_ gxyz[_t]- ( M_ Gamxyz[_t]* M_ Axx [_t]+ M_ Gamyyz[_t]* M_ Axy[_t]+ M_ Gamzyz[_t]* M_ Axz[_t] + M_ Gamxxz[_t]* M_ Axy[_t]+ M_ Gamyxz[_t]* M_ Ayy[_t]+ M_ Gamzxz[_t]* M_ Ayz[_t]) - M_ chiz[_t]*M_ Axy[_t]/M_ chin1[_t]; M_ gxzz[_t]= M_ gxzz[_t]- ( M_ Gamxzz[_t]* M_ Axx [_t]+ M_ Gamyzz[_t]* M_ Axy[_t]+ M_ Gamzzz[_t]* M_ Axz[_t] + M_ Gamxxz[_t]* M_ Axz[_t]+ M_ Gamyxz[_t]* M_ Ayz[_t]+ M_ Gamzxz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Axz[_t]/M_ chin1[_t]; M_ gyyz[_t]= M_ gyyz[_t]- ( M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t] + M_ Gamxyz[_t]* M_ Axy[_t]+ M_ Gamyyz[_t]* M_ Ayy[_t]+ M_ Gamzyz[_t]* M_ Ayz[_t]) - M_ chiz[_t]*M_ Ayy[_t]/M_ chin1[_t]; M_ gyzz[_t]= M_ gyzz[_t]- ( M_ Gamxzz[_t]* M_ Axy[_t]+ M_ Gamyzz[_t]* M_ Ayy[_t]+ M_ Gamzzz[_t]* M_ Ayz[_t] + M_ Gamxyz[_t]* M_ Axz[_t]+ M_ Gamyyz[_t]* M_ Ayz[_t]+ M_ Gamzyz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Ayz[_t]/M_ chin1[_t]; M_ gzzz[_t]= M_ gzzz[_t]- ( M_ Gamxzz[_t]* M_ Axz[_t]+ M_ Gamyzz[_t]* M_ Ayz[_t]+ M_ Gamzzz[_t]* M_ Azz[_t] + M_ Gamxzz[_t]* M_ Axz[_t]+ M_ Gamyzz[_t]* M_ Ayz[_t]+ M_ Gamzzz[_t]* M_ Azz[_t]) - M_ chiz[_t]*M_ Azz[_t]/M_ chin1[_t]; M_ movx_Res[_t] = M_ gupxx[_t]*M_ gxxx [_t]+ M_ gupyy[_t]*M_ gxyy[_t]+ M_ gupzz[_t]*M_ gxzz[_t] +M_ gupxy[_t]*M_ gxyx [_t]+ M_ gupxz[_t]*M_ gxzx [_t]+ M_ gupyz[_t]*M_ gxzy[_t] +M_ gupxy[_t]*M_ gxxy[_t]+ M_ gupxz[_t]*M_ gxxz[_t]+ M_ gupyz[_t]*M_ gxyz[_t]; M_ movy_Res[_t] = M_ gupxx[_t]*M_ gxyx [_t]+ M_ gupyy[_t]*M_ gyyy[_t]+ M_ gupzz[_t]*M_ gyzz[_t] +M_ gupxy[_t]*M_ gyyx [_t]+ M_ gupxz[_t]*M_ gyzx [_t]+ M_ gupyz[_t]*M_ gyzy[_t] +M_ gupxy[_t]*M_ gxyy[_t]+ M_ gupxz[_t]*M_ gxyz[_t]+ M_ gupyz[_t]*M_ gyyz[_t]; M_ movz_Res[_t] = M_ gupxx[_t]*M_ gxzx [_t]+ M_ gupyy[_t]*M_ gyzy[_t]+ M_ gupzz[_t]*M_ gzzz[_t] +M_ gupxy[_t]*M_ gyzx [_t]+ M_ gupxz[_t]*M_ gzzx [_t]+ M_ gupyz[_t]*M_ gzzy[_t] +M_ gupxy[_t]*M_ gxzy[_t]+ M_ gupxz[_t]*M_ gxzz[_t]+ M_ gupyz[_t]*M_ gyzz[_t]; M_ movx_Res[_t] = M_ movx_Res[_t] - F2o3*M_ Kx [_t]- 8*PI*M_ Sx[_t]; M_ movy_Res[_t] = M_ movy_Res[_t] - F2o3*M_ Ky[_t]- 8*PI*M_ Sy[_t]; M_ movz_Res[_t] = M_ movz_Res[_t] - F2o3*M_ Kz[_t]- 8*PI*M_ Sz[_t]; _t += STEP_SIZE; } } __global__ void device_test(double * result, double * Xt){ /*result[0] = MAXSIZE; result[1] = STEP; result[2] = ex_c[0]; result[3] = ex_c[1]; result[4] = ex_c[2]; result[5] = Xt[0]; result[6] = Xt[1]; result[7] = metac.X[0]; result[8] = metac.X[1]; */ result[0] = metac.gzz[0]; result[1] = metac.gzz[1]; result[2] = metac.gzz[2]; result[3] = metac.gyy[0]; result[4] = metac.gyy[1]; result[5] = metac.gyy[2]; result[6] = _3D_SIZE[0]; result[7] = STEP_SIZE; result[8] = blockDim.x * gridDim.x; } void destroy_meta(Meta *meta) { /* if(Mh_ X) CUDA_SAFE_CALL(cudaFree(Mh_ X)); if(Mh_ Y) CUDA_SAFE_CALL(cudaFree(Mh_ Y)); if(Mh_ Z) CUDA_SAFE_CALL(cudaFree(Mh_ Z)); if(Mh_ chi) CUDA_SAFE_CALL(cudaFree(Mh_ chi)); if(Mh_ dxx) CUDA_SAFE_CALL(cudaFree(Mh_ dxx)); if(Mh_ dyy) CUDA_SAFE_CALL(cudaFree(Mh_ dyy)); if(Mh_ dzz) CUDA_SAFE_CALL(cudaFree(Mh_ dzz)); if(Mh_ trK) CUDA_SAFE_CALL(cudaFree(Mh_ trK)); if(Mh_ gxy) CUDA_SAFE_CALL(cudaFree(Mh_ gxy)); if(Mh_ gxz) CUDA_SAFE_CALL(cudaFree(Mh_ gxz)); if(Mh_ gyz) CUDA_SAFE_CALL(cudaFree(Mh_ gyz)); if(Mh_ Axx) CUDA_SAFE_CALL(cudaFree(Mh_ Axx)); if(Mh_ Axy) CUDA_SAFE_CALL(cudaFree(Mh_ Axy)); if(Mh_ Axz) CUDA_SAFE_CALL(cudaFree(Mh_ Axz)); if(Mh_ Ayz) CUDA_SAFE_CALL(cudaFree(Mh_ Ayz)); if(Mh_ Ayy) CUDA_SAFE_CALL(cudaFree(Mh_ Ayy)); if(Mh_ Azz) CUDA_SAFE_CALL(cudaFree(Mh_ Azz)); if(Mh_ Gamx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamx)); if(Mh_ Gamy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamy)); if(Mh_ Gamz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamz)); if(Mh_ Lap) CUDA_SAFE_CALL(cudaFree(Mh_ Lap)); if(Mh_ betax) CUDA_SAFE_CALL(cudaFree(Mh_ betax)); if(Mh_ betay) CUDA_SAFE_CALL(cudaFree(Mh_ betay)); if(Mh_ betaz) CUDA_SAFE_CALL(cudaFree(Mh_ betaz)); if(Mh_ dtSfx) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfx)); if(Mh_ dtSfy) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfy)); if(Mh_ dtSfz) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfz)); if(Mh_ chi_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ chi_rhs)); if(Mh_ trK_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ trK_rhs)); if(Mh_ gxy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gxy_rhs)); if(Mh_ gxz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gxz_rhs)); if(Mh_ gyz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gyz_rhs)); if(Mh_ Axx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Axx_rhs)); if(Mh_ Axy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Axy_rhs)); if(Mh_ Axz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Axz_rhs)); if(Mh_ Ayz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Ayz_rhs)); if(Mh_ Ayy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Ayy_rhs)); if(Mh_ Azz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Azz_rhs)); if(Mh_ Gamx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Gamx_rhs)); if(Mh_ Gamy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Gamy_rhs)); if(Mh_ Gamz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Gamz_rhs)); if(Mh_ Lap_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ Lap_rhs)); if(Mh_ betax_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ betax_rhs)); if(Mh_ betay_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ betay_rhs)); if(Mh_ betaz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ betaz_rhs)); if(Mh_ dtSfx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfx_rhs)); if(Mh_ dtSfy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfy_rhs)); if(Mh_ dtSfz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ dtSfz_rhs)); if(Mh_ rho) CUDA_SAFE_CALL(cudaFree(Mh_ rho)); if(Mh_ Sx) CUDA_SAFE_CALL(cudaFree(Mh_ Sx)); if(Mh_ Sy) CUDA_SAFE_CALL(cudaFree(Mh_ Sy)); if(Mh_ Sz) CUDA_SAFE_CALL(cudaFree(Mh_ Sz)); if(Mh_ Sxx) CUDA_SAFE_CALL(cudaFree(Mh_ Sxx)); if(Mh_ Sxy) CUDA_SAFE_CALL(cudaFree(Mh_ Sxy)); if(Mh_ Sxz) CUDA_SAFE_CALL(cudaFree(Mh_ Sxz)); if(Mh_ Syz) CUDA_SAFE_CALL(cudaFree(Mh_ Syz)); if(Mh_ Syy) CUDA_SAFE_CALL(cudaFree(Mh_ Syy)); if(Mh_ Szz) CUDA_SAFE_CALL(cudaFree(Mh_ Szz)); if(Mh_ Gamxxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxxx)); if(Mh_ Gamxxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxxy)); if(Mh_ Gamxxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxxz)); if(Mh_ Gamxyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxyy)); if(Mh_ Gamxyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxyz)); if(Mh_ Gamxzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxzz)); if(Mh_ Gamyxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyxx)); if(Mh_ Gamyxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyxy)); if(Mh_ Gamyxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyxz)); if(Mh_ Gamyyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyyy)); if(Mh_ Gamyyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyyz)); if(Mh_ Gamyzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyzz)); if(Mh_ Gamzxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzxx)); if(Mh_ Gamzxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzxy)); if(Mh_ Gamzxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzxz)); if(Mh_ Gamzyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzyz)); if(Mh_ Gamzyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzyy)); if(Mh_ Gamzzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzzz)); if(Mh_ Rxx) CUDA_SAFE_CALL(cudaFree(Mh_ Rxx)); if(Mh_ Rxy) CUDA_SAFE_CALL(cudaFree(Mh_ Rxy)); if(Mh_ Rxz) CUDA_SAFE_CALL(cudaFree(Mh_ Rxz)); if(Mh_ Ryy) CUDA_SAFE_CALL(cudaFree(Mh_ Ryy)); if(Mh_ Ryz) CUDA_SAFE_CALL(cudaFree(Mh_ Ryz)); if(Mh_ Rzz) CUDA_SAFE_CALL(cudaFree(Mh_ Rzz)); if(Mh_ ham_Res) CUDA_SAFE_CALL(cudaFree(Mh_ ham_Res)); if(Mh_ movx_Res) CUDA_SAFE_CALL(cudaFree(Mh_ movx_Res)); if(Mh_ movy_Res) CUDA_SAFE_CALL(cudaFree(Mh_ movy_Res)); if(Mh_ movz_Res) CUDA_SAFE_CALL(cudaFree(Mh_ movz_Res)); if(Mh_ Gmx_Res) CUDA_SAFE_CALL(cudaFree(Mh_ Gmx_Res)); if(Mh_ Gmy_Res) CUDA_SAFE_CALL(cudaFree(Mh_ Gmy_Res)); if(Mh_ Gmz_Res) CUDA_SAFE_CALL(cudaFree(Mh_ Gmz_Res)); if(Mh_ gxx) CUDA_SAFE_CALL(cudaFree(Mh_ gxx)); if(Mh_ gyy) CUDA_SAFE_CALL(cudaFree(Mh_ gyy)); if(Mh_ gzz) CUDA_SAFE_CALL(cudaFree(Mh_ gzz)); if(Mh_ chix) CUDA_SAFE_CALL(cudaFree(Mh_ chix)); if(Mh_ chiy) CUDA_SAFE_CALL(cudaFree(Mh_ chiy)); if(Mh_ chiz) CUDA_SAFE_CALL(cudaFree(Mh_ chiz)); if(Mh_ gxxx) CUDA_SAFE_CALL(cudaFree(Mh_ gxxx)); if(Mh_ gxyx) CUDA_SAFE_CALL(cudaFree(Mh_ gxyx)); if(Mh_ gxzx) CUDA_SAFE_CALL(cudaFree(Mh_ gxzx)); if(Mh_ gyyx) CUDA_SAFE_CALL(cudaFree(Mh_ gyyx)); if(Mh_ gyzx) CUDA_SAFE_CALL(cudaFree(Mh_ gyzx)); if(Mh_ gzzx) CUDA_SAFE_CALL(cudaFree(Mh_ gzzx)); if(Mh_ gxxy) CUDA_SAFE_CALL(cudaFree(Mh_ gxxy)); if(Mh_ gxyy) CUDA_SAFE_CALL(cudaFree(Mh_ gxyy)); if(Mh_ gxzy) CUDA_SAFE_CALL(cudaFree(Mh_ gxzy)); if(Mh_ gyyy) CUDA_SAFE_CALL(cudaFree(Mh_ gyyy)); if(Mh_ gyzy) CUDA_SAFE_CALL(cudaFree(Mh_ gyzy)); if(Mh_ gzzy) CUDA_SAFE_CALL(cudaFree(Mh_ gzzy)); if(Mh_ gxxz) CUDA_SAFE_CALL(cudaFree(Mh_ gxxz)); if(Mh_ gxyz) CUDA_SAFE_CALL(cudaFree(Mh_ gxyz)); if(Mh_ gxzz) CUDA_SAFE_CALL(cudaFree(Mh_ gxzz)); if(Mh_ gyyz) CUDA_SAFE_CALL(cudaFree(Mh_ gyyz)); if(Mh_ gyzz) CUDA_SAFE_CALL(cudaFree(Mh_ gyzz)); if(Mh_ gzzz) CUDA_SAFE_CALL(cudaFree(Mh_ gzzz)); if(Mh_ Lapx) CUDA_SAFE_CALL(cudaFree(Mh_ Lapx)); if(Mh_ Lapy) CUDA_SAFE_CALL(cudaFree(Mh_ Lapy)); if(Mh_ Lapz) CUDA_SAFE_CALL(cudaFree(Mh_ Lapz)); if(Mh_ betaxx) CUDA_SAFE_CALL(cudaFree(Mh_ betaxx)); if(Mh_ betaxy) CUDA_SAFE_CALL(cudaFree(Mh_ betaxy)); if(Mh_ betaxz) CUDA_SAFE_CALL(cudaFree(Mh_ betaxz)); if(Mh_ betayy) CUDA_SAFE_CALL(cudaFree(Mh_ betayy)); if(Mh_ betayz) CUDA_SAFE_CALL(cudaFree(Mh_ betayz)); if(Mh_ betazz) CUDA_SAFE_CALL(cudaFree(Mh_ betazz)); if(Mh_ betayx) CUDA_SAFE_CALL(cudaFree(Mh_ betayx)); if(Mh_ betazy) CUDA_SAFE_CALL(cudaFree(Mh_ betazy)); if(Mh_ betazx) CUDA_SAFE_CALL(cudaFree(Mh_ betazx)); if(Mh_ Kx) CUDA_SAFE_CALL(cudaFree(Mh_ Kx)); if(Mh_ Ky) CUDA_SAFE_CALL(cudaFree(Mh_ Ky)); if(Mh_ Kz) CUDA_SAFE_CALL(cudaFree(Mh_ Kz)); if(Mh_ Gamxx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxx)); if(Mh_ Gamxy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxy)); if(Mh_ Gamxz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxz)); if(Mh_ Gamyy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyy)); if(Mh_ Gamyz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyz)); if(Mh_ Gamzz) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzz)); if(Mh_ Gamyx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamyx)); if(Mh_ Gamzy) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzy)); if(Mh_ Gamzx) CUDA_SAFE_CALL(cudaFree(Mh_ Gamzx)); if(Mh_ div_beta) CUDA_SAFE_CALL(cudaFree(Mh_ div_beta)); if(Mh_ S) CUDA_SAFE_CALL(cudaFree(Mh_ S)); if(Mh_ f) CUDA_SAFE_CALL(cudaFree(Mh_ f)); if(Mh_ fxx) CUDA_SAFE_CALL(cudaFree(Mh_ fxx)); if(Mh_ fxy) CUDA_SAFE_CALL(cudaFree(Mh_ fxy)); if(Mh_ fxz) CUDA_SAFE_CALL(cudaFree(Mh_ fxz)); if(Mh_ fyy) CUDA_SAFE_CALL(cudaFree(Mh_ fyy)); if(Mh_ fyz) CUDA_SAFE_CALL(cudaFree(Mh_ fyz)); if(Mh_ fzz) CUDA_SAFE_CALL(cudaFree(Mh_ fzz)); if(Mh_ gupxx) CUDA_SAFE_CALL(cudaFree(Mh_ gupxx)); if(Mh_ gupxy) CUDA_SAFE_CALL(cudaFree(Mh_ gupxy)); if(Mh_ gupxz) CUDA_SAFE_CALL(cudaFree(Mh_ gupxz)); if(Mh_ gupyy) CUDA_SAFE_CALL(cudaFree(Mh_ gupyy)); if(Mh_ gupyz) CUDA_SAFE_CALL(cudaFree(Mh_ gupyz)); if(Mh_ gupzz) CUDA_SAFE_CALL(cudaFree(Mh_ gupzz)); if(Mh_ Gamxa) CUDA_SAFE_CALL(cudaFree(Mh_ Gamxa)); if(Mh_ Gamya) CUDA_SAFE_CALL(cudaFree(Mh_ Gamya)); if(Mh_ Gamza) CUDA_SAFE_CALL(cudaFree(Mh_ Gamza)); if(Mh_ alpn1) CUDA_SAFE_CALL(cudaFree(Mh_ alpn1)); if(Mh_ chin1) CUDA_SAFE_CALL(cudaFree(Mh_ chin1)); if(Mh_ fh) CUDA_SAFE_CALL(cudaFree(Mh_ fh)); if(Mh_ fh2) CUDA_SAFE_CALL(cudaFree(Mh_ fh2)); if(Mh_ gxx_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gxx_rhs)); if(Mh_ gyy_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gyy_rhs)); if(Mh_ gzz_rhs) CUDA_SAFE_CALL(cudaFree(Mh_ gzz_rhs)); */ if(Mh_ X) cudaFree(Mh_ X); if(Mh_ Y) cudaFree(Mh_ Y); if(Mh_ Z) cudaFree(Mh_ Z); if(Mh_ chi) cudaFree(Mh_ chi); if(Mh_ dxx) cudaFree(Mh_ dxx); if(Mh_ dyy) cudaFree(Mh_ dyy); if(Mh_ dzz) cudaFree(Mh_ dzz); if(Mh_ trK) cudaFree(Mh_ trK); if(Mh_ gxy) cudaFree(Mh_ gxy); if(Mh_ gxz) cudaFree(Mh_ gxz); if(Mh_ gyz) cudaFree(Mh_ gyz); if(Mh_ Axx) cudaFree(Mh_ Axx); if(Mh_ Axy) cudaFree(Mh_ Axy); if(Mh_ Axz) cudaFree(Mh_ Axz); if(Mh_ Ayz) cudaFree(Mh_ Ayz); if(Mh_ Ayy) cudaFree(Mh_ Ayy); if(Mh_ Azz) cudaFree(Mh_ Azz); if(Mh_ Gamx) cudaFree(Mh_ Gamx); if(Mh_ Gamy) cudaFree(Mh_ Gamy); if(Mh_ Gamz) cudaFree(Mh_ Gamz); if(Mh_ Lap) cudaFree(Mh_ Lap); if(Mh_ betax) cudaFree(Mh_ betax); if(Mh_ betay) cudaFree(Mh_ betay); if(Mh_ betaz) cudaFree(Mh_ betaz); if(Mh_ dtSfx) cudaFree(Mh_ dtSfx); if(Mh_ dtSfy) cudaFree(Mh_ dtSfy); if(Mh_ dtSfz) cudaFree(Mh_ dtSfz); if(Mh_ chi_rhs) cudaFree(Mh_ chi_rhs); if(Mh_ trK_rhs) cudaFree(Mh_ trK_rhs); if(Mh_ gxy_rhs) cudaFree(Mh_ gxy_rhs); if(Mh_ gxz_rhs) cudaFree(Mh_ gxz_rhs); if(Mh_ gyz_rhs) cudaFree(Mh_ gyz_rhs); if(Mh_ Axx_rhs) cudaFree(Mh_ Axx_rhs); if(Mh_ Axy_rhs) cudaFree(Mh_ Axy_rhs); if(Mh_ Axz_rhs) cudaFree(Mh_ Axz_rhs); if(Mh_ Ayz_rhs) cudaFree(Mh_ Ayz_rhs); if(Mh_ Ayy_rhs) cudaFree(Mh_ Ayy_rhs); if(Mh_ Azz_rhs) cudaFree(Mh_ Azz_rhs); if(Mh_ Gamx_rhs) cudaFree(Mh_ Gamx_rhs); if(Mh_ Gamy_rhs) cudaFree(Mh_ Gamy_rhs); if(Mh_ Gamz_rhs) cudaFree(Mh_ Gamz_rhs); if(Mh_ Lap_rhs) cudaFree(Mh_ Lap_rhs); if(Mh_ betax_rhs) cudaFree(Mh_ betax_rhs); if(Mh_ betay_rhs) cudaFree(Mh_ betay_rhs); if(Mh_ betaz_rhs) cudaFree(Mh_ betaz_rhs); if(Mh_ dtSfx_rhs) cudaFree(Mh_ dtSfx_rhs); if(Mh_ dtSfy_rhs) cudaFree(Mh_ dtSfy_rhs); if(Mh_ dtSfz_rhs) cudaFree(Mh_ dtSfz_rhs); if(Mh_ rho) cudaFree(Mh_ rho); if(Mh_ Sx) cudaFree(Mh_ Sx); if(Mh_ Sy) cudaFree(Mh_ Sy); if(Mh_ Sz) cudaFree(Mh_ Sz); if(Mh_ Sxx) cudaFree(Mh_ Sxx); if(Mh_ Sxy) cudaFree(Mh_ Sxy); if(Mh_ Sxz) cudaFree(Mh_ Sxz); if(Mh_ Syz) cudaFree(Mh_ Syz); if(Mh_ Syy) cudaFree(Mh_ Syy); if(Mh_ Szz) cudaFree(Mh_ Szz); if(Mh_ Gamxxx) cudaFree(Mh_ Gamxxx); if(Mh_ Gamxxy) cudaFree(Mh_ Gamxxy); if(Mh_ Gamxxz) cudaFree(Mh_ Gamxxz); if(Mh_ Gamxyy) cudaFree(Mh_ Gamxyy); if(Mh_ Gamxyz) cudaFree(Mh_ Gamxyz); if(Mh_ Gamxzz) cudaFree(Mh_ Gamxzz); if(Mh_ Gamyxx) cudaFree(Mh_ Gamyxx); if(Mh_ Gamyxy) cudaFree(Mh_ Gamyxy); if(Mh_ Gamyxz) cudaFree(Mh_ Gamyxz); if(Mh_ Gamyyy) cudaFree(Mh_ Gamyyy); if(Mh_ Gamyyz) cudaFree(Mh_ Gamyyz); if(Mh_ Gamyzz) cudaFree(Mh_ Gamyzz); if(Mh_ Gamzxx) cudaFree(Mh_ Gamzxx); if(Mh_ Gamzxy) cudaFree(Mh_ Gamzxy); if(Mh_ Gamzxz) cudaFree(Mh_ Gamzxz); if(Mh_ Gamzyz) cudaFree(Mh_ Gamzyz); if(Mh_ Gamzyy) cudaFree(Mh_ Gamzyy); if(Mh_ Gamzzz) cudaFree(Mh_ Gamzzz); if(Mh_ Rxx) cudaFree(Mh_ Rxx); if(Mh_ Rxy) cudaFree(Mh_ Rxy); if(Mh_ Rxz) cudaFree(Mh_ Rxz); if(Mh_ Ryy) cudaFree(Mh_ Ryy); if(Mh_ Ryz) cudaFree(Mh_ Ryz); if(Mh_ Rzz) cudaFree(Mh_ Rzz); if(Mh_ ham_Res) cudaFree(Mh_ ham_Res); if(Mh_ movx_Res) cudaFree(Mh_ movx_Res); if(Mh_ movy_Res) cudaFree(Mh_ movy_Res); if(Mh_ movz_Res) cudaFree(Mh_ movz_Res); if(Mh_ Gmx_Res) cudaFree(Mh_ Gmx_Res); if(Mh_ Gmy_Res) cudaFree(Mh_ Gmy_Res); if(Mh_ Gmz_Res) cudaFree(Mh_ Gmz_Res); if(Mh_ gxx) cudaFree(Mh_ gxx); if(Mh_ gyy) cudaFree(Mh_ gyy); if(Mh_ gzz) cudaFree(Mh_ gzz); if(Mh_ chix) cudaFree(Mh_ chix); if(Mh_ chiy) cudaFree(Mh_ chiy); if(Mh_ chiz) cudaFree(Mh_ chiz); if(Mh_ gxxx) cudaFree(Mh_ gxxx); if(Mh_ gxyx) cudaFree(Mh_ gxyx); if(Mh_ gxzx) cudaFree(Mh_ gxzx); if(Mh_ gyyx) cudaFree(Mh_ gyyx); if(Mh_ gyzx) cudaFree(Mh_ gyzx); if(Mh_ gzzx) cudaFree(Mh_ gzzx); if(Mh_ gxxy) cudaFree(Mh_ gxxy); if(Mh_ gxyy) cudaFree(Mh_ gxyy); if(Mh_ gxzy) cudaFree(Mh_ gxzy); if(Mh_ gyyy) cudaFree(Mh_ gyyy); if(Mh_ gyzy) cudaFree(Mh_ gyzy); if(Mh_ gzzy) cudaFree(Mh_ gzzy); if(Mh_ gxxz) cudaFree(Mh_ gxxz); if(Mh_ gxyz) cudaFree(Mh_ gxyz); if(Mh_ gxzz) cudaFree(Mh_ gxzz); if(Mh_ gyyz) cudaFree(Mh_ gyyz); if(Mh_ gyzz) cudaFree(Mh_ gyzz); if(Mh_ gzzz) cudaFree(Mh_ gzzz); if(Mh_ Lapx) cudaFree(Mh_ Lapx); if(Mh_ Lapy) cudaFree(Mh_ Lapy); if(Mh_ Lapz) cudaFree(Mh_ Lapz); if(Mh_ betaxx) cudaFree(Mh_ betaxx); if(Mh_ betaxy) cudaFree(Mh_ betaxy); if(Mh_ betaxz) cudaFree(Mh_ betaxz); if(Mh_ betayy) cudaFree(Mh_ betayy); if(Mh_ betayz) cudaFree(Mh_ betayz); if(Mh_ betazz) cudaFree(Mh_ betazz); if(Mh_ betayx) cudaFree(Mh_ betayx); if(Mh_ betazy) cudaFree(Mh_ betazy); if(Mh_ betazx) cudaFree(Mh_ betazx); if(Mh_ Kx) cudaFree(Mh_ Kx); if(Mh_ Ky) cudaFree(Mh_ Ky); if(Mh_ Kz) cudaFree(Mh_ Kz); if(Mh_ Gamxx) cudaFree(Mh_ Gamxx); if(Mh_ Gamxy) cudaFree(Mh_ Gamxy); if(Mh_ Gamxz) cudaFree(Mh_ Gamxz); if(Mh_ Gamyy) cudaFree(Mh_ Gamyy); if(Mh_ Gamyz) cudaFree(Mh_ Gamyz); if(Mh_ Gamzz) cudaFree(Mh_ Gamzz); if(Mh_ Gamyx) cudaFree(Mh_ Gamyx); if(Mh_ Gamzy) cudaFree(Mh_ Gamzy); if(Mh_ Gamzx) cudaFree(Mh_ Gamzx); if(Mh_ div_beta) cudaFree(Mh_ div_beta); if(Mh_ S) cudaFree(Mh_ S); if(Mh_ f) cudaFree(Mh_ f); if(Mh_ fxx) cudaFree(Mh_ fxx); if(Mh_ fxy) cudaFree(Mh_ fxy); if(Mh_ fxz) cudaFree(Mh_ fxz); if(Mh_ fyy) cudaFree(Mh_ fyy); if(Mh_ fyz) cudaFree(Mh_ fyz); if(Mh_ fzz) cudaFree(Mh_ fzz); if(Mh_ gupxx) cudaFree(Mh_ gupxx); if(Mh_ gupxy) cudaFree(Mh_ gupxy); if(Mh_ gupxz) cudaFree(Mh_ gupxz); if(Mh_ gupyy) cudaFree(Mh_ gupyy); if(Mh_ gupyz) cudaFree(Mh_ gupyz); if(Mh_ gupzz) cudaFree(Mh_ gupzz); if(Mh_ Gamxa) cudaFree(Mh_ Gamxa); if(Mh_ Gamya) cudaFree(Mh_ Gamya); if(Mh_ Gamza) cudaFree(Mh_ Gamza); if(Mh_ alpn1) cudaFree(Mh_ alpn1); if(Mh_ chin1) cudaFree(Mh_ chin1); if(Mh_ fh) cudaFree(Mh_ fh); if(Mh_ fh2) cudaFree(Mh_ fh2); if(Mh_ gxx_rhs) cudaFree(Mh_ gxx_rhs); if(Mh_ gyy_rhs) cudaFree(Mh_ gyy_rhs); if(Mh_ gzz_rhs) cudaFree(Mh_ gzz_rhs); #if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) // if(Mh_ reta) CUDA_SAFE_CALL(cudaFree(Mh_ reta)); if(Mh_ reta) cudaFree(Mh_ reta); #endif //if(Mh_ other_int) cudaFree(Mh_ other_int); //if(Mh_ other_double) cudaFree(Mh_ other_double); //cout<<"Address of meta:"<<&meta<(ex[0])}, {Mh_ Y, Y, static_cast(ex[1])}, {Mh_ Z, Z, static_cast(ex[2])}, }; if (!copy_buffers_to_device(coord_copies, sizeof(coord_copies) / sizeof(coord_copies[0]))) return 1; cache.last_x = X; cache.last_y = Y; cache.last_z = Z; } reset_buffer_map(cache); Meta saved_meta = *meta; auto bind_or_copy_input = [&](double *&slot, const double *host_ptr, size_t count) -> bool { const double *mapped = bssn_gpu_find_device_buffer(host_ptr); if (mapped) { slot = const_cast(mapped); return true; } return copy_buffer_to_device(slot, host_ptr, count); }; auto bind_or_keep_output = [&](double *&slot, const double *host_ptr) { const double *mapped = bssn_gpu_find_device_buffer(host_ptr); if (mapped) slot = const_cast(mapped); }; if (!(bind_or_copy_input(meta->chi, chi, static_cast(matrix_size)) && bind_or_copy_input(meta->dxx, dxx, static_cast(matrix_size)) && bind_or_copy_input(meta->dyy, dyy, static_cast(matrix_size)) && bind_or_copy_input(meta->dzz, dzz, static_cast(matrix_size)) && bind_or_copy_input(meta->trK, trK, static_cast(matrix_size)) && bind_or_copy_input(meta->gxy, gxy, static_cast(matrix_size)) && bind_or_copy_input(meta->gxz, gxz, static_cast(matrix_size)) && bind_or_copy_input(meta->gyz, gyz, static_cast(matrix_size)) && bind_or_copy_input(meta->Axx, Axx, static_cast(matrix_size)) && bind_or_copy_input(meta->Axy, Axy, static_cast(matrix_size)) && bind_or_copy_input(meta->Axz, Axz, static_cast(matrix_size)) && bind_or_copy_input(meta->Ayz, Ayz, static_cast(matrix_size)) && bind_or_copy_input(meta->Ayy, Ayy, static_cast(matrix_size)) && bind_or_copy_input(meta->Azz, Azz, static_cast(matrix_size)) && bind_or_copy_input(meta->Gamx, Gamx, static_cast(matrix_size)) && bind_or_copy_input(meta->Gamy, Gamy, static_cast(matrix_size)) && bind_or_copy_input(meta->Gamz, Gamz, static_cast(matrix_size)) && bind_or_copy_input(meta->betax, betax, static_cast(matrix_size)) && bind_or_copy_input(meta->betay, betay, static_cast(matrix_size)) && bind_or_copy_input(meta->betaz, betaz, static_cast(matrix_size)) && bind_or_copy_input(meta->Lap, Lap, static_cast(matrix_size)) && bind_or_copy_input(meta->dtSfx, dtSfx, static_cast(matrix_size)) && bind_or_copy_input(meta->dtSfy, dtSfy, static_cast(matrix_size)) && bind_or_copy_input(meta->dtSfz, dtSfz, static_cast(matrix_size)))) { *meta = saved_meta; return 1; } bind_or_keep_output(meta->chi_rhs, chi_rhs); bind_or_keep_output(meta->trK_rhs, trK_rhs); bind_or_keep_output(meta->gxx_rhs, gxx_rhs); bind_or_keep_output(meta->gxy_rhs, gxy_rhs); bind_or_keep_output(meta->gxz_rhs, gxz_rhs); bind_or_keep_output(meta->gyy_rhs, gyy_rhs); bind_or_keep_output(meta->gyz_rhs, gyz_rhs); bind_or_keep_output(meta->gzz_rhs, gzz_rhs); bind_or_keep_output(meta->Axx_rhs, Axx_rhs); bind_or_keep_output(meta->Axy_rhs, Axy_rhs); bind_or_keep_output(meta->Axz_rhs, Axz_rhs); bind_or_keep_output(meta->Ayy_rhs, Ayy_rhs); bind_or_keep_output(meta->Ayz_rhs, Ayz_rhs); bind_or_keep_output(meta->Azz_rhs, Azz_rhs); bind_or_keep_output(meta->Gamx_rhs, Gamx_rhs); bind_or_keep_output(meta->Gamy_rhs, Gamy_rhs); bind_or_keep_output(meta->Gamz_rhs, Gamz_rhs); bind_or_keep_output(meta->Lap_rhs, Lap_rhs); bind_or_keep_output(meta->betax_rhs, betax_rhs); bind_or_keep_output(meta->betay_rhs, betay_rhs); bind_or_keep_output(meta->betaz_rhs, betaz_rhs); bind_or_keep_output(meta->dtSfx_rhs, dtSfx_rhs); bind_or_keep_output(meta->dtSfy_rhs, dtSfy_rhs); bind_or_keep_output(meta->dtSfz_rhs, dtSfz_rhs); cudaMemcpyToSymbol(metac, meta, sizeof(Meta)); const ZeroSpec zero_specs[] = { {Mh_ rho, static_cast(matrix_size)}, {Mh_ Sxx, static_cast(matrix_size)}, {Mh_ Sxy, static_cast(matrix_size)}, {Mh_ Sxz, static_cast(matrix_size)}, {Mh_ Syz, static_cast(matrix_size)}, {Mh_ Syy, static_cast(matrix_size)}, {Mh_ Szz, static_cast(matrix_size)}, {Mh_ Sx, static_cast(matrix_size)}, {Mh_ Sy, static_cast(matrix_size)}, {Mh_ Sz, static_cast(matrix_size)}, }; if (!zero_buffers(zero_specs, sizeof(zero_specs) / sizeof(zero_specs[0]))) { *meta = saved_meta; return 1; } map_buffer(cache, chi, Mh_ chi); map_buffer(cache, trK, Mh_ trK); map_buffer(cache, dxx, Mh_ dxx); map_buffer(cache, gxy, Mh_ gxy); map_buffer(cache, gxz, Mh_ gxz); map_buffer(cache, dyy, Mh_ dyy); map_buffer(cache, gyz, Mh_ gyz); map_buffer(cache, dzz, Mh_ dzz); map_buffer(cache, Axx, Mh_ Axx); map_buffer(cache, Axy, Mh_ Axy); map_buffer(cache, Axz, Mh_ Axz); map_buffer(cache, Ayy, Mh_ Ayy); map_buffer(cache, Ayz, Mh_ Ayz); map_buffer(cache, Azz, Mh_ Azz); map_buffer(cache, Gamx, Mh_ Gamx); map_buffer(cache, Gamy, Mh_ Gamy); map_buffer(cache, Gamz, Mh_ Gamz); map_buffer(cache, Lap, Mh_ Lap); map_buffer(cache, betax, Mh_ betax); map_buffer(cache, betay, Mh_ betay); map_buffer(cache, betaz, Mh_ betaz); map_buffer(cache, dtSfx, Mh_ dtSfx); map_buffer(cache, dtSfy, Mh_ dtSfy); map_buffer(cache, dtSfz, Mh_ dtSfz); map_buffer(cache, chi_rhs, Mh_ chi_rhs); map_buffer(cache, trK_rhs, Mh_ trK_rhs); map_buffer(cache, gxx_rhs, Mh_ gxx_rhs); map_buffer(cache, gxy_rhs, Mh_ gxy_rhs); map_buffer(cache, gxz_rhs, Mh_ gxz_rhs); map_buffer(cache, gyy_rhs, Mh_ gyy_rhs); map_buffer(cache, gyz_rhs, Mh_ gyz_rhs); map_buffer(cache, gzz_rhs, Mh_ gzz_rhs); map_buffer(cache, Axx_rhs, Mh_ Axx_rhs); map_buffer(cache, Axy_rhs, Mh_ Axy_rhs); map_buffer(cache, Axz_rhs, Mh_ Axz_rhs); map_buffer(cache, Ayy_rhs, Mh_ Ayy_rhs); map_buffer(cache, Ayz_rhs, Mh_ Ayz_rhs); map_buffer(cache, Azz_rhs, Mh_ Azz_rhs); map_buffer(cache, Gamx_rhs, Mh_ Gamx_rhs); map_buffer(cache, Gamy_rhs, Mh_ Gamy_rhs); map_buffer(cache, Gamz_rhs, Mh_ Gamz_rhs); map_buffer(cache, Lap_rhs, Mh_ Lap_rhs); map_buffer(cache, betax_rhs, Mh_ betax_rhs); map_buffer(cache, betay_rhs, Mh_ betay_rhs); map_buffer(cache, betaz_rhs, Mh_ betaz_rhs); map_buffer(cache, dtSfx_rhs, Mh_ dtSfx_rhs); map_buffer(cache, dtSfy_rhs, Mh_ dtSfy_rhs); map_buffer(cache, dtSfz_rhs, Mh_ dtSfz_rhs); double sss[3] = {1,1,1}; double aas[3] = {-1,-1,1}; double asa[3] = {-1,1,-1}; double saa[3] = {1,-1,-1}; double ass[3] = {-1,1,1}; double sas[3] = {1,-1,1}; double ssa[3] = {1,1,-1}; //3 --------------------Init constant memory--------------------- #if (GAUGE == 6 || GAUGE == 7) int BHN_h; double Porg_h[9]; double Mass_h[3]; //call getpbh(BHN,Porg,Mass) #ifdef fortran1 getpbh #endif #ifdef fortran2 GETPBH #endif #ifdef fortran3 getpbh_ #endif (BHN_h,double Porg_h,double Mass_h); cudaMemcpyToSymbol(BHN,&BHN_h, sizeof(int)); cudaMemcpyToSymbol(Porg,&Porg_h,9 * sizeof(double)); cudaMemcpyToSymbol(Mass,&Mass_h,3 * sizeof(double)); double tmp_con = Mass[0] + Mass[1]; //t = M cudaMemcpyToSymbol(M, &tmp_con, sizeof(double)); tmp_con = 2 / tmp_con; //t = A cudaMemcpyToSymbol(A, &tmp_con, sizeof(double)); double tmp_con2 = 1/Mass[0] - tmp_con; cudaMemcpyToSymbol(C1, &tmp_con2, sizeof(double)); double tmp_con2 = 1/Mass[1] - tmp_con; cudaMemcpyToSymbol(C2, &tmp_con2, sizeof(double)); #endif//if (GAUGE == 6 || GAUGE == 7) //3.1-----for compute_rhs_bssn--------- //cout<<"Size of Meta:"< 1 && abs[0] < dXh) {ijkmin_h[0] = -2; ijkmin2_h[0] = -3;} if(Symmetry > 1 && abs[1] < dYh) {ijkmin_h[1] = -2; ijkmin2_h[1] = -3;} if(Symmetry > 0 && abs[2] < dZh) {ijkmin_h[2] = -2; ijkmin2_h[2] = -3;} if(Symmetry > 2 && abs[0] < dXh) {ijkmin3_h[0] = -3;} if(Symmetry > 2 && abs[1] < dYh) {ijkmin3_h[1] = -3;} if(Symmetry > 0 && abs[2] < dZh) {ijkmin3_h[2] = -3;} cudaMemcpyToSymbol(ijk_max,ijkmax_h,3*sizeof(int)); cudaMemcpyToSymbol(ijk_min,ijkmin_h,3*sizeof(int)); cudaMemcpyToSymbol(ijk_min2,ijkmin2_h,3*sizeof(int)); cudaMemcpyToSymbol(ijk_min3,ijkmin3_h,3*sizeof(int)); double d12dxyz_h[3] = {1.0,1.0,1.0}; double d2dxyz_h[3] = {1.0,1.0,1.0}; d12dxyz_h[0] /= 12; d12dxyz_h[1] /= 12; d12dxyz_h[2] /= 12; d12dxyz_h[0] /= dXh; d12dxyz_h[1] /= dYh; d12dxyz_h[2] /= dZh; d2dxyz_h[0] /= 2; d2dxyz_h[1] /= 2; d2dxyz_h[2] /= 2; d2dxyz_h[0] /= dXh; d2dxyz_h[1] /= dYh; d2dxyz_h[2] /= dZh; cudaMemcpyToSymbol(d12dxyz,d12dxyz_h,3*sizeof(double)); cudaMemcpyToSymbol(d2dxyz,d2dxyz_h,3*sizeof(double)); //3.3--------for fdderivs------------ double Sdxdxh = 1.0 /( dXh * dXh ); double Sdydyh = 1.0 /( dYh * dYh ); double Sdzdzh = 1.0 /( dZh * dZh ); double Fdxdxh = 1.0 / 12.0 /( dXh * dXh ); double Fdydyh = 1.0 / 12.0 /( dYh * dYh ); double Fdzdzh = 1.0 / 12.0 /( dZh * dZh ); double Sdxdyh = 1.0/4.0 /( dXh * dYh ); double Sdxdzh = 1.0/4.0 /( dXh * dZh ); double Sdydzh = 1.0/4.0 /( dYh * dZh ); double Fdxdyh = 1.0/144.0 /( dXh * dYh ); double Fdxdzh = 1.0/144.0 /( dXh * dZh ); double Fdydzh = 1.0/144.0 /( dYh * dZh ); cudaMemcpyToSymbol(Sdxdx,&Sdxdxh,sizeof(double)); cudaMemcpyToSymbol(Sdydy,&Sdydyh,sizeof(double)); cudaMemcpyToSymbol(Sdzdz,&Sdzdzh,sizeof(double)); cudaMemcpyToSymbol(Sdxdy,&Sdxdyh,sizeof(double)); cudaMemcpyToSymbol(Sdxdz,&Sdxdzh,sizeof(double)); cudaMemcpyToSymbol(Sdydz,&Sdydzh,sizeof(double)); cudaMemcpyToSymbol(Fdxdx,&Fdxdxh,sizeof(double)); cudaMemcpyToSymbol(Fdydy,&Fdydyh,sizeof(double)); cudaMemcpyToSymbol(Fdzdz,&Fdzdzh,sizeof(double)); cudaMemcpyToSymbol(Fdxdy,&Fdxdyh,sizeof(double)); cudaMemcpyToSymbol(Fdxdz,&Fdxdzh,sizeof(double)); cudaMemcpyToSymbol(Fdydz,&Fdydzh,sizeof(double)); //3.4---------for lopsided--------------------------- #ifdef TIMING1 cudaThreadSynchronize(); gettimeofday(&tv2, NULL); cout<<"TIME USED"<>>(ctest_d); cudaMemcpy(ctest, ctest_d, sizeof(double), cudaMemcpyDeviceToHost); cout<<"My rank is: "<>>(); sub_fderivs(Mh_ betax,Mh_ fh,Mh_ betaxx,Mh_ betaxy,Mh_ betaxz,ass); sub_fderivs(Mh_ betay,Mh_ fh,Mh_ betayx,Mh_ betayy,Mh_ betayz,sas); sub_fderivs(Mh_ betaz,Mh_ fh,Mh_ betazx,Mh_ betazy,Mh_ betazz,ssa); sub_fderivs(Mh_ chi,Mh_ fh,Mh_ chix,Mh_ chiy,Mh_ chiz, sss); sub_fderivs(Mh_ Lap,Mh_ fh,Mh_ Lapx,Mh_ Lapy,Mh_ Lapz, sss); sub_fderivs(Mh_ trK,Mh_ fh,Mh_ Kx,Mh_ Ky,Mh_ Kz, sss); sub_fderivs(Mh_ dxx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz, sss); sub_fderivs(Mh_ dyy,Mh_ fh,Mh_ gyyx,Mh_ gyyy,Mh_ gyyz, sss); sub_fderivs(Mh_ dzz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz, sss); sub_fderivs(Mh_ gxy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz, aas); sub_fderivs(Mh_ gxz,Mh_ fh,Mh_ gxzx,Mh_ gxzy,Mh_ gxzz, asa); sub_fderivs(Mh_ gyz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz, saa); compute_rhs_bssn_part2<<>>(); sub_fdderivs(Mh_ betax,Mh_ fh,Mh_ gxxx,Mh_ gxyx,Mh_ gxzx,Mh_ gyyx,Mh_ gyzx,Mh_ gzzx,ass); sub_fdderivs(Mh_ betay,Mh_ fh,Mh_ gxxy,Mh_ gxyy,Mh_ gxzy,Mh_ gyyy,Mh_ gyzy,Mh_ gzzy,sas); sub_fdderivs(Mh_ betaz,Mh_ fh,Mh_ gxxz,Mh_ gxyz,Mh_ gxzz,Mh_ gyyz,Mh_ gyzz,Mh_ gzzz,ssa); sub_fderivs( Mh_ Gamx, Mh_ fh,Mh_ Gamxx, Mh_ Gamxy, Mh_ Gamxz,ass); sub_fderivs( Mh_ Gamy, Mh_ fh,Mh_ Gamyx, Mh_ Gamyy, Mh_ Gamyz,sas); sub_fderivs( Mh_ Gamz, Mh_ fh,Mh_ Gamzx, Mh_ Gamzy, Mh_ Gamzz,ssa); compute_rhs_bssn_part3<<>>(); computeRicci(Mh_ dxx,Mh_ Rxx,sss, meta); computeRicci(Mh_ dyy,Mh_ Ryy,sss, meta); computeRicci(Mh_ dzz,Mh_ Rzz,sss, meta); computeRicci(Mh_ gxy,Mh_ Rxy,aas, meta); computeRicci(Mh_ gxz,Mh_ Rxz,asa, meta); computeRicci(Mh_ gyz,Mh_ Ryz,saa, meta); compute_rhs_bssn_part4<<>>(); sub_fdderivs(Mh_ chi,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); compute_rhs_bssn_part5<<>>(); sub_fdderivs(Mh_ Lap,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); compute_rhs_bssn_part6<<>>(); #if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5) sub_fderivs(Mh_ chi,Mh_ fh, Mh_ dtSfx_rhs, Mh_ dtSfy_rhs, Mh_ dtSfz_rhs,sss); compute_rhs_bssn_part6_gauge<<>>(); #endif sub_lopsided(Mh_ gxx,Mh_ fh2,Mh_ gxx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ gxy,Mh_ fh2,Mh_ gxy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,aas); sub_lopsided(Mh_ gxz,Mh_ fh2,Mh_ gxz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,asa); sub_lopsided(Mh_ gyy,Mh_ fh2,Mh_ gyy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ gyz,Mh_ fh2,Mh_ gyz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,saa); sub_lopsided(Mh_ gzz,Mh_ fh2,Mh_ gzz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ Axx,Mh_ fh2,Mh_ Axx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ Axy,Mh_ fh2,Mh_ Axy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,aas); sub_lopsided(Mh_ Axz,Mh_ fh2,Mh_ Axz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,asa); sub_lopsided(Mh_ Ayy,Mh_ fh2,Mh_ Ayy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ Ayz,Mh_ fh2,Mh_ Ayz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,saa); sub_lopsided(Mh_ Azz,Mh_ fh2,Mh_ Azz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ chi,Mh_ fh2,Mh_ chi_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ trK,Mh_ fh2,Mh_ trK_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); sub_lopsided(Mh_ Gamx,Mh_ fh2,Mh_ Gamx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ass); sub_lopsided(Mh_ Gamy,Mh_ fh2,Mh_ Gamy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sas); sub_lopsided(Mh_ Gamz,Mh_ fh2,Mh_ Gamz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ssa); sub_lopsided(Mh_ Lap,Mh_ fh2,Mh_ Lap_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sss); #if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) sub_lopsided(Mh_ betax,Mh_ fh2,Mh_ betax_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ass); sub_lopsided(Mh_ betay,Mh_ fh2,Mh_ betay_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sas); sub_lopsided(Mh_ betaz,Mh_ fh2,Mh_ betaz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ssa); #endif #if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) sub_lopsided(Mh_ dtSfx,Mh_ fh2,Mh_ dtSfx_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ass); sub_lopsided(Mh_ dtSfy,Mh_ fh2,Mh_ dtSfy_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,sas); sub_lopsided(Mh_ dtSfz,Mh_ fh2,Mh_ dtSfz_rhs,Mh_ betax,Mh_ betay,Mh_ betaz,ssa); #endif if(eps > 0){ sub_kodis(Mh_ chi,Mh_ fh2, Mh_ chi_rhs,sss); sub_kodis(Mh_ trK,Mh_ fh2, Mh_ trK_rhs,sss); sub_kodis(Mh_ dxx,Mh_ fh2, Mh_ gxx_rhs,sss); sub_kodis(Mh_ gxy,Mh_ fh2, Mh_ gxy_rhs,aas); sub_kodis(Mh_ gxz,Mh_ fh2, Mh_ gxz_rhs,asa); sub_kodis(Mh_ dyy,Mh_ fh2, Mh_ gyy_rhs,sss); sub_kodis(Mh_ gyz,Mh_ fh2, Mh_ gyz_rhs,saa); sub_kodis(Mh_ dzz,Mh_ fh2, Mh_ gzz_rhs,sss); sub_kodis(Mh_ Axx,Mh_ fh2, Mh_ Axx_rhs,sss); sub_kodis(Mh_ Axy,Mh_ fh2, Mh_ Axy_rhs,aas); sub_kodis(Mh_ Axz,Mh_ fh2, Mh_ Axz_rhs,asa); sub_kodis(Mh_ Ayy,Mh_ fh2, Mh_ Ayy_rhs,sss); sub_kodis(Mh_ Ayz,Mh_ fh2, Mh_ Ayz_rhs,saa); sub_kodis(Mh_ Azz,Mh_ fh2, Mh_ Azz_rhs,sss); sub_kodis(Mh_ Gamx,Mh_ fh2, Mh_ Gamx_rhs,ass); sub_kodis(Mh_ Gamy,Mh_ fh2, Mh_ Gamy_rhs,sas); sub_kodis(Mh_ Gamz,Mh_ fh2, Mh_ Gamz_rhs,ssa); sub_kodis(Mh_ Lap,Mh_ fh2, Mh_ Lap_rhs,sss); sub_kodis(Mh_ betax,Mh_ fh2, Mh_ betax_rhs,ass); sub_kodis(Mh_ betay,Mh_ fh2, Mh_ betay_rhs,sas); sub_kodis(Mh_ betaz,Mh_ fh2, Mh_ betaz_rhs,ssa); #if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) sub_kodis(Mh_ dtSfx,Mh_ fh2, Mh_ dtSfx_rhs,ass); sub_kodis(Mh_ dtSfy,Mh_ fh2, Mh_ dtSfy_rhs,sas); sub_kodis(Mh_ dtSfz,Mh_ fh2, Mh_ dtSfz_rhs,ssa); #endif } if(effective_co == 0){ compute_rhs_bssn_part7<<>>(); sub_fderivs(Mh_ Axx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz,sss); sub_fderivs(Mh_ Axy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz,aas); sub_fderivs(Mh_ Axz,Mh_ fh,Mh_ gxzx,Mh_ gxzy,Mh_ gxzz,asa); sub_fderivs(Mh_ Ayy,Mh_ fh,Mh_ gyyx,Mh_ gyyy,Mh_ gyyz,sss); sub_fderivs(Mh_ Ayz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz,saa); sub_fderivs(Mh_ Azz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz,sss); compute_rhs_bssn_part8<<>>(); } #if (ABV == 1) cout<<"TODO: bssn_gpu.cu::2373 (ABV == 1)"<