Compare commits

..

14 Commits

226 changed files with 171038 additions and 186095 deletions

4
.gitignore vendored
View File

@@ -1,6 +1,6 @@
__pycache__ __pycache__
GW150914 GW150914
GW150914* GW150914-origin
docs docs
*.tmp *.tmp
.codex

6
.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@@ -16,9 +16,9 @@ import numpy
File_directory = "GW150914" ## output file directory File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long ## The file directory name should not be too long
MPI_processes = 2 ## number of mpi processes used in the simulation MPI_processes = 64 ## number of mpi processes used in the simulation
GPU_Calculation = "yes" ## Use GPU or not GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0 CPU_Part = 1.0
GPU_Part = 0.0 GPU_Part = 0.0
@@ -158,7 +158,7 @@ Detector_Rmax = 160.0 ## farest dector distance
## Setting the apprent horizon ## Setting the apprent horizon
AHF_Find = "yes" ## whether to find the apparent horizon: choose "yes" or "no" AHF_Find = "no" ## whether to find the apparent horizon: choose "yes" or "no"
AHF_Find_Every = 24 AHF_Find_Every = 24
AHF_Dump_Time = 20.0 AHF_Dump_Time = 20.0

View File

@@ -58,36 +58,31 @@ File_directory = os.path.join(input_data.File_directory)
## If the specified output directory exists, ask the user whether to continue ## If the specified output directory exists, ask the user whether to continue
if os.path.exists(File_directory): if os.path.exists(File_directory):
auto_overwrite = str(getattr(input_data, "Auto_Overwrite_Output", "yes")).strip().lower() print( " Output dictionary has been existed !!! " )
if auto_overwrite in ("1", "yes", "y", "true", "on", "continue"): print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
print( " Output dictionary has been existed; Auto_Overwrite_Output=yes, continue the calculation. " ) print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
print( ) print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
else: print( )
print( " Output dictionary has been existed !!! " ) ## Prompt whether to overwrite the existing directory
print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " ) while True:
print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " ) try:
print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " ) inputvalue = input()
print( ) ## If the user agrees to overwrite, proceed and remove the existing directory
## Prompt whether to overwrite the existing directory if ( inputvalue == "continue" ):
while True: print( " Continue the calculation !!! " )
try: print( )
inputvalue = input() break
## If the user agrees to overwrite, proceed and remove the existing directory ## If the user chooses not to overwrite, exit and keep the existing directory
if ( inputvalue == "continue" ): elif ( inputvalue == "stop" ):
print( " Continue the calculation !!! " ) print( " Stop the calculation !!! " )
print( ) sys.exit()
break ## If the user input is invalid, prompt again
## If the user chooses not to overwrite, exit and keep the existing directory else:
elif ( inputvalue == "stop" ):
print( " Stop the calculation !!! " )
sys.exit()
## If the user input is invalid, prompt again
else:
print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " )
except ValueError:
print( " Please input your choice !!! " ) print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " ) print( " Input 'continue' or 'stop' in the terminal !!! " )
except ValueError:
print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " )
## Remove the existing output directory if present ## Remove the existing output directory if present
shutil.rmtree(File_directory, ignore_errors=True) shutil.rmtree(File_directory, ignore_errors=True)
@@ -131,11 +126,6 @@ setup.generate_AMSSNCKU_input()
#inputvalue = input() ## Wait for user input (press Enter) to proceed #inputvalue = input() ## Wait for user input (press Enter) to proceed
#print() #print()
setup.print_puncture_information()
##################################################################
## Generate AMSS-NCKU program input files based on the configured parameters ## Generate AMSS-NCKU program input files based on the configured parameters
print( ) print( )
@@ -263,7 +253,7 @@ print()
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE") ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE")
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABE_CUDA") ABE_file = os.path.join(AMSS_NCKU_source_copy, "ABEGPU")
if not os.path.exists( ABE_file ): if not os.path.exists( ABE_file ):
print( ) print( )
@@ -317,7 +307,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import generate_TwoPuncture_input import generate_TwoPuncture_input
generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input() generate_TwoPuncture_input.generate_AMSSNCKU_TwoPuncture_input(numerical_grid.puncture_data)
print( ) print( )
print( " The input parfile for the TwoPunctureABE executable has been generated. " ) print( " The input parfile for the TwoPunctureABE executable has been generated. " )
@@ -359,7 +349,7 @@ if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
import renew_puncture_parameter import renew_puncture_parameter
renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory) renew_puncture_parameter.append_AMSSNCKU_BSSN_input(File_directory, output_directory, numerical_grid.puncture_data)
## Generated AMSS-NCKU input filename ## Generated AMSS-NCKU input filename

View File

@@ -1,100 +0,0 @@
##################################################################
##
## AMSS-NCKU Plot-Only Restart Script
## Author: Xiaoqu / Claude
## 2026/05/12
##
## This script checks for existing output data from AMSS_NCKU_Program.py.
## If data exists, it skips all computation and goes directly to plotting,
## saving time when plotting was interrupted.
## If no data is found, it exits with a message.
##
##################################################################
## Guard against re-execution by multiprocessing child processes.
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
import os
import sys
import AMSS_NCKU_Input as input_data
##################################################################
## Construct paths from input configuration
File_directory = os.path.join(input_data.File_directory)
output_directory = os.path.join(File_directory, "AMSS_NCKU_output")
binary_results_directory = os.path.join(output_directory, input_data.Output_directory)
figure_directory = os.path.join(File_directory, "figure")
##################################################################
## Check whether the required output data files exist
required_files = [
os.path.join(binary_results_directory, "bssn_BH.dat"),
os.path.join(binary_results_directory, "bssn_ADMQs.dat"),
os.path.join(binary_results_directory, "bssn_psi4.dat"),
os.path.join(binary_results_directory, "bssn_constraint.dat"),
]
missing_files = [f for f in required_files if not os.path.exists(f)]
if missing_files:
print(" No existing AMSS_NCKU_Program.py output data found. ")
print(" The following required files are missing: ")
for f in missing_files:
print(f" {f}")
print()
print(" Please run AMSS_NCKU_Program.py first to generate the simulation data. ")
print(" Exiting. ")
sys.exit(1)
print(" Found existing AMSS_NCKU_Program.py output data. " )
print(" Skipping all computation and going directly to plotting. " )
print()
## Ensure the figure directory exists (it should, but be safe)
os.makedirs(figure_directory, exist_ok=True)
##################################################################
## Plot the AMSS-NCKU program results
import plot_xiaoqu
import plot_GW_strain_amplitude_xiaoqu
from parallel_plot_helper import run_plot_tasks_parallel
plot_tasks = []
## Plot black hole trajectory
plot_tasks.append((plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory)))
plot_tasks.append((plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory)))
## Plot black hole separation vs. time
plot_tasks.append((plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory)))
## Plot gravitational waveforms (psi4 and strain amplitude)
for i in range(input_data.Detector_Number):
plot_tasks.append((plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i)))
plot_tasks.append((plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i)))
## Plot ADM mass evolution
for i in range(input_data.Detector_Number):
plot_tasks.append((plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i)))
## Plot Hamiltonian constraint violation over time
for i in range(input_data.grid_level):
plot_tasks.append((plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i)))
run_plot_tasks_parallel(plot_tasks)
## Plot stored binary data (runs serially, not in the parallel pool)
plot_xiaoqu.generate_binary_data_plot(binary_results_directory, figure_directory)
print()
print(" Plotting completed successfully. ")
print()

View File

@@ -198,16 +198,16 @@ int main(int argc, char *argv[])
if (myrank == 0) if (myrank == 0)
{ {
string out_dir; string out_dir;
string filename; char filename[50];
map<string, string>::iterator iter; map<string, string>::iterator iter;
iter = parameters::str_par.find("output dir"); iter = parameters::str_par.find("output dir");
if (iter != parameters::str_par.end()) if (iter != parameters::str_par.end())
{ {
out_dir = iter->second; out_dir = iter->second;
} }
filename = out_dir + "/setting.par"; sprintf(filename, "%s/setting.par", out_dir.c_str());
ofstream setfile; ofstream setfile;
setfile.open(filename.c_str(), ios::trunc); setfile.open(filename, ios::trunc);
if (!setfile.good()) if (!setfile.good())
{ {
@@ -484,11 +484,7 @@ int main(int argc, char *argv[])
cout << endl; cout << endl;
} }
// Let the process teardown reclaim the simulation object. Some derived delete ADM;
// equation classes keep MPI/CUDA-backed state whose destructor ordering
// is fragile at program shutdown.
if (getenv("AMSS_DELETE_ADM_ON_EXIT"))
delete ADM;
//=======================caculation done============================================================= //=======================caculation done=============================================================

View File

@@ -2,9 +2,7 @@
#ifdef newc #ifdef newc
#include <sstream> #include <sstream>
#include <cstdio> #include <cstdio>
#include <cstdlib>
#include <map> #include <map>
#include <string>
using namespace std; using namespace std;
#else #else
#include <stdio.h> #include <stdio.h>
@@ -12,7 +10,6 @@ using namespace std;
#endif #endif
#include <time.h> #include <time.h>
#include <cstring>
#include "macrodef.h" #include "macrodef.h"
#include "misc.h" #include "misc.h"
@@ -22,9 +19,6 @@ using namespace std;
#include "bssnEM_class.h" #include "bssnEM_class.h"
#include "bssn_rhs.h" #include "bssn_rhs.h"
#include "empart.h" #include "empart.h"
#if USE_CUDA_BSSN
#include "bssn_rhs_cuda.h"
#endif
#include "initial_puncture.h" #include "initial_puncture.h"
#include "initial_maxwell.h" #include "initial_maxwell.h"
#include "enforce_algebra.h" #include "enforce_algebra.h"
@@ -42,387 +36,6 @@ using namespace std;
//================================================================================================ //================================================================================================
namespace
{
MyList<var> *advance_var_list(MyList<var> *vars, int count)
{
while (vars && count > 0)
{
vars = vars->next;
--count;
}
return vars;
}
bool bssn_em_step_timing_enabled()
{
static int enabled = -1;
if (enabled < 0)
{
const char *env = getenv("AMSS_EM_STEP_TIMING");
enabled = (env && atoi(env) != 0) ? 1 : 0;
}
return enabled != 0;
}
bool bssn_em_step_timing_all_levels_enabled()
{
static int enabled = -1;
if (enabled < 0)
{
const char *env = getenv("AMSS_EM_STEP_TIMING_ALL_LEVELS");
enabled = (env && atoi(env) != 0) ? 1 : 0;
}
return enabled != 0;
}
#if USE_CUDA_BSSN
bool bssn_em_zero_analysis_fastpath_enabled()
{
static int enabled = -1;
if (enabled < 0)
{
const char *env = getenv("AMSS_EM_ZERO_ANALYSIS_FASTPATH");
enabled = (!env || atoi(env) != 0) ? 1 : 0;
}
return enabled != 0;
}
bool bssn_em_zero_resident_download_fastpath_enabled()
{
static int enabled = -1;
if (enabled < 0)
{
const char *env = getenv("AMSS_EM_ZERO_RESIDENT_DOWNLOAD_FASTPATH");
enabled = (!env || atoi(env) != 0) ? 1 : 0;
}
return enabled != 0;
}
bool bssn_em_resident_zero_fastpath_ready(MyList<Patch> *PatL,
#ifdef WithShell
ShellPatch *shell,
#else
ShellPatch * /*shell*/,
#endif
int rank)
{
int local_ok = 1;
int local_seen = 0;
MyList<Patch> *Pp = PatL;
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (rank == cg->rank)
{
local_seen = 1;
if (!bssn_em_cuda_resident_zero_fast_state(cg))
local_ok = 0;
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (shell && shell->PatL)
{
MyList<ss_patch> *SP = shell->PatL;
while (SP)
{
MyList<Block> *BP = SP->data->blb;
while (BP)
{
Block *cg = BP->data;
if (rank == cg->rank)
{
local_seen = 1;
if (!bssn_em_cuda_resident_zero_fast_state(cg))
local_ok = 0;
}
if (BP == SP->data->ble)
break;
BP = BP->next;
}
SP = SP->next;
}
}
#endif
int global_ok = 0;
int global_seen = 0;
MPI_Allreduce(&local_ok, &global_ok, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
MPI_Allreduce(&local_seen, &global_seen, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
return global_seen && global_ok;
}
bool bssn_em_analysis_zero_fastpath_ready(MyList<Patch> *PatL,
#ifdef WithShell
ShellPatch *shell,
#else
ShellPatch *shell,
#endif
int rank)
{
if (!bssn_em_zero_analysis_fastpath_enabled())
return false;
return bssn_em_resident_zero_fastpath_ready(PatL, shell, rank);
}
void zero_em_analysis_outputs(MyList<Patch> *PatL,
#ifdef WithShell
ShellPatch *shell,
#else
ShellPatch * /*shell*/,
#endif
int rank,
var *Rphi2_var, var *Iphi2_var,
var *Rphi1_var, var *Iphi1_var)
{
MyList<Patch> *Pp = PatL;
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (rank == cg->rank)
{
const size_t all = (size_t)cg->shape[0] * cg->shape[1] * cg->shape[2];
std::memset(cg->fgfs[Rphi2_var->sgfn], 0, all * sizeof(double));
std::memset(cg->fgfs[Iphi2_var->sgfn], 0, all * sizeof(double));
std::memset(cg->fgfs[Rphi1_var->sgfn], 0, all * sizeof(double));
std::memset(cg->fgfs[Iphi1_var->sgfn], 0, all * sizeof(double));
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
#ifdef WithShell
if (shell && shell->PatL)
{
MyList<ss_patch> *SP = shell->PatL;
while (SP)
{
MyList<Block> *BP = SP->data->blb;
while (BP)
{
Block *cg = BP->data;
if (rank == cg->rank)
{
const size_t all = (size_t)cg->shape[0] * cg->shape[1] * cg->shape[2];
std::memset(cg->fgfs[Rphi2_var->sgfn], 0, all * sizeof(double));
std::memset(cg->fgfs[Iphi2_var->sgfn], 0, all * sizeof(double));
std::memset(cg->fgfs[Rphi1_var->sgfn], 0, all * sizeof(double));
std::memset(cg->fgfs[Iphi1_var->sgfn], 0, all * sizeof(double));
}
if (BP == SP->data->ble)
break;
BP = BP->next;
}
SP = SP->next;
}
}
#endif
}
#endif
int bssn_em_step_timing_every()
{
static int every = -1;
if (every < 0)
{
const char *env = getenv("AMSS_EM_STEP_TIMING_EVERY");
every = (env && atoi(env) > 0) ? atoi(env) : 1;
}
return every;
}
#if USE_CUDA_BSSN
bool fill_bssn_em_bssn_cuda_views(Block *cg, MyList<var> *vars,
double **host_views,
double *propspeeds = 0,
double *soa_flat = 0)
{
int idx = 0;
while (vars && idx < BSSN_CUDA_STATE_COUNT)
{
host_views[idx] = cg->fgfs[vars->data->sgfn];
if (propspeeds)
propspeeds[idx] = vars->data->propspeed;
if (soa_flat)
{
soa_flat[3 * idx + 0] = vars->data->SoA[0];
soa_flat[3 * idx + 1] = vars->data->SoA[1];
soa_flat[3 * idx + 2] = vars->data->SoA[2];
}
vars = vars->next;
++idx;
}
return idx == BSSN_CUDA_STATE_COUNT;
}
bool fill_bssn_em_cuda_views(Block *cg, MyList<var> *vars,
double **host_views,
double *propspeeds = 0,
double *soa_flat = 0)
{
int idx = 0;
while (vars && idx < BSSN_EM_CUDA_STATE_COUNT)
{
host_views[idx] = cg->fgfs[vars->data->sgfn];
if (propspeeds)
propspeeds[idx] = vars->data->propspeed;
if (soa_flat)
{
soa_flat[3 * idx + 0] = vars->data->SoA[0];
soa_flat[3 * idx + 1] = vars->data->SoA[1];
soa_flat[3 * idx + 2] = vars->data->SoA[2];
}
vars = vars->next;
++idx;
}
return idx == BSSN_EM_CUDA_STATE_COUNT && vars == 0;
}
void fill_bssn_em_fixed_source_cuda_views(Block *cg, double **sources,
var *Jx, var *Jy, var *Jz, var *qchar)
{
sources[0] = cg->fgfs[Jx->sgfn];
sources[1] = cg->fgfs[Jy->sgfn];
sources[2] = cg->fgfs[Jz->sgfn];
sources[3] = cg->fgfs[qchar->sgfn];
}
void fill_bssn_em_matter_cuda_views(Block *cg, double **matter,
var *rho, var *Sx, var *Sy, var *Sz,
var *Sxx, var *Sxy, var *Sxz,
var *Syy, var *Syz, var *Szz)
{
matter[0] = cg->fgfs[rho->sgfn];
matter[1] = cg->fgfs[Sx->sgfn];
matter[2] = cg->fgfs[Sy->sgfn];
matter[3] = cg->fgfs[Sz->sgfn];
matter[4] = cg->fgfs[Sxx->sgfn];
matter[5] = cg->fgfs[Sxy->sgfn];
matter[6] = cg->fgfs[Sxz->sgfn];
matter[7] = cg->fgfs[Syy->sgfn];
matter[8] = cg->fgfs[Syz->sgfn];
matter[9] = cg->fgfs[Szz->sgfn];
}
bool bssn_em_cuda_use_resident_sync(int lev)
{
#ifdef WithShell
(void)lev;
return false;
#else
return true;
#endif
}
bool bssn_em_cuda_keep_resident_after_step(int lev, int trfls_in, int analysis_lev)
{
static int keep_all_levels = -1;
if (keep_all_levels < 0)
{
const char *env = getenv("AMSS_CUDA_EM_KEEP_ALL_LEVELS");
keep_all_levels = (env && atoi(env) != 0) ? 1 : 0;
}
static int enabled = -1;
if (enabled < 0)
{
const char *env = getenv("AMSS_CUDA_EM_KEEP_RESIDENT_AFTER_STEP");
if (!env)
env = getenv("AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP");
enabled = (env && atoi(env) != 0) ? 1 : 0;
}
if (!enabled)
return false;
if (lev == analysis_lev)
return false;
if (keep_all_levels)
return true;
return lev < trfls_in;
}
void bssn_em_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars,
int myrank, bool release_ctx)
{
MyList<Patch> *Pp = PatL;
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
{
double *state_out[BSSN_EM_CUDA_STATE_COUNT];
if (!fill_bssn_em_cuda_views(cg, vars, state_out))
{
cout << "CUDA BSSN-EM resident state list mismatch during download" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
if (bssn_cuda_download_resident_state_count_if_present(cg, cg->shape,
state_out,
BSSN_EM_CUDA_STATE_COUNT))
{
cout << "CUDA BSSN-EM resident state download failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
if (release_ctx)
bssn_cuda_release_step_ctx(cg);
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
}
void bssn_em_cuda_keep_only_level_state(MyList<Patch> *PatL, MyList<var> *vars,
int myrank)
{
MyList<Patch> *Pp = PatL;
while (Pp)
{
MyList<Block> *BP = Pp->data->blb;
while (BP)
{
Block *cg = BP->data;
if (myrank == cg->rank && bssn_cuda_has_resident_state(cg))
{
double *state_key[BSSN_EM_CUDA_STATE_COUNT];
if (!fill_bssn_em_cuda_views(cg, vars, state_key))
{
cout << "CUDA BSSN-EM resident state list mismatch during prune" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
if (bssn_cuda_keep_only_resident_state_count(cg, cg->shape,
state_key,
BSSN_EM_CUDA_STATE_COUNT))
{
cout << "CUDA BSSN-EM keep-only resident state failed" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
if (BP == Pp->data->ble)
break;
BP = BP->next;
}
Pp = Pp->next;
}
}
#endif
}
// Define bssnEM_class // Define bssnEM_class
// It inherits some members and methods from the parent class bssn_class and modifies others. // It inherits some members and methods from the parent class bssn_class and modifies others.
@@ -645,13 +258,6 @@ void bssnEM_class::Initialize()
PhysTime = StartTime; PhysTime = StartTime;
Setup_Black_Hole_position(); Setup_Black_Hole_position();
} }
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
} }
//================================================================================================ //================================================================================================
@@ -1227,25 +833,9 @@ void bssnEM_class::Step(int lev, int YN)
int iter_count = 0; // count RK4 substeps int iter_count = 0; // count RK4 substeps
int pre = 0, cor = 1; int pre = 0, cor = 1;
int ERROR = 0; int ERROR = 0;
#if USE_CUDA_BSSN
const bool use_cuda_resident_sync = bssn_em_cuda_use_resident_sync(lev);
#endif
const bool em_step_timing = bssn_em_step_timing_enabled();
const double em_step_t0 = em_step_timing ? MPI_Wtime() : 0.0;
double em_t0 = 0.0;
double em_t_predictor = 0.0;
double em_t_predictor_sync = 0.0;
double em_t_corrector = 0.0;
double em_t_corrector_sync = 0.0;
double em_t_analysis = 0.0;
double em_t_bh = 0.0;
double em_t_swap = 0.0;
double em_t_resident = 0.0;
double em_t_rp = 0.0;
MyList<ss_patch> *sPp; MyList<ss_patch> *sPp;
// Predictor // Predictor
em_t0 = em_step_timing ? MPI_Wtime() : 0.0;
MyList<Patch> *Pp = GH->PatL[lev]; MyList<Patch> *Pp = GH->PatL[lev];
while (Pp) while (Pp)
{ {
@@ -1255,20 +845,15 @@ void bssnEM_class::Step(int lev, int YN)
Block *cg = BP->data; Block *cg = BP->data;
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
#if !USE_CUDA_BSSN
#if (AGM == 0) #if (AGM == 0)
f_enforce_ga(cg->shape, f_enforce_ga(cg->shape,
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn], cg->fgfs[Axx0->sgfn], cg->fgfs[Axy0->sgfn], cg->fgfs[Axz0->sgfn],
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
#endif #endif
int em_rhs_error = 0; if (
bool used_gpu_substep = false;
#if !USE_CUDA_BSSN
em_rhs_error =
f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2], f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[phi0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
@@ -1288,52 +873,8 @@ void bssnEM_class::Step(int lev, int YN)
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
Symmetry, lev, ndeps); Symmetry, lev, ndeps) ||
#endif f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
#if USE_CUDA_BSSN
if (!em_rhs_error)
{
double *state_in[BSSN_EM_CUDA_STATE_COUNT];
double *state_out[BSSN_EM_CUDA_STATE_COUNT];
double *sources[BSSN_EM_CUDA_SOURCE_COUNT];
double propspeed[BSSN_EM_CUDA_STATE_COUNT];
double soa_flat[3 * BSSN_EM_CUDA_STATE_COUNT];
if (!fill_bssn_em_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
!fill_bssn_em_cuda_views(cg, SynchList_pre, state_out))
{
cout << "CUDA BSSN-EM state list mismatch on predictor step" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
fill_bssn_em_fixed_source_cuda_views(cg, sources, Jx, Jy, Jz, qchar);
int apply_bam_bc = 0;
#if (SommerType == 0)
#ifndef WithShell
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = 1;
#endif
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
if (bssn_em_cuda_rk4_substep(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
state_in, state_out, sources,
propspeed, soa_flat, Pp->data->bbox,
dT_lev, TRK4, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, pre,
keep_resident_state, apply_enforce_ga, chitiny))
{
ERROR = 1;
}
used_gpu_substep = true;
}
#endif
if (em_rhs_error ||
(!used_gpu_substep &&
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -1366,7 +907,7 @@ void bssnEM_class::Step(int lev, int YN)
cg->fgfs[Cons_Ham->sgfn], cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, pre))) Symmetry, lev, ndeps, pre))
{ {
cout << "find NaN in domain: (" cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[0] << ":" << cg->bbox[3] << ","
@@ -1375,8 +916,6 @@ void bssnEM_class::Step(int lev, int YN)
ERROR = 1; ERROR = 1;
} }
if (!used_gpu_substep)
{
// rk4 substep and boundary // rk4 substep and boundary
{ {
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
@@ -1418,7 +957,6 @@ void bssnEM_class::Step(int lev, int YN)
} }
} }
f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny); f_lowerboundset(cg->shape, cg->fgfs[phi->sgfn], chitiny);
}
} }
if (BP == Pp->data->ble) if (BP == Pp->data->ble)
break; break;
@@ -1426,8 +964,6 @@ void bssnEM_class::Step(int lev, int YN)
} }
Pp = Pp->next; Pp = Pp->next;
} }
if (em_step_timing)
em_t_predictor += MPI_Wtime() - em_t0;
// check error information // check error information
{ {
int erh = ERROR; int erh = ERROR;
@@ -1685,11 +1221,7 @@ void bssnEM_class::Step(int lev, int YN)
} }
#endif #endif
if (em_step_timing) Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
em_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
if (em_step_timing)
em_t_predictor_sync += MPI_Wtime() - em_t0;
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -1712,8 +1244,6 @@ void bssnEM_class::Step(int lev, int YN)
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {
if (em_step_timing)
em_t0 = MPI_Wtime();
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev); compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++) for (int ithBH = 0; ithBH < BH_num; ithBH++)
{ {
@@ -1742,24 +1272,16 @@ void bssnEM_class::Step(int lev, int YN)
DG_List->clearList(); DG_List->clearList();
} }
} }
if (em_step_timing)
em_t_bh += MPI_Wtime() - em_t0;
} }
// data analysis part // data analysis part
// Warning NOTE: the variables1 are used as temp storege room // Warning NOTE: the variables1 are used as temp storege room
if (lev == a_lev) if (lev == a_lev)
{ {
if (em_step_timing)
em_t0 = MPI_Wtime();
AnalysisStuff_EM(lev, dT_lev); AnalysisStuff_EM(lev, dT_lev);
if (em_step_timing)
em_t_analysis += MPI_Wtime() - em_t0;
} }
// corrector // corrector
for (iter_count = 1; iter_count < 4; iter_count++) for (iter_count = 1; iter_count < 4; iter_count++)
{ {
if (em_step_timing)
em_t0 = MPI_Wtime();
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt; // for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
if (iter_count == 1 || iter_count == 3) if (iter_count == 1 || iter_count == 3)
TRK4 += dT_lev / 2; TRK4 += dT_lev / 2;
@@ -1772,7 +1294,6 @@ void bssnEM_class::Step(int lev, int YN)
Block *cg = BP->data; Block *cg = BP->data;
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
#if !USE_CUDA_BSSN
#if (AGM == 0) #if (AGM == 0)
f_enforce_ga(cg->shape, f_enforce_ga(cg->shape,
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
@@ -1786,13 +1307,9 @@ void bssnEM_class::Step(int lev, int YN)
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn], cg->fgfs[Axx->sgfn], cg->fgfs[Axy->sgfn], cg->fgfs[Axz->sgfn],
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
#endif #endif
int em_rhs_error = 0; if (
bool used_gpu_substep = false;
#if !USE_CUDA_BSSN
em_rhs_error =
f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2], f_compute_rhs_empart(cg->shape, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[phi->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
@@ -1812,55 +1329,8 @@ void bssnEM_class::Step(int lev, int YN)
cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn], cg->fgfs[Sx->sgfn], cg->fgfs[Sy->sgfn], cg->fgfs[Sz->sgfn],
cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn], cg->fgfs[Sxx->sgfn], cg->fgfs[Sxy->sgfn], cg->fgfs[Sxz->sgfn],
cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn], cg->fgfs[Syy->sgfn], cg->fgfs[Syz->sgfn], cg->fgfs[Szz->sgfn],
Symmetry, lev, ndeps); Symmetry, lev, ndeps) ||
#endif f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
#if USE_CUDA_BSSN
if (!em_rhs_error)
{
double *state_in[BSSN_EM_CUDA_STATE_COUNT];
double *state_out[BSSN_EM_CUDA_STATE_COUNT];
double *sources[BSSN_EM_CUDA_SOURCE_COUNT];
double propspeed[BSSN_EM_CUDA_STATE_COUNT];
double soa_flat[3 * BSSN_EM_CUDA_STATE_COUNT];
if (!fill_bssn_em_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
!fill_bssn_em_cuda_views(cg, SynchList_cor, state_out))
{
cout << "CUDA BSSN-EM state list mismatch on corrector step" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
fill_bssn_em_fixed_source_cuda_views(cg, sources, Jx, Jy, Jz, qchar);
int apply_bam_bc = 0;
#if (SommerType == 0)
#ifndef WithShell
apply_bam_bc = (lev == 0) ? 1 : 0;
#endif
#endif
int apply_enforce_ga = 0;
#if (AGM == 0)
apply_enforce_ga = 1;
#elif (AGM == 1)
if (iter_count == 3)
apply_enforce_ga = 1;
#endif
int keep_resident_state = use_cuda_resident_sync ? 1 : 0;
if (bssn_em_cuda_rk4_substep(cg,
cg->shape, cg->X[0], cg->X[1], cg->X[2],
state_in, state_out, sources,
propspeed, soa_flat, Pp->data->bbox,
dT_lev, TRK4, iter_count, apply_bam_bc,
Symmetry, lev, ndeps, cor,
keep_resident_state, apply_enforce_ga, chitiny))
{
ERROR = 1;
}
used_gpu_substep = true;
}
#endif
if (em_rhs_error ||
(!used_gpu_substep &&
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -1892,7 +1362,7 @@ void bssnEM_class::Step(int lev, int YN)
cg->fgfs[Cons_Ham->sgfn], cg->fgfs[Cons_Ham->sgfn],
cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn], cg->fgfs[Cons_Px->sgfn], cg->fgfs[Cons_Py->sgfn], cg->fgfs[Cons_Pz->sgfn],
cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn], cg->fgfs[Cons_Gx->sgfn], cg->fgfs[Cons_Gy->sgfn], cg->fgfs[Cons_Gz->sgfn],
Symmetry, lev, ndeps, cor))) Symmetry, lev, ndeps, cor))
{ {
cout << "find NaN in domain: (" cout << "find NaN in domain: ("
<< cg->bbox[0] << ":" << cg->bbox[3] << "," << cg->bbox[0] << ":" << cg->bbox[3] << ","
@@ -1900,8 +1370,6 @@ void bssnEM_class::Step(int lev, int YN)
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1; ERROR = 1;
} }
if (!used_gpu_substep)
{
// rk4 substep and boundary // rk4 substep and boundary
{ {
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList; MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varl1 = SynchList_cor, *varlrhs = RHSList;
@@ -1944,7 +1412,6 @@ void bssnEM_class::Step(int lev, int YN)
} }
} }
f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny); f_lowerboundset(cg->shape, cg->fgfs[phi1->sgfn], chitiny);
}
} }
if (BP == Pp->data->ble) if (BP == Pp->data->ble)
break; break;
@@ -2216,13 +1683,7 @@ void bssnEM_class::Step(int lev, int YN)
} }
#endif #endif
if (em_step_timing) Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
em_t_corrector += MPI_Wtime() - em_t0;
if (em_step_timing)
em_t0 = MPI_Wtime();
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
if (em_step_timing)
em_t_corrector_sync += MPI_Wtime() - em_t0;
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -2244,8 +1705,6 @@ void bssnEM_class::Step(int lev, int YN)
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {
if (em_step_timing)
em_t0 = MPI_Wtime();
compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev); compute_Porg_rhs(Porg, Porg1, Sfx, Sfy, Sfz, lev);
for (int ithBH = 0; ithBH < BH_num; ithBH++) for (int ithBH = 0; ithBH < BH_num; ithBH++)
{ {
@@ -2274,14 +1733,10 @@ void bssnEM_class::Step(int lev, int YN)
DG_List->clearList(); DG_List->clearList();
} }
} }
if (em_step_timing)
em_t_bh += MPI_Wtime() - em_t0;
} }
// swap time level // swap time level
if (iter_count < 3) if (iter_count < 3)
{ {
if (em_step_timing)
em_t0 = MPI_Wtime();
Pp = GH->PatL[lev]; Pp = GH->PatL[lev];
while (Pp) while (Pp)
{ {
@@ -2325,32 +1780,12 @@ void bssnEM_class::Step(int lev, int YN)
Porg[ithBH][2] = Porg1[ithBH][2]; Porg[ithBH][2] = Porg1[ithBH][2];
} }
} }
if (em_step_timing)
em_t_swap += MPI_Wtime() - em_t0;
} }
} }
#if USE_CUDA_BSSN
if (use_cuda_resident_sync)
{
if (em_step_timing)
em_t0 = MPI_Wtime();
const bool needs_resident_download =
!bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev);
if (needs_resident_download)
bssn_em_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
if (em_step_timing)
em_t_resident += MPI_Wtime() - em_t0;
}
#endif
#if (RPS == 0) #if (RPS == 0)
// mesh refinement boundary part // mesh refinement boundary part
if (em_step_timing)
em_t0 = MPI_Wtime();
RestrictProlong(lev, YN, BB); RestrictProlong(lev, YN, BB);
if (em_step_timing)
em_t_rp += MPI_Wtime() - em_t0;
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -2378,8 +1813,6 @@ void bssnEM_class::Step(int lev, int YN)
// //
// OldStateList old ----------- // OldStateList old -----------
// update // update
if (em_step_timing)
em_t0 = MPI_Wtime();
Pp = GH->PatL[lev]; Pp = GH->PatL[lev];
while (Pp) while (Pp)
{ {
@@ -2425,26 +1858,6 @@ void bssnEM_class::Step(int lev, int YN)
Porg0[ithBH][2] = Porg1[ithBH][2]; Porg0[ithBH][2] = Porg1[ithBH][2];
} }
} }
if (em_step_timing)
{
em_t_swap += MPI_Wtime() - em_t0;
static int em_step_report_count = 0;
const int em_timing_every = bssn_em_step_timing_every();
const bool report_all_levels = bssn_em_step_timing_all_levels_enabled();
if (lev == GH->levels - 1)
++em_step_report_count;
if ((report_all_levels || lev == GH->levels - 1) &&
(em_timing_every <= 1 || em_step_report_count % em_timing_every == 0))
{
fprintf(stderr,
"[AMSS-EM-STEP-TIMING] lev=%d wall=%.6f predictor=%.6f pre_sync=%.6f "
"analysis=%.6f corrector=%.6f cor_sync=%.6f bh=%.6f swap=%.6f resident=%.6f rp=%.6f\n",
lev, MPI_Wtime() - em_step_t0,
em_t_predictor, em_t_predictor_sync,
em_t_analysis, em_t_corrector, em_t_corrector_sync,
em_t_bh, em_t_swap, em_t_resident, em_t_rp);
}
}
} }
//================================================================================================ //================================================================================================
@@ -2623,59 +2036,6 @@ void bssnEM_class::AnalysisStuff_EM(int lev, double dT_lev)
if (LastAnas >= AnasTime) if (LastAnas >= AnasTime)
{ {
#if USE_CUDA_BSSN
const bool zero_em_analysis =
bssn_em_analysis_zero_fastpath_ready(GH->PatL[lev],
#ifdef WithShell
SH
#else
0
#endif
, myrank
);
#else
const bool zero_em_analysis = false;
#endif
if (zero_em_analysis)
{
#if USE_CUDA_BSSN
zero_em_analysis_outputs(GH->PatL[lev],
#ifdef WithShell
SH,
#else
0,
#endif
myrank,
Rphi2, Iphi2, Rphi1, Iphi1);
#endif
int NN = 0;
for (int pl = 1; pl < maxl + 1; pl++)
for (int pm = -pl; pm < pl + 1; pm++)
NN++;
double *RP = new double[NN];
double *IP = new double[NN];
std::memset(RP, 0, NN * sizeof(double));
std::memset(IP, 0, NN * sizeof(double));
for (int i = 0; i < decn; i++)
Phi2Monitor->writefile(PhysTime, NN, RP, IP);
delete[] RP;
delete[] IP;
NN = 0;
for (int pl = 0; pl < maxl + 1; pl++)
for (int pm = -pl; pm < pl + 1; pm++)
NN++;
RP = new double[NN];
IP = new double[NN];
std::memset(RP, 0, NN * sizeof(double));
std::memset(IP, 0, NN * sizeof(double));
for (int i = 0; i < decn; i++)
Phi1Monitor->writefile(PhysTime, NN, RP, IP);
delete[] RP;
delete[] IP;
}
else
{
Compute_Phi2(lev); Compute_Phi2(lev);
double *RP, *IP; double *RP, *IP;
int NN = 0; int NN = 0;
@@ -2764,7 +2124,6 @@ void bssnEM_class::AnalysisStuff_EM(int lev, double dT_lev)
} }
delete[] RP; delete[] RP;
delete[] IP; delete[] IP;
}
} }
AnalysisStuff(lev, dT_lev); // LastAnas need and only need control here AnalysisStuff(lev, dT_lev); // LastAnas need and only need control here
@@ -2945,12 +2304,11 @@ void bssnEM_class::Interp_Constraint()
} }
ofstream outfile; ofstream outfile;
char suffix[64]; char filename[50];
sprintf(suffix, "/interp_constraint_%05d.dat", int(PhysTime / dT + 0.5)); sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5));
string filename = ErrorMonitor->out_dir + suffix;
// 0.5 for round off // 0.5 for round off
outfile.open(filename.c_str()); outfile.open(filename);
outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl; outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl;
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {

View File

@@ -48,7 +48,6 @@ public:
double StartTime, TotalTime; double StartTime, TotalTime;
double AnasTime, DumpTime, d2DumpTime, CheckTime; double AnasTime, DumpTime, d2DumpTime, CheckTime;
double LastAnas, LastConsOut; double LastAnas, LastConsOut;
bool cuda_level0_constraint_cache_valid;
int *ConstraintRefreshLevels; int *ConstraintRefreshLevels;
double Courant; double Courant;
double numepss, numepsb, numepsh; double numepss, numepsb, numepsh;
@@ -144,7 +143,7 @@ public:
bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi, int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
int a_levi, int maxli, int decni, double maxrexi, double drexi); int a_levi, int maxli, int decni, double maxrexi, double drexi);
virtual ~bssn_class(); ~bssn_class();
void Evolve(int Steps); void Evolve(int Steps);
void RecursiveStep(int lev); void RecursiveStep(int lev);

View File

@@ -1098,12 +1098,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
betaz_rhs[i] = FF * dtSfz[i]; betaz_rhs[i] = FF * dtSfz[i];
reta[i] = reta[i] =
gupxx[i] * chix[i] * chix[i] gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
+ gupyy[i] * chiy[i] * chiy[i] + gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
+ gupzz[i] * chiz[i] * chiz[i] + gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i]
+ TWO * ( gupxy[i] * chix[i] * chiy[i] + TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i]
+ gupxz[i] * chix[i] * chiz[i] + gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
+ gupyz[i] * chiy[i] * chiz[i] ); + gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
#if (GAUGE == 2) #if (GAUGE == 2)
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 ); reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
@@ -1116,12 +1116,12 @@ int f_compute_rhs_bssn(int *ex, double &T,
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i]; dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
#elif (GAUGE == 4 || GAUGE == 5) #elif (GAUGE == 4 || GAUGE == 5)
reta[i] = reta[i] =
gupxx[i] * chix[i] * chix[i] gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
+ gupyy[i] * chiy[i] * chiy[i] + gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
+ gupzz[i] * chiz[i] * chiz[i] + gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i]
+ TWO * ( gupxy[i] * chix[i] * chiy[i] + TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i]
+ gupxz[i] * chix[i] * chiz[i] + gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
+ gupyz[i] * chiy[i] * chiz[i] ); + gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
#if (GAUGE == 4) #if (GAUGE == 4)
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 ); reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
#ifndef BSSN_GPU_H_ #ifndef BSSN_GPU_H_
#define BSSN_GPU_H_ #define BSSN_GPU_H_
#include "bssn_macro.h" #include "bssn_macro.h"
#include "macrodef.h" #include "macrodef.fh"
#define DEVICE_ID 0 #define DEVICE_ID 0
// #define DEVICE_ID_BY_MPI_RANK // #define DEVICE_ID_BY_MPI_RANK
@@ -25,32 +25,49 @@
/** main function */ /** main function */
int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T, int gpu_rhs(int calledby, int mpi_rank, int *ex, double &T,
double *X, double *Y, double *Z, double *X, double *Y, double *Z,
double *chi, double *trK, double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz, double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz, double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz, double *dtSfx, double *dtSfy, double *dtSfz,
double *chi_rhs, double *trK_rhs, double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx,
double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co); int &Symmetry, int &Lev, double &eps, int &co);
int gpu_rhs_ss(RHS_SS_PARA); int gpu_rhs_ss(RHS_SS_PARA);
#define Z4C_SS_PARA int calledby, int mpi_rank, int *ex, double &T, double *crho, double *sigma, double *R, double *X, double *Y, double *Z, double *drhodx, double *drhody, double *drhodz, double *dsigmadx, double *dsigmady, double *dsigmadz, double *dRdx, double *dRdy, double *dRdz, double *drhodxx, double *drhodxy, double *drhodxz, double *drhodyy, double *drhodyz, double *drhodzz, double *dsigmadxx, double *dsigmadxy, double *dsigmadxz, double *dsigmadyy, double *dsigmadyz, double *dsigmadzz, double *dRdxx, double *dRdxy, double *dRdxz, double *dRdyy, double *dRdyz, double *dRdzz, double *chi, double *trK, double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz, double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz, double *Gamx, double *Gamy, double *Gamz, double *Lap, double *betax, double *betay, double *betaz, double *dtSfx, double *dtSfy, double *dtSfz, double *TZ, double *chi_rhs, double *trK_rhs, double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs, double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs, double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs, double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs, double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs, double *TZ_rhs, double *rho, double *Sx, double *Sy, double *Sz, double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz, double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz, double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz, double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz, double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz, double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res, double *Gmx_Res, double *Gmy_Res, double *Gmz_Res, int &Symmetry, int &Lev, double &eps, int &sst, int &co /** Init GPU side data in GPUMeta. */
// void init_fluid_meta_gpu(GPUMeta *gpu_meta);
int gpu_rhs_z4c_ss(Z4C_SS_PARA);
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,210 @@
#ifndef BSSN_GPU_CLASS_H
#define BSSN_GPU_CLASS_H
#ifdef newc
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstdlib>
#include <string>
#include <cmath>
using namespace std;
#else
#include <iostream.h>
#include <iomanip.h>
#include <fstream.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#endif
#include <mpi.h>
#include "macrodef.h"
#include "cgh.h"
#include "ShellPatch.h"
#include "misc.h"
#include "var.h"
#include "MyList.h"
#include "monitor.h"
#include "surface_integral.h"
#include "checkpoint.h"
// added by yangquan
#include "bssn_macro.h"
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
class bssn_class
{
public:
// added by yangquan
//----------------------
int gpu_num_mynode;
int cpu_core_num_mynode;
int mpi_process_num_mynode;
int my_sequence_mynode;
int mynode_id;
int use_gpu;
virtual void Step_GPU(int lev, int YN);
virtual void Get_runtime_envirment();
// virtual void Step_OPENMP(int lev,int YN);
//----------------------
int ngfs;
int nprocs, myrank;
cgh *GH;
ShellPatch *SH;
double PhysTime;
int checkrun;
char checkfilename[50];
int Steps;
double StartTime, TotalTime;
double AnasTime, DumpTime, d2DumpTime, CheckTime;
double LastAnas, LastConsOut;
double Courant;
double numepss, numepsb, numepsh;
int Symmetry;
int maxl, decn;
double maxrex, drex;
int trfls, a_lev;
double dT;
double chitiny;
double **Porg0, **Porgbr, **Porg, **Porg1, **Porg_rhs;
int BH_num, BH_num_input;
double *Mass, *Pmom, *Spin;
double ADMMass;
var *phio, *trKo;
var *gxxo, *gxyo, *gxzo, *gyyo, *gyzo, *gzzo;
var *Axxo, *Axyo, *Axzo, *Ayyo, *Ayzo, *Azzo;
var *Gmxo, *Gmyo, *Gmzo;
var *Lapo, *Sfxo, *Sfyo, *Sfzo;
var *dtSfxo, *dtSfyo, *dtSfzo;
var *phi0, *trK0;
var *gxx0, *gxy0, *gxz0, *gyy0, *gyz0, *gzz0;
var *Axx0, *Axy0, *Axz0, *Ayy0, *Ayz0, *Azz0;
var *Gmx0, *Gmy0, *Gmz0;
var *Lap0, *Sfx0, *Sfy0, *Sfz0;
var *dtSfx0, *dtSfy0, *dtSfz0;
var *phi, *trK;
var *gxx, *gxy, *gxz, *gyy, *gyz, *gzz;
var *Axx, *Axy, *Axz, *Ayy, *Ayz, *Azz;
var *Gmx, *Gmy, *Gmz;
var *Lap, *Sfx, *Sfy, *Sfz;
var *dtSfx, *dtSfy, *dtSfz;
var *phi1, *trK1;
var *gxx1, *gxy1, *gxz1, *gyy1, *gyz1, *gzz1;
var *Axx1, *Axy1, *Axz1, *Ayy1, *Ayz1, *Azz1;
var *Gmx1, *Gmy1, *Gmz1;
var *Lap1, *Sfx1, *Sfy1, *Sfz1;
var *dtSfx1, *dtSfy1, *dtSfz1;
var *phi_rhs, *trK_rhs;
var *gxx_rhs, *gxy_rhs, *gxz_rhs, *gyy_rhs, *gyz_rhs, *gzz_rhs;
var *Axx_rhs, *Axy_rhs, *Axz_rhs, *Ayy_rhs, *Ayz_rhs, *Azz_rhs;
var *Gmx_rhs, *Gmy_rhs, *Gmz_rhs;
var *Lap_rhs, *Sfx_rhs, *Sfy_rhs, *Sfz_rhs;
var *dtSfx_rhs, *dtSfy_rhs, *dtSfz_rhs;
var *rho, *Sx, *Sy, *Sz, *Sxx, *Sxy, *Sxz, *Syy, *Syz, *Szz;
var *Gamxxx, *Gamxxy, *Gamxxz, *Gamxyy, *Gamxyz, *Gamxzz;
var *Gamyxx, *Gamyxy, *Gamyxz, *Gamyyy, *Gamyyz, *Gamyzz;
var *Gamzxx, *Gamzxy, *Gamzxz, *Gamzyy, *Gamzyz, *Gamzzz;
var *Rxx, *Rxy, *Rxz, *Ryy, *Ryz, *Rzz;
var *Rpsi4, *Ipsi4;
var *t1Rpsi4, *t1Ipsi4, *t2Rpsi4, *t2Ipsi4;
var *Cons_Ham, *Cons_Px, *Cons_Py, *Cons_Pz, *Cons_Gx, *Cons_Gy, *Cons_Gz;
#ifdef Point_Psi4
var *phix, *phiy, *phiz;
var *trKx, *trKy, *trKz;
var *Axxx, *Axxy, *Axxz;
var *Axyx, *Axyy, *Axyz;
var *Axzx, *Axzy, *Axzz;
var *Ayyx, *Ayyy, *Ayyz;
var *Ayzx, *Ayzy, *Ayzz;
var *Azzx, *Azzy, *Azzz;
#endif
// FIXME: uc = StateList, up = OldStateList, upp = SynchList_cor; so never touch these three data
MyList<var> *StateList, *SynchList_pre, *SynchList_cor, *RHSList;
MyList<var> *OldStateList, *DumpList;
MyList<var> *ConstraintList;
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
monitor *ConVMonitor;
surface_integral *Waveshell;
checkpoint *CheckPoint;
public:
bssn_class(double Couranti, double StartTimei, double TotalTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
int Symmetryi, int checkruni, char *checkfilenamei, double numepssi, double numepsbi, double numepshi,
int a_levi, int maxli, int decni, double maxrexi, double drexi);
~bssn_class();
void Evolve(int Steps);
void RecursiveStep(int lev);
#if (PSTR == 1)
void ParallelStep();
void SHStep();
#endif
void RestrictProlong(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
void RestrictProlong_aux(int lev, int YN, bool BB, MyList<var> *SL, MyList<var> *OL, MyList<var> *corL);
void RestrictProlong(int lev, int YN, bool BB);
void ProlongRestrict(int lev, int YN, bool BB);
void Setup_Black_Hole_position();
void compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int lev);
bool read_Pablo_file(int *ext, double *datain, char *filename);
void write_Pablo_file(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
char *filename);
void AnalysisStuff(int lev, double dT_lev);
void Setup_KerrSchild();
void Enforce_algcon(int lev, int fg);
void testRestrict();
void testOutBd();
virtual void Setup_Initial_Data_Lousto();
virtual void Setup_Initial_Data_Cao();
virtual void Initialize();
virtual void Read_Ansorg();
virtual void Read_Pablo() {};
virtual void Compute_Psi4(int lev);
virtual void Step(int lev, int YN);
virtual void Interp_Constraint(bool infg);
virtual void Constraint_Out();
virtual void Compute_Constraint();
#ifdef With_AHF
protected:
MyList<var> *AHList, *AHDList, *GaugeList;
int AHfindevery;
double AHdumptime;
int *lastahdumpid, HN_num; // number of possible horizons
int *findeveryl;
double *xc, *yc, *zc, *xr, *yr, *zr;
bool *trigger;
double *dTT;
int *dumpid;
public:
void AH_Prepare_derivatives();
bool AH_Interp_Points(MyList<var> *VarList,
int NN, double **XX,
double *Shellf, int Symmetryi);
void AH_Step_Find(int lev, double dT_lev);
#endif
};
#endif /* BSSN_GPU_CLASS_H */

View File

@@ -20,14 +20,12 @@ using namespace std;
__device__ volatile unsigned int global_count = 0; __device__ volatile unsigned int global_count = 0;
#ifdef RESULT_CHECK
void compare_result_gpu(int ftag1,double * datac,int data_num){ void compare_result_gpu(int ftag1,double * datac,int data_num){
double * data = (double*)malloc(sizeof(double)*data_num); double * data = (double*)malloc(sizeof(double)*data_num);
cudaMemcpy(data, datac, data_num * sizeof(double), cudaMemcpyDeviceToHost); cudaMemcpy(data, datac, data_num * sizeof(double), cudaMemcpyDeviceToHost);
compare_result(ftag1,data,data_num); compare_result(ftag1,data,data_num);
free(data); free(data);
} }
#endif
__global__ void sub_symmetry_bd_ss_partF(int ord, double * func, double *funcc) __global__ void sub_symmetry_bd_ss_partF(int ord, double * func, double *funcc)
{ {
@@ -155,11 +153,11 @@ __global__ void sub_symmetry_bd_ss_partJ(int ord,double * func, double * funcc,d
inline void sub_symmetry_bd_ss(int ord,double * func, double * funcc,double * SoA){ inline void sub_symmetry_bd_ss(int ord,double * func, double * funcc,double * SoA){
sub_symmetry_bd_ss_partF<<<GRID_DIM,BLOCK_DIM>>>(ord,func,funcc); sub_symmetry_bd_ss_partF<<<GRID_DIM,BLOCK_DIM>>>(ord,func,funcc);
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_symmetry_bd_ss_partI<<<GRID_DIM,BLOCK_DIM>>>(ord,func,funcc,SoA[0]); sub_symmetry_bd_ss_partI<<<GRID_DIM,BLOCK_DIM>>>(ord,func,funcc,SoA[0]);
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_symmetry_bd_ss_partJ<<<GRID_DIM,BLOCK_DIM>>>(ord,func,funcc,SoA[1]); sub_symmetry_bd_ss_partJ<<<GRID_DIM,BLOCK_DIM>>>(ord,func,funcc,SoA[1]);
cudaDeviceSynchronize(); cudaThreadSynchronize();
} }
__global__ void sub_fderivs_shc_part1(double *fx,double *fy,double *fz){ __global__ void sub_fderivs_shc_part1(double *fx,double *fy,double *fz){
@@ -249,13 +247,13 @@ inline void sub_fderivs_shc(int& sst,double * f,double * fh,double *fx,double *f
//cudaMemset(Msh_ gy,0,h_3D_SIZE[0] * sizeof(double)); //cudaMemset(Msh_ gy,0,h_3D_SIZE[0] * sizeof(double));
//cudaMemset(Msh_ gz,0,h_3D_SIZE[0] * sizeof(double)); //cudaMemset(Msh_ gz,0,h_3D_SIZE[0] * sizeof(double));
sub_symmetry_bd_ss(2,f,fh,SoA1); sub_symmetry_bd_ss(2,f,fh,SoA1);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//compare_result_gpu(0,fh,h_3D_SIZE[2]); //compare_result_gpu(0,fh,h_3D_SIZE[2]);
sub_fderivs_sh<<<GRID_DIM,BLOCK_DIM>>>(fh,Msh_ gx,Msh_ gy,Msh_ gz); sub_fderivs_sh<<<GRID_DIM,BLOCK_DIM>>>(fh,Msh_ gx,Msh_ gy,Msh_ gz);
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_fderivs_shc_part1<<<GRID_DIM,BLOCK_DIM>>>(fx,fy,fz); sub_fderivs_shc_part1<<<GRID_DIM,BLOCK_DIM>>>(fx,fy,fz);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//compare_result_gpu(1,fx,h_3D_SIZE[0]); //compare_result_gpu(1,fx,h_3D_SIZE[0]);
//compare_result_gpu(2,fy,h_3D_SIZE[0]); //compare_result_gpu(2,fy,h_3D_SIZE[0]);
//compare_result_gpu(3,fz,h_3D_SIZE[0]); //compare_result_gpu(3,fz,h_3D_SIZE[0]);
@@ -453,17 +451,17 @@ inline void sub_fdderivs_shc(int& sst,double * f,double * fh,
//fderivs_sh //fderivs_sh
sub_symmetry_bd_ss(2,f,fh,SoA1); sub_symmetry_bd_ss(2,f,fh,SoA1);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//compare_result_gpu(1,fh,h_3D_SIZE[2]); //compare_result_gpu(1,fh,h_3D_SIZE[2]);
sub_fderivs_sh<<<GRID_DIM,BLOCK_DIM>>>(fh,Msh_ gx,Msh_ gy,Msh_ gz); sub_fderivs_sh<<<GRID_DIM,BLOCK_DIM>>>(fh,Msh_ gx,Msh_ gy,Msh_ gz);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//fdderivs_sh //fdderivs_sh
sub_symmetry_bd_ss(2,f,fh,SoA1); sub_symmetry_bd_ss(2,f,fh,SoA1);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//compare_result_gpu(21,fh,h_3D_SIZE[2]); //compare_result_gpu(21,fh,h_3D_SIZE[2]);
sub_fdderivs_sh<<<GRID_DIM,BLOCK_DIM>>>(fh,Msh_ gxx,Msh_ gxy,Msh_ gxz,Msh_ gyy,Msh_ gyz,Msh_ gzz); sub_fdderivs_sh<<<GRID_DIM,BLOCK_DIM>>>(fh,Msh_ gxx,Msh_ gxy,Msh_ gxz,Msh_ gyy,Msh_ gyz,Msh_ gzz);
cudaDeviceSynchronize(); cudaThreadSynchronize();
/*compare_result_gpu(11,Msh_ gx,h_3D_SIZE[0]); /*compare_result_gpu(11,Msh_ gx,h_3D_SIZE[0]);
compare_result_gpu(12,Msh_ gy,h_3D_SIZE[0]); compare_result_gpu(12,Msh_ gy,h_3D_SIZE[0]);
compare_result_gpu(13,Msh_ gz,h_3D_SIZE[0]); compare_result_gpu(13,Msh_ gz,h_3D_SIZE[0]);
@@ -474,7 +472,7 @@ inline void sub_fdderivs_shc(int& sst,double * f,double * fh,
compare_result_gpu(5,Msh_ gyz,h_3D_SIZE[0]); compare_result_gpu(5,Msh_ gyz,h_3D_SIZE[0]);
compare_result_gpu(6,Msh_ gzz,h_3D_SIZE[0]);*/ compare_result_gpu(6,Msh_ gzz,h_3D_SIZE[0]);*/
sub_fdderivs_shc_part1<<<GRID_DIM,BLOCK_DIM>>>(fxx,fxy,fxz,fyy,fyz,fzz); sub_fdderivs_shc_part1<<<GRID_DIM,BLOCK_DIM>>>(fxx,fxy,fxz,fyy,fyz,fzz);
cudaDeviceSynchronize(); cudaThreadSynchronize();
/*compare_result_gpu(1,fxx,h_3D_SIZE[0]); /*compare_result_gpu(1,fxx,h_3D_SIZE[0]);
compare_result_gpu(2,fxy,h_3D_SIZE[0]); compare_result_gpu(2,fxy,h_3D_SIZE[0]);
compare_result_gpu(3,fxz,h_3D_SIZE[0]); compare_result_gpu(3,fxz,h_3D_SIZE[0]);
@@ -498,9 +496,9 @@ __global__ void computeRicci_ss_part1(double * dst)
inline void computeRicci_ss(int &sst,double * src,double* dst,double * SoA, Meta* meta) inline void computeRicci_ss(int &sst,double * src,double* dst,double * SoA, Meta* meta)
{ {
sub_fdderivs_shc(sst,src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,SoA); sub_fdderivs_shc(sst,src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,SoA);
cudaDeviceSynchronize(); cudaThreadSynchronize();
computeRicci_ss_part1<<<GRID_DIM,BLOCK_DIM>>>(dst); computeRicci_ss_part1<<<GRID_DIM,BLOCK_DIM>>>(dst);
cudaDeviceSynchronize(); cudaThreadSynchronize();
} }
__global__ void sub_lopsided_ss_part1(double * dst) __global__ void sub_lopsided_ss_part1(double * dst)
@@ -518,9 +516,9 @@ __global__ void sub_lopsided_ss_part1(double * dst)
inline void sub_lopsided_ss(int& sst,double *src,double* dst,double *SoA) inline void sub_lopsided_ss(int& sst,double *src,double* dst,double *SoA)
{ {
sub_fderivs_shc(sst,src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,SoA); sub_fderivs_shc(sst,src,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,SoA);
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_lopsided_ss_part1<<<GRID_DIM,BLOCK_DIM>>>(dst); sub_lopsided_ss_part1<<<GRID_DIM,BLOCK_DIM>>>(dst);
cudaDeviceSynchronize(); cudaThreadSynchronize();
} }
__global__ void sub_kodis_sh_part1(double *f,double *fh,double *f_rhs) __global__ void sub_kodis_sh_part1(double *f,double *fh,double *f_rhs)
@@ -592,11 +590,11 @@ inline void sub_kodis_ss(int &sst,double *f,double *fh,double *f_rhs,double *SoA
} }
//compare_result_gpu(10,f,h_3D_SIZE[0]); //compare_result_gpu(10,f,h_3D_SIZE[0]);
sub_symmetry_bd_ss(3,f,fh,SoA1); sub_symmetry_bd_ss(3,f,fh,SoA1);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//compare_result_gpu(0,fh,h_3D_SIZE[3]); //compare_result_gpu(0,fh,h_3D_SIZE[3]);
sub_kodis_sh_part1<<<GRID_DIM,BLOCK_DIM>>>(f,fh,f_rhs); sub_kodis_sh_part1<<<GRID_DIM,BLOCK_DIM>>>(f,fh,f_rhs);
cudaDeviceSynchronize(); cudaThreadSynchronize();
//compare_result_gpu(1,f_rhs,h_3D_SIZE[0]); //compare_result_gpu(1,f_rhs,h_3D_SIZE[0]);
} }
@@ -1701,7 +1699,7 @@ void destroy_meta(Meta *meta,Metass *metass)
if(Msh_ gzz) cudaFree(Msh_ gzz); if(Msh_ gzz) cudaFree(Msh_ gzz);
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) #if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
if(Mh_ reta) cudaFree(Mh_ reta); if(Mh_ reta) CUDA_SAFE_CALL(cudaFree(Mh_ reta));
#endif #endif
@@ -1897,7 +1895,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
//1.2 local Data //1.2 local Data
cudaMalloc((void**)&(Mh_ gxx), matrix_size * sizeof(double)); cudaMalloc((void**)&(Mh_ gxx), matrix_size * sizeof(double));
cudaMalloc((void**)&(Mh_ gyy), matrix_size * sizeof(double)); CUDA_SAFE_CALL( cudaMalloc((void**)&(Mh_ gyy), matrix_size * sizeof(double)));
cudaMalloc((void**)&(Mh_ gzz), matrix_size * sizeof(double)); cudaMalloc((void**)&(Mh_ gzz), matrix_size * sizeof(double));
cudaMalloc((void**)&(Mh_ chix), matrix_size * sizeof(double)); cudaMalloc((void**)&(Mh_ chix), matrix_size * sizeof(double));
cudaMalloc((void**)&(Mh_ chiy), matrix_size * sizeof(double)); cudaMalloc((void**)&(Mh_ chiy), matrix_size * sizeof(double));
@@ -2162,7 +2160,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
double tmp_con2 = 1/Mass[0] - tmp_con; double tmp_con2 = 1/Mass[0] - tmp_con;
cudaMemcpyToSymbol(C1, &tmp_con2, sizeof(double)); cudaMemcpyToSymbol(C1, &tmp_con2, sizeof(double));
tmp_con2 = 1/Mass[1] - tmp_con; double tmp_con2 = 1/Mass[1] - tmp_con;
cudaMemcpyToSymbol(C2, &tmp_con2, sizeof(double)); cudaMemcpyToSymbol(C2, &tmp_con2, sizeof(double));
@@ -2235,7 +2233,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
if((sst == 2 || sst == 4) && abs[1] < dYh) if((sst == 2 || sst == 4) && abs[1] < dYh)
{ {
ijkmin_h[1] = -2; ijkmin_h[1] = -2;
ijkmin3_h[1] = -3; ijkmin_h[1] = -3;
} }
if((sst == 3 || sst == 5) && abs_Y_ex2 < dYh) if((sst == 3 || sst == 5) && abs_Y_ex2 < dYh)
{ {
@@ -2289,13 +2287,13 @@ int gpu_rhs_ss(RHS_SS_PARA)
#ifdef TIMING1 #ifdef TIMING1
cudaDeviceSynchronize(); cudaThreadSynchronize();
gettimeofday(&tv2, NULL); gettimeofday(&tv2, NULL);
cout<<"TIME USED"<<TimeBetween(tv1, tv2)<<endl; cout<<"TIME USED"<<TimeBetween(tv1, tv2)<<endl;
#endif #endif
//cout<<"GPU meta data ready.\n"; //cout<<"GPU meta data ready.\n";
cudaDeviceSynchronize(); cudaThreadSynchronize();
//-------------get device info------------------------------------- //-------------get device info-------------------------------------
@@ -2308,7 +2306,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
//sub_enforce_ga(matrix_size); //sub_enforce_ga(matrix_size);
//4.1-----compute rhs--------- //4.1-----compute rhs---------
compute_rhs_ss_part1<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part1<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_fderivs_shc(sst,Mh_ betax,Mh_ fh,Mh_ betaxx,Mh_ betaxy,Mh_ betaxz,ass); sub_fderivs_shc(sst,Mh_ betax,Mh_ fh,Mh_ betaxx,Mh_ betaxy,Mh_ betaxz,ass);
sub_fderivs_shc(sst,Mh_ betay,Mh_ fh,Mh_ betayx,Mh_ betayy,Mh_ betayz,sas); sub_fderivs_shc(sst,Mh_ betay,Mh_ fh,Mh_ betayx,Mh_ betayy,Mh_ betayz,sas);
@@ -2324,7 +2322,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
sub_fderivs_shc(sst,Mh_ gyz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz, saa); sub_fderivs_shc(sst,Mh_ gyz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz, saa);
compute_rhs_ss_part2<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part2<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_fdderivs_shc(sst,Mh_ betax,Mh_ fh,Mh_ gxxx,Mh_ gxyx,Mh_ gxzx,Mh_ gyyx,Mh_ gyzx,Mh_ gzzx,ass); sub_fdderivs_shc(sst,Mh_ betax,Mh_ fh,Mh_ gxxx,Mh_ gxyx,Mh_ gxzx,Mh_ gyyx,Mh_ gyzx,Mh_ gzzx,ass);
sub_fdderivs_shc(sst,Mh_ betay,Mh_ fh,Mh_ gxxy,Mh_ gxyy,Mh_ gxzy,Mh_ gyyy,Mh_ gyzy,Mh_ gzzy,sas); sub_fdderivs_shc(sst,Mh_ betay,Mh_ fh,Mh_ gxxy,Mh_ gxyy,Mh_ gxzy,Mh_ gyyy,Mh_ gyzy,Mh_ gzzy,sas);
@@ -2334,7 +2332,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
sub_fderivs_shc( sst,Mh_ Gamz, Mh_ fh,Mh_ Gamzx, Mh_ Gamzy, Mh_ Gamzz,ssa); sub_fderivs_shc( sst,Mh_ Gamz, Mh_ fh,Mh_ Gamzx, Mh_ Gamzy, Mh_ Gamzz,ssa);
compute_rhs_ss_part3<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part3<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
computeRicci_ss(sst,Mh_ dxx,Mh_ Rxx,sss, meta); computeRicci_ss(sst,Mh_ dxx,Mh_ Rxx,sss, meta);
computeRicci_ss(sst,Mh_ dyy,Mh_ Ryy,sss, meta); computeRicci_ss(sst,Mh_ dyy,Mh_ Ryy,sss, meta);
@@ -2342,25 +2340,25 @@ int gpu_rhs_ss(RHS_SS_PARA)
computeRicci_ss(sst,Mh_ gxy,Mh_ Rxy,aas, meta); computeRicci_ss(sst,Mh_ gxy,Mh_ Rxy,aas, meta);
computeRicci_ss(sst,Mh_ gxz,Mh_ Rxz,asa, meta); computeRicci_ss(sst,Mh_ gxz,Mh_ Rxz,asa, meta);
computeRicci_ss(sst,Mh_ gyz,Mh_ Ryz,saa, meta); computeRicci_ss(sst,Mh_ gyz,Mh_ Ryz,saa, meta);
cudaDeviceSynchronize(); cudaThreadSynchronize();
compute_rhs_ss_part4<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part4<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_fdderivs_shc(sst,Mh_ chi,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); sub_fdderivs_shc(sst,Mh_ chi,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss);
//cudaDeviceSynchronize(); //cudaThreadSynchronize();
//compare_result_gpu(0,Mh_ chi,h_3D_SIZE[0]); //compare_result_gpu(0,Mh_ chi,h_3D_SIZE[0]);
//compare_result_gpu(1,Mh_ chi,h_3D_SIZE[0]); //compare_result_gpu(1,Mh_ chi,h_3D_SIZE[0]);
//compare_result_gpu(2,Mh_ fyz,h_3D_SIZE[0]); //compare_result_gpu(2,Mh_ fyz,h_3D_SIZE[0]);
compute_rhs_ss_part5<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part5<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_fdderivs_shc(sst,Mh_ Lap,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss); sub_fdderivs_shc(sst,Mh_ Lap,Mh_ fh,Mh_ fxx,Mh_ fxy,Mh_ fxz,Mh_ fyy,Mh_ fyz,Mh_ fzz,sss);
compute_rhs_ss_part6<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part6<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5) #if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5)
sub_fderivs_shc(sst,Mh_ chi,Mh_ fh, Mh_ dtSfx_rhs, Mh_ dtSfy_rhs, Mh_ dtSfz_rhs,sss); sub_fderivs_shc(sst,Mh_ chi,Mh_ fh, Mh_ dtSfx_rhs, Mh_ dtSfy_rhs, Mh_ dtSfz_rhs,sss);
@@ -2425,7 +2423,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
} }
if(co == 0){ if(co == 0){
compute_rhs_ss_part7<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part7<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
sub_fderivs_shc(sst,Mh_ Axx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz,sss); sub_fderivs_shc(sst,Mh_ Axx,Mh_ fh,Mh_ gxxx,Mh_ gxxy,Mh_ gxxz,sss);
sub_fderivs_shc(sst,Mh_ Axy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz,aas); sub_fderivs_shc(sst,Mh_ Axy,Mh_ fh,Mh_ gxyx,Mh_ gxyy,Mh_ gxyz,aas);
@@ -2434,7 +2432,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
sub_fderivs_shc(sst,Mh_ Ayz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz,saa); sub_fderivs_shc(sst,Mh_ Ayz,Mh_ fh,Mh_ gyzx,Mh_ gyzy,Mh_ gyzz,saa);
sub_fderivs_shc(sst,Mh_ Azz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz,sss); sub_fderivs_shc(sst,Mh_ Azz,Mh_ fh,Mh_ gzzx,Mh_ gzzy,Mh_ gzzz,sss);
compute_rhs_ss_part8<<<GRID_DIM,BLOCK_DIM>>>(); compute_rhs_ss_part8<<<GRID_DIM,BLOCK_DIM>>>();
cudaDeviceSynchronize(); cudaThreadSynchronize();
} }
#if (ABV == 1) #if (ABV == 1)
@@ -2514,7 +2512,7 @@ int gpu_rhs_ss(RHS_SS_PARA)
//test kodis //test kodis
//sub_kodis_sh(sst,Msh_ drhodx,Mh_ fh2,Msh_ drhody,sss); //sub_kodis_sh(sst,Msh_ drhodx,Mh_ fh2,Msh_ drhody,sss);
#ifdef TIMING #ifdef TIMING
cudaDeviceSynchronize(); cudaThreadSynchronize();
gettimeofday(&tv2, NULL); gettimeofday(&tv2, NULL);
cout<<"MPI rank is: "<<mpi_rank<<" GPU TIME is"<<TimeBetween(tv1, tv2)<<" (s)."<<endl; cout<<"MPI rank is: "<<mpi_rank<<" GPU TIME is"<<TimeBetween(tv1, tv2)<<" (s)."<<endl;
#endif #endif
@@ -2524,55 +2522,4 @@ int gpu_rhs_ss(RHS_SS_PARA)
return 0;//TODO return return 0;//TODO return
} }
#if (ABEtype == 2)
// Z4C Shell GPU: calls BSSN gpu_rhs_ss with trKd=trK+2*TZ, then applies
// TZ_rhs = alpn1*Hcon/2 and constraint damping on CPU.
int gpu_rhs_z4c_ss(Z4C_SS_PARA)
{
int matrix_size = ex[0] * ex[1] * ex[2];
double k1 = 0.02, k2 = 0.0;
double *trKd_host = new double[matrix_size];
for (int _i = 0; _i < matrix_size; _i++)
trKd_host[_i] = trK[_i] + 2.0 * TZ[_i];
int result = gpu_rhs_ss(calledby, mpi_rank,
ex, T, crho, sigma, R, X, Y, Z,
drhodx, drhody, drhodz, dsigmadx, dsigmady, dsigmadz,
dRdx, dRdy, dRdz,
drhodxx, drhodxy, drhodxz, drhodyy, drhodyz, drhodzz,
dsigmadxx, dsigmadxy, dsigmadxz, dsigmadyy, dsigmadyz, dsigmadzz,
dRdxx, dRdxy, dRdxz, dRdyy, dRdyz, dRdzz,
chi, trKd_host, dxx, gxy, gxz, dyy, gyz, dzz,
Axx, Axy, Axz, Ayy, Ayz, Azz,
Gamx, Gamy, Gamz,
Lap, betax, betay, betaz,
dtSfx, dtSfy, dtSfz,
chi_rhs, trK_rhs,
gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs,
Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs,
Gamx_rhs, Gamy_rhs, Gamz_rhs,
Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,
dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,
rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz,
Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,
Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,
Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,
Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,
ham_Res, movx_Res, movy_Res, movz_Res,
Gmx_Res, Gmy_Res, Gmz_Res,
Symmetry, Lev, eps, sst, co);
delete[] trKd_host;
if (result != 0) return result;
for (int _i = 0; _i < matrix_size; _i++) {
double alp = Lap[_i] + 1.0;
TZ_rhs[_i] = alp * ham_Res[_i] * 0.5;
TZ_rhs[_i] -= alp * (2.0 + k2) * k1 * TZ[_i];
trK_rhs[_i] += alp * k1 * (1.0 - k2) * TZ[_i];
}
return 0;
}
#endif // ABEtype == 2
#endif //WithShell #endif //WithShell

Some files were not shown because too many files have changed in this diff Show More