Compare commits
1 Commits
cjy-oneapi
...
hxh-new
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19b0e79692 |
12
.gitignore
vendored
12
.gitignore
vendored
@@ -1,6 +1,6 @@
|
|||||||
__pycache__
|
__pycache__
|
||||||
GW150914
|
GW150914
|
||||||
GW150914-origin
|
GW150914-origin
|
||||||
docs
|
docs
|
||||||
*.tmp
|
*.tmp
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ import numpy
|
|||||||
File_directory = "GW150914" ## output file directory
|
File_directory = "GW150914" ## output file directory
|
||||||
Output_directory = "binary_output" ## binary data file directory
|
Output_directory = "binary_output" ## binary data file directory
|
||||||
## The file directory name should not be too long
|
## The file directory name should not be too long
|
||||||
MPI_processes = 64 ## number of mpi processes used in the simulation
|
MPI_processes = 2 ## number of mpi processes used in the simulation
|
||||||
|
|
||||||
GPU_Calculation = "no" ## Use GPU or not
|
GPU_Calculation = "no" ## Use GPU or not
|
||||||
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
|
||||||
@@ -50,7 +50,7 @@ Check_Time = 100.0
|
|||||||
Dump_Time = 100.0 ## time inteval dT for dumping binary data
|
Dump_Time = 100.0 ## time inteval dT for dumping binary data
|
||||||
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
|
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
|
||||||
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
|
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
|
||||||
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
|
Evolution_Step_Number = 6 ## stop the calculation after the maximal step number
|
||||||
Courant_Factor = 0.5 ## Courant Factor
|
Courant_Factor = 0.5 ## Courant Factor
|
||||||
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength
|
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength
|
||||||
|
|
||||||
|
|||||||
@@ -8,14 +8,6 @@
|
|||||||
##
|
##
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
## Guard against re-execution by multiprocessing child processes.
|
|
||||||
## Without this, using 'spawn' or 'forkserver' context would cause every
|
|
||||||
## worker to re-run the entire script, spawning exponentially more
|
|
||||||
## workers (fork bomb).
|
|
||||||
if __name__ != '__main__':
|
|
||||||
import sys as _sys
|
|
||||||
_sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
@@ -57,32 +49,32 @@ import time
|
|||||||
File_directory = os.path.join(input_data.File_directory)
|
File_directory = os.path.join(input_data.File_directory)
|
||||||
|
|
||||||
## If the specified output directory exists, ask the user whether to continue
|
## If the specified output directory exists, ask the user whether to continue
|
||||||
if os.path.exists(File_directory):
|
# if os.path.exists(File_directory):
|
||||||
print( " Output dictionary has been existed !!! " )
|
# print( " Output dictionary has been existed !!! " )
|
||||||
print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
|
# print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
|
||||||
print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
|
# print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
|
||||||
print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
|
# print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
|
||||||
print( )
|
# print( )
|
||||||
## Prompt whether to overwrite the existing directory
|
# ## Prompt whether to overwrite the existing directory
|
||||||
while True:
|
# while True:
|
||||||
try:
|
# try:
|
||||||
inputvalue = input()
|
# inputvalue = input()
|
||||||
## If the user agrees to overwrite, proceed and remove the existing directory
|
# ## If the user agrees to overwrite, proceed and remove the existing directory
|
||||||
if ( inputvalue == "continue" ):
|
# if ( inputvalue == "continue" ):
|
||||||
print( " Continue the calculation !!! " )
|
# print( " Continue the calculation !!! " )
|
||||||
print( )
|
# print( )
|
||||||
break
|
# break
|
||||||
## If the user chooses not to overwrite, exit and keep the existing directory
|
# ## If the user chooses not to overwrite, exit and keep the existing directory
|
||||||
elif ( inputvalue == "stop" ):
|
# elif ( inputvalue == "stop" ):
|
||||||
print( " Stop the calculation !!! " )
|
# print( " Stop the calculation !!! " )
|
||||||
sys.exit()
|
# sys.exit()
|
||||||
## If the user input is invalid, prompt again
|
# ## If the user input is invalid, prompt again
|
||||||
else:
|
# else:
|
||||||
print( " Please input your choice !!! " )
|
# print( " Please input your choice !!! " )
|
||||||
print( " Input 'continue' or 'stop' in the terminal !!! " )
|
# print( " Input 'continue' or 'stop' in the terminal !!! " )
|
||||||
except ValueError:
|
# except ValueError:
|
||||||
print( " Please input your choice !!! " )
|
# print( " Please input your choice !!! " )
|
||||||
print( " Input 'continue' or 'stop' in the terminal !!! " )
|
# print( " Input 'continue' or 'stop' in the terminal !!! " )
|
||||||
|
|
||||||
## Remove the existing output directory if present
|
## Remove the existing output directory if present
|
||||||
shutil.rmtree(File_directory, ignore_errors=True)
|
shutil.rmtree(File_directory, ignore_errors=True)
|
||||||
@@ -270,12 +262,6 @@ if not os.path.exists( ABE_file ):
|
|||||||
## Copy the executable ABE (or ABEGPU) into the run directory
|
## Copy the executable ABE (or ABEGPU) into the run directory
|
||||||
shutil.copy2(ABE_file, output_directory)
|
shutil.copy2(ABE_file, output_directory)
|
||||||
|
|
||||||
## Copy interp load balance profile if present (for optimize pass)
|
|
||||||
interp_lb_profile = os.path.join(AMSS_NCKU_source_copy, "interp_lb_profile.bin")
|
|
||||||
if os.path.exists(interp_lb_profile):
|
|
||||||
shutil.copy2(interp_lb_profile, output_directory)
|
|
||||||
print( " Copied interp_lb_profile.bin to run directory " )
|
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
|
|
||||||
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
|
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
|
||||||
@@ -438,31 +424,26 @@ print(
|
|||||||
|
|
||||||
import plot_xiaoqu
|
import plot_xiaoqu
|
||||||
import plot_GW_strain_amplitude_xiaoqu
|
import plot_GW_strain_amplitude_xiaoqu
|
||||||
from parallel_plot_helper import run_plot_tasks_parallel
|
|
||||||
|
|
||||||
plot_tasks = []
|
|
||||||
|
|
||||||
## Plot black hole trajectory
|
## Plot black hole trajectory
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot, (binary_results_directory, figure_directory) ) )
|
plot_xiaoqu.generate_puncture_orbit_plot( binary_results_directory, figure_directory )
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_orbit_plot3D, (binary_results_directory, figure_directory) ) )
|
plot_xiaoqu.generate_puncture_orbit_plot3D( binary_results_directory, figure_directory )
|
||||||
|
|
||||||
## Plot black hole separation vs. time
|
## Plot black hole separation vs. time
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_puncture_distence_plot, (binary_results_directory, figure_directory) ) )
|
plot_xiaoqu.generate_puncture_distence_plot( binary_results_directory, figure_directory )
|
||||||
|
|
||||||
## Plot gravitational waveforms (psi4 and strain amplitude)
|
## Plot gravitational waveforms (psi4 and strain amplitude)
|
||||||
for i in range(input_data.Detector_Number):
|
for i in range(input_data.Detector_Number):
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_gravitational_wave_psi4_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_xiaoqu.generate_gravitational_wave_psi4_plot( binary_results_directory, figure_directory, i )
|
||||||
plot_tasks.append( ( plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_GW_strain_amplitude_xiaoqu.generate_gravitational_wave_amplitude_plot( binary_results_directory, figure_directory, i )
|
||||||
|
|
||||||
## Plot ADM mass evolution
|
## Plot ADM mass evolution
|
||||||
for i in range(input_data.Detector_Number):
|
for i in range(input_data.Detector_Number):
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_ADMmass_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_xiaoqu.generate_ADMmass_plot( binary_results_directory, figure_directory, i )
|
||||||
|
|
||||||
## Plot Hamiltonian constraint violation over time
|
## Plot Hamiltonian constraint violation over time
|
||||||
for i in range(input_data.grid_level):
|
for i in range(input_data.grid_level):
|
||||||
plot_tasks.append( ( plot_xiaoqu.generate_constraint_check_plot, (binary_results_directory, figure_directory, i) ) )
|
plot_xiaoqu.generate_constraint_check_plot( binary_results_directory, figure_directory, i )
|
||||||
|
|
||||||
run_plot_tasks_parallel(plot_tasks)
|
|
||||||
|
|
||||||
## Plot stored binary data
|
## Plot stored binary data
|
||||||
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
|
plot_xiaoqu.generate_binary_data_plot( binary_results_directory, figure_directory )
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ using namespace std;
|
|||||||
|
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "macrodef.h"
|
#include "macrodef.h"
|
||||||
|
#include <omp.h>
|
||||||
#ifndef ABEtype
|
#ifndef ABEtype
|
||||||
#error "not define ABEtype"
|
#error "not define ABEtype"
|
||||||
#endif
|
#endif
|
||||||
@@ -69,8 +69,9 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
double Begin_clock, End_clock;
|
double Begin_clock, End_clock;
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
Begin_clock = MPI_Wtime();
|
Begin_clock = MPI_Wtime();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argc > 1)
|
if (argc > 1)
|
||||||
|
|||||||
130050
AMSS_NCKU_source/Ansorg.psid
130050
AMSS_NCKU_source/Ansorg.psid
File diff suppressed because it is too large
Load Diff
@@ -13,10 +13,7 @@ using namespace std;
|
|||||||
#include "MPatch.h"
|
#include "MPatch.h"
|
||||||
#include "Parallel.h"
|
#include "Parallel.h"
|
||||||
#include "fmisc.h"
|
#include "fmisc.h"
|
||||||
#ifdef INTERP_LB_PROFILE
|
#include "xh_global_interp.h"
|
||||||
#include "interp_lb_profile.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
||||||
{
|
{
|
||||||
|
|
||||||
@@ -397,7 +394,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
while (notfind && Bp) // run along Blocks
|
while (notfind && Bp) // run along Blocks
|
||||||
{
|
{
|
||||||
Block *BP = Bp->data;
|
Block *BP = Bp->data;
|
||||||
|
|
||||||
bool flag = true;
|
bool flag = true;
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
{
|
{
|
||||||
@@ -433,8 +429,10 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
int k = 0;
|
int k = 0;
|
||||||
while (varl) // run along variables
|
while (varl) // run along variables
|
||||||
{
|
{
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
|
||||||
|
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
|
|
||||||
varl = varl->next;
|
varl = varl->next;
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
@@ -444,6 +442,7 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
break;
|
break;
|
||||||
Bp = Bp->next;
|
Bp = Bp->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
// Replace MPI_Allreduce with per-owner MPI_Bcast:
|
||||||
@@ -510,13 +509,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
// Targeted point-to-point overload: each owner sends each point only to
|
// Targeted point-to-point overload: each owner sends each point only to
|
||||||
// the one rank that needs it for integration (consumer), reducing
|
// the one rank that needs it for integration (consumer), reducing
|
||||||
// communication volume by ~nprocs times compared to the Bcast version.
|
// communication volume by ~nprocs times compared to the Bcast version.
|
||||||
#ifdef INTERP_LB_PROFILE
|
|
||||||
double t_interp_start = MPI_Wtime();
|
|
||||||
#endif
|
|
||||||
int myrank, nprocs;
|
int myrank, nprocs;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||||
|
// printf("here----\n");
|
||||||
|
// int zzz = 0;
|
||||||
int ordn = 2 * ghost_width;
|
int ordn = 2 * ghost_width;
|
||||||
MyList<var> *varl;
|
MyList<var> *varl;
|
||||||
int num_var = 0;
|
int num_var = 0;
|
||||||
@@ -535,30 +532,35 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
owner_rank[j] = -1;
|
owner_rank[j] = -1;
|
||||||
|
|
||||||
double DH[dim], llb[dim], uub[dim];
|
double DH[dim];
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
DH[i] = getdX(i);
|
DH[i] = getdX(i);
|
||||||
|
|
||||||
// --- Interpolation phase (identical to original) ---
|
// --- Interpolation phase (identical to original) ---
|
||||||
|
// printf("NN: %d, num_var = %d\n", NN, num_var);
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
{
|
{
|
||||||
double pox[dim];
|
double pox[dim], llb[dim], uub[dim];
|
||||||
|
MyList<var> *varl1;
|
||||||
for (int i = 0; i < dim; i++)
|
for (int i = 0; i < dim; i++)
|
||||||
{
|
{
|
||||||
pox[i] = XX[i][j];
|
pox[i] = XX[i][j];
|
||||||
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
// if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
|
||||||
{
|
// {
|
||||||
cout << "Patch::Interp_Points: point (";
|
// cout << "Patch::Interp_Points: point (";
|
||||||
for (int k = 0; k < dim; k++)
|
// for (int k = 0; k < dim; k++)
|
||||||
{
|
// {
|
||||||
cout << XX[k][j];
|
// cout << XX[k][j];
|
||||||
if (k < dim - 1)
|
// if (k < dim - 1)
|
||||||
cout << ",";
|
// cout << ",";
|
||||||
else
|
// else
|
||||||
cout << ") is out of current Patch." << endl;
|
// cout << ") is out of current Patch." << endl;
|
||||||
}
|
// }
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
// MPI_Abort(MPI_COMM_WORLD, 1);
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Block> *Bp = blb;
|
MyList<Block> *Bp = blb;
|
||||||
@@ -590,21 +592,23 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// printf("flag = %d\n", flag);
|
||||||
if (flag)
|
if (flag)
|
||||||
{
|
{
|
||||||
notfind = false;
|
notfind = false;
|
||||||
owner_rank[j] = BP->rank;
|
owner_rank[j] = BP->rank;
|
||||||
if (myrank == BP->rank)
|
if (myrank == BP->rank)
|
||||||
{
|
{
|
||||||
varl = VarList;
|
varl1 = VarList;
|
||||||
int k = 0;
|
int k = 0;
|
||||||
while (varl)
|
while (varl1)
|
||||||
{
|
{
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl1->data->sgfn], Shellf[j * num_var + k],
|
||||||
varl = varl->next;
|
pox[0], pox[1], pox[2], ordn, varl1->data->SoA, Symmetry);
|
||||||
|
varl1 = varl1->next;
|
||||||
k++;
|
k++;
|
||||||
|
// zzz += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -613,12 +617,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
Bp = Bp->next;
|
Bp = Bp->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#ifdef INTERP_LB_PROFILE
|
// printf("Interpolation done, zzz = %d\n", zzz);
|
||||||
double t_interp_end = MPI_Wtime();
|
|
||||||
double t_interp_local = t_interp_end - t_interp_start;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// --- Error check for unfound points ---
|
// --- Error check for unfound points ---
|
||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
{
|
{
|
||||||
@@ -775,31 +775,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
delete[] recv_count;
|
delete[] recv_count;
|
||||||
delete[] consumer_rank;
|
delete[] consumer_rank;
|
||||||
delete[] owner_rank;
|
delete[] owner_rank;
|
||||||
|
|
||||||
#ifdef INTERP_LB_PROFILE
|
|
||||||
{
|
|
||||||
static bool profile_written = false;
|
|
||||||
if (!profile_written) {
|
|
||||||
double *all_times = nullptr;
|
|
||||||
if (myrank == 0) all_times = new double[nprocs];
|
|
||||||
MPI_Gather(&t_interp_local, 1, MPI_DOUBLE,
|
|
||||||
all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
|
||||||
if (myrank == 0) {
|
|
||||||
int heavy[64];
|
|
||||||
int nh = InterpLBProfile::identify_heavy_ranks(
|
|
||||||
all_times, nprocs, 2.5, heavy, 64);
|
|
||||||
InterpLBProfile::write_profile(
|
|
||||||
"interp_lb_profile.bin", nprocs,
|
|
||||||
all_times, heavy, nh, 2.5);
|
|
||||||
printf("[InterpLB] Profile written: %d heavy ranks\n", nh);
|
|
||||||
for (int i = 0; i < nh; i++)
|
|
||||||
printf(" Heavy rank %d: %.6f s\n", heavy[i], all_times[heavy[i]]);
|
|
||||||
delete[] all_times;
|
|
||||||
}
|
|
||||||
profile_written = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
void Patch::Interp_Points(MyList<var> *VarList,
|
void Patch::Interp_Points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
@@ -809,7 +784,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
int myrank, lmyrank;
|
int myrank, lmyrank;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
MPI_Comm_rank(Comm_here, &lmyrank);
|
MPI_Comm_rank(Comm_here, &lmyrank);
|
||||||
|
|
||||||
int ordn = 2 * ghost_width;
|
int ordn = 2 * ghost_width;
|
||||||
MyList<var> *varl;
|
MyList<var> *varl;
|
||||||
int num_var = 0;
|
int num_var = 0;
|
||||||
@@ -899,7 +873,7 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
int k = 0;
|
int k = 0;
|
||||||
while (varl) // run along variables
|
while (varl) // run along variables
|
||||||
{
|
{
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
varl = varl->next;
|
varl = varl->next;
|
||||||
k++;
|
k++;
|
||||||
@@ -1131,7 +1105,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
|
|||||||
{
|
{
|
||||||
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
|
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
|
||||||
// pox,ordn,varl->data->SoA,Symmetry);
|
// pox,ordn,varl->data->SoA,Symmetry);
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
|
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
varl = varl->next;
|
varl = varl->next;
|
||||||
k++;
|
k++;
|
||||||
@@ -1233,7 +1207,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
|
|||||||
// NOTE: we do not Synchnize variables here, make sure of that before calling this routine
|
// NOTE: we do not Synchnize variables here, make sure of that before calling this routine
|
||||||
int myrank;
|
int myrank;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
|
||||||
int ordn = 2 * ghost_width;
|
int ordn = 2 * ghost_width;
|
||||||
MyList<var> *varl;
|
MyList<var> *varl;
|
||||||
int num_var = 0;
|
int num_var = 0;
|
||||||
@@ -1373,7 +1347,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
|
|||||||
{
|
{
|
||||||
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
|
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
|
||||||
// pox,ordn,varl->data->SoA,Symmetry);
|
// pox,ordn,varl->data->SoA,Symmetry);
|
||||||
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
|
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
|
||||||
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
|
||||||
varl = varl->next;
|
varl = varl->next;
|
||||||
k++;
|
k++;
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
#include "prolongrestrict.h"
|
#include "prolongrestrict.h"
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "parameters.h"
|
#include "parameters.h"
|
||||||
|
#include <omp.h>
|
||||||
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
|
||||||
{
|
{
|
||||||
nx = Mymax(1, shape / min_width);
|
nx = Mymax(1, shape / min_width);
|
||||||
@@ -462,7 +462,7 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev);
|
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks
|
||||||
// ng->checkBlock();
|
// ng->checkBlock();
|
||||||
if (BlL)
|
if (BlL)
|
||||||
BlL->insert(ng);
|
BlL->insert(ng);
|
||||||
@@ -500,384 +500,6 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
|
|
||||||
return BlL;
|
return BlL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef INTERP_LB_OPTIMIZE
|
|
||||||
#include "interp_lb_profile_data.h"
|
|
||||||
|
|
||||||
MyList<Block> *Parallel::distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
|
||||||
bool periodic, int nodes)
|
|
||||||
{
|
|
||||||
#ifdef USE_GPU_DIVIDE
|
|
||||||
double cpu_part, gpu_part;
|
|
||||||
map<string, double>::iterator iter;
|
|
||||||
iter = parameters::dou_par.find("cpu part");
|
|
||||||
if (iter != parameters::dou_par.end())
|
|
||||||
{
|
|
||||||
cpu_part = iter->second;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int myrank;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
|
||||||
const int LEN = 256;
|
|
||||||
char pline[LEN];
|
|
||||||
string str, sgrp, skey, sval;
|
|
||||||
int sind;
|
|
||||||
char pname[50];
|
|
||||||
{
|
|
||||||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
|
||||||
if (iter != parameters::str_par.end())
|
|
||||||
strcpy(pname, (iter->second).c_str());
|
|
||||||
else { cout << "Error inputpar" << endl; exit(0); }
|
|
||||||
}
|
|
||||||
ifstream inf(pname, ifstream::in);
|
|
||||||
if (!inf.good() && myrank == 0)
|
|
||||||
{ cout << "Can not open parameter file " << pname << endl; MPI_Abort(MPI_COMM_WORLD, 1); }
|
|
||||||
for (int i = 1; inf.good(); i++)
|
|
||||||
{
|
|
||||||
inf.getline(pline, LEN); str = pline;
|
|
||||||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
|
||||||
if (status == -1) { cout << "error reading parameter file " << pname << " in line " << i << endl; MPI_Abort(MPI_COMM_WORLD, 1); }
|
|
||||||
else if (status == 0) continue;
|
|
||||||
if (sgrp == "ABE") { if (skey == "cpu part") cpu_part = atof(sval.c_str()); }
|
|
||||||
}
|
|
||||||
inf.close();
|
|
||||||
parameters::dou_par.insert(map<string, double>::value_type("cpu part", cpu_part));
|
|
||||||
}
|
|
||||||
iter = parameters::dou_par.find("gpu part");
|
|
||||||
if (iter != parameters::dou_par.end())
|
|
||||||
{
|
|
||||||
gpu_part = iter->second;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int myrank;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
|
||||||
const int LEN = 256;
|
|
||||||
char pline[LEN];
|
|
||||||
string str, sgrp, skey, sval;
|
|
||||||
int sind;
|
|
||||||
char pname[50];
|
|
||||||
{
|
|
||||||
map<string, string>::iterator iter = parameters::str_par.find("inputpar");
|
|
||||||
if (iter != parameters::str_par.end())
|
|
||||||
strcpy(pname, (iter->second).c_str());
|
|
||||||
else { cout << "Error inputpar" << endl; exit(0); }
|
|
||||||
}
|
|
||||||
ifstream inf(pname, ifstream::in);
|
|
||||||
if (!inf.good() && myrank == 0)
|
|
||||||
{ cout << "Can not open parameter file " << pname << endl; MPI_Abort(MPI_COMM_WORLD, 1); }
|
|
||||||
for (int i = 1; inf.good(); i++)
|
|
||||||
{
|
|
||||||
inf.getline(pline, LEN); str = pline;
|
|
||||||
int status = misc::parse_parts(str, sgrp, skey, sval, sind);
|
|
||||||
if (status == -1) { cout << "error reading parameter file " << pname << " in line " << i << endl; MPI_Abort(MPI_COMM_WORLD, 1); }
|
|
||||||
else if (status == 0) continue;
|
|
||||||
if (sgrp == "ABE") { if (skey == "gpu part") gpu_part = atof(sval.c_str()); }
|
|
||||||
}
|
|
||||||
inf.close();
|
|
||||||
parameters::dou_par.insert(map<string, double>::value_type("gpu part", gpu_part));
|
|
||||||
}
|
|
||||||
if (nodes == 0) nodes = cpusize / 2;
|
|
||||||
#else
|
|
||||||
if (nodes == 0) nodes = cpusize;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (dim != 3)
|
|
||||||
{
|
|
||||||
cout << "distrivute: now we only support 3-dimension" << endl;
|
|
||||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
MyList<Block> *BlL = 0;
|
|
||||||
int split_size, min_size, block_size = 0;
|
|
||||||
int min_width = 2 * Mymax(ghost_width, buffer_width);
|
|
||||||
int nxyz[dim], mmin_width[dim], min_shape[dim];
|
|
||||||
|
|
||||||
MyList<Patch> *PLi = PatchLIST;
|
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
min_shape[i] = PLi->data->shape[i];
|
|
||||||
int lev = PLi->data->lev;
|
|
||||||
PLi = PLi->next;
|
|
||||||
while (PLi)
|
|
||||||
{
|
|
||||||
Patch *PP = PLi->data;
|
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
min_shape[i] = Mymin(min_shape[i], PP->shape[i]);
|
|
||||||
if (lev != PLi->data->lev)
|
|
||||||
cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl;
|
|
||||||
PLi = PLi->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < dim; i++)
|
|
||||||
mmin_width[i] = Mymin(min_width, min_shape[i]);
|
|
||||||
min_size = mmin_width[0];
|
|
||||||
for (int i = 1; i < dim; i++)
|
|
||||||
min_size = min_size * mmin_width[i];
|
|
||||||
|
|
||||||
PLi = PatchLIST;
|
|
||||||
while (PLi)
|
|
||||||
{
|
|
||||||
Patch *PP = PLi->data;
|
|
||||||
int bs = PP->shape[0];
|
|
||||||
for (int i = 1; i < dim; i++)
|
|
||||||
bs = bs * PP->shape[i];
|
|
||||||
block_size = block_size + bs;
|
|
||||||
PLi = PLi->next;
|
|
||||||
}
|
|
||||||
split_size = Mymax(min_size, block_size / nodes);
|
|
||||||
split_size = Mymax(1, split_size);
|
|
||||||
|
|
||||||
int n_rank = 0;
|
|
||||||
PLi = PatchLIST;
|
|
||||||
int reacpu = 0;
|
|
||||||
int current_block_id = 0;
|
|
||||||
while (PLi) {
|
|
||||||
Block *ng0, *ng;
|
|
||||||
bool first_block_in_patch = true;
|
|
||||||
Patch *PP = PLi->data;
|
|
||||||
reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);
|
|
||||||
|
|
||||||
for (int i = 0; i < nxyz[0]; i++)
|
|
||||||
for (int j = 0; j < nxyz[1]; j++)
|
|
||||||
for (int k = 0; k < nxyz[2]; k++)
|
|
||||||
{
|
|
||||||
int ibbox_here[6], shape_here[3];
|
|
||||||
double bbox_here[6], dd;
|
|
||||||
Block *current_ng_start = nullptr;
|
|
||||||
|
|
||||||
bool is_heavy = false;
|
|
||||||
int r_l = -1, r_r = -1;
|
|
||||||
if (cpusize == INTERP_LB_NPROCS) {
|
|
||||||
for (int si = 0; si < INTERP_LB_NUM_HEAVY; si++) {
|
|
||||||
if (current_block_id == interp_lb_splits[si][0]) {
|
|
||||||
is_heavy = true;
|
|
||||||
r_l = interp_lb_splits[si][1];
|
|
||||||
r_r = interp_lb_splits[si][2];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_heavy)
|
|
||||||
{
|
|
||||||
int ib0 = (PP->shape[0] * i) / nxyz[0];
|
|
||||||
int ib3 = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
|
||||||
int jb1 = (PP->shape[1] * j) / nxyz[1];
|
|
||||||
int jb4 = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
|
||||||
int kb2 = (PP->shape[2] * k) / nxyz[2];
|
|
||||||
int kb5 = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
|
||||||
|
|
||||||
Block *split_first_block = nullptr;
|
|
||||||
Block *split_last_block = nullptr;
|
|
||||||
splitHotspotBlock(BlL, dim, ib0, ib3, jb1, jb4, kb2, kb5,
|
|
||||||
PP, r_l, r_r, ingfsi, fngfsi, periodic,
|
|
||||||
split_first_block, split_last_block);
|
|
||||||
|
|
||||||
current_ng_start = split_first_block;
|
|
||||||
ng = split_last_block;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ibbox_here[0] = (PP->shape[0] * i) / nxyz[0];
|
|
||||||
ibbox_here[3] = (PP->shape[0] * (i + 1)) / nxyz[0] - 1;
|
|
||||||
ibbox_here[1] = (PP->shape[1] * j) / nxyz[1];
|
|
||||||
ibbox_here[4] = (PP->shape[1] * (j + 1)) / nxyz[1] - 1;
|
|
||||||
ibbox_here[2] = (PP->shape[2] * k) / nxyz[2];
|
|
||||||
ibbox_here[5] = (PP->shape[2] * (k + 1)) / nxyz[2] - 1;
|
|
||||||
|
|
||||||
if (periodic) {
|
|
||||||
for(int d=0; d<3; d++) {
|
|
||||||
ibbox_here[d] -= ghost_width;
|
|
||||||
ibbox_here[d+3] += ghost_width;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ibbox_here[0] = Mymax(0, ibbox_here[0] - ghost_width);
|
|
||||||
ibbox_here[3] = Mymin(PP->shape[0] - 1, ibbox_here[3] + ghost_width);
|
|
||||||
ibbox_here[1] = Mymax(0, ibbox_here[1] - ghost_width);
|
|
||||||
ibbox_here[4] = Mymin(PP->shape[1] - 1, ibbox_here[4] + ghost_width);
|
|
||||||
ibbox_here[2] = Mymax(0, ibbox_here[2] - ghost_width);
|
|
||||||
ibbox_here[5] = Mymin(PP->shape[2] - 1, ibbox_here[5] + ghost_width);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int d=0; d<3; d++) shape_here[d] = ibbox_here[d+3] - ibbox_here[d] + 1;
|
|
||||||
|
|
||||||
#ifdef Vertex
|
|
||||||
#ifdef Cell
|
|
||||||
#error Both Cell and Vertex are defined
|
|
||||||
#endif
|
|
||||||
dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
|
|
||||||
bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd;
|
|
||||||
bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd;
|
|
||||||
dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
|
|
||||||
bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd;
|
|
||||||
bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd;
|
|
||||||
dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
|
|
||||||
bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd;
|
|
||||||
bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd;
|
|
||||||
#else
|
|
||||||
#ifdef Cell
|
|
||||||
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
|
||||||
bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd;
|
|
||||||
bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd;
|
|
||||||
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
|
||||||
bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd;
|
|
||||||
bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd;
|
|
||||||
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
|
||||||
bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd;
|
|
||||||
bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd;
|
|
||||||
#else
|
|
||||||
#error Not define Vertex nor Cell
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
ng = createMappedBlock(BlL, dim, shape_here, bbox_here,
|
|
||||||
current_block_id, ingfsi, fngfsi, PP->lev);
|
|
||||||
current_ng_start = ng;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (first_block_in_patch) {
|
|
||||||
ng0 = current_ng_start;
|
|
||||||
MyList<Block> *Bp_start = BlL;
|
|
||||||
while (Bp_start && Bp_start->data != ng0) Bp_start = Bp_start->next;
|
|
||||||
PP->blb = Bp_start;
|
|
||||||
first_block_in_patch = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
current_block_id++;
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
MyList<Block> *Bp_end = BlL;
|
|
||||||
while (Bp_end && Bp_end->data != ng) Bp_end = Bp_end->next;
|
|
||||||
PP->ble = Bp_end;
|
|
||||||
}
|
|
||||||
|
|
||||||
PLi = PLi->next;
|
|
||||||
}
|
|
||||||
if (reacpu < nodes * 2 / 3)
|
|
||||||
{
|
|
||||||
int myrank;
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
|
||||||
if (myrank == 0)
|
|
||||||
cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
return BlL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Split one index box of patch PP into two blocks along the first (x) axis.
 *
 * The x range [ib0_orig, ib3_orig] is cut at its integer midpoint: the left
 * half covers [ib0_orig, mid] and is assigned to rank r_left, the right half
 * covers [mid + 1, ib3_orig] and is assigned to rank r_right.  The y range
 * [jb1_orig, jb4_orig] and z range [kb2_orig, kb5_orig] are shared by both
 * halves.  Each half is widened by ghost_width on every side (clamped to the
 * patch index range unless `periodic` is set), converted to physical bounds
 * using PP->bbox / PP->shape, and appended to the block list BlL (the list is
 * created when BlL is null).
 *
 * On return split_first_block points to the left block and split_last_block
 * to the right block; the right block is also the return value.
 *
 * NOTE(review): the parameter _dim is not used; the Block constructor is
 * given the name `dim` that is in scope from elsewhere in the file.
 */
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
                                   int ib0_orig, int ib3_orig,
                                   int jb1_orig, int jb4_orig,
                                   int kb2_orig, int kb5_orig,
                                   Patch* PP, int r_left, int r_right,
                                   int ingfsi, int fngfsi, bool periodic,
                                   Block* &split_first_block, Block* &split_last_block)
{
  const int x_split = (ib0_orig + ib3_orig) / 2;

  // Build one half: x range [x_lo, x_hi], full y/z range, owned by owner_rank.
  auto make_half = [&](int x_lo, int x_hi, int owner_rank) -> Block* {
    const int lo_raw[3] = {x_lo, jb1_orig, kb2_orig};
    const int hi_raw[3] = {x_hi, jb4_orig, kb5_orig};

    int lo[3], hi[3], extent[3];
    double box[6];

    // Add the ghost layer on each side; without periodicity the indices are
    // clamped to [0, PP->shape[d] - 1].
    for (int d = 0; d < 3; d++)
    {
      if (periodic)
      {
        lo[d] = lo_raw[d] - ghost_width;
        hi[d] = hi_raw[d] + ghost_width;
      }
      else
      {
        lo[d] = Mymax(0, lo_raw[d] - ghost_width);
        hi[d] = Mymin(PP->shape[d] - 1, hi_raw[d] + ghost_width);
      }
      extent[d] = hi[d] - lo[d] + 1;
    }

    // Index bounds -> physical bounds.  Vertex: points sit on the box edges,
    // spacing uses (shape - 1).  Cell: spacing uses shape and the upper bound
    // is the far edge of the last cell.
#ifdef Vertex
    for (int d = 0; d < 3; d++)
    {
      const double h = (PP->bbox[d + 3] - PP->bbox[d]) / (PP->shape[d] - 1);
      box[d] = PP->bbox[d] + lo[d] * h;
      box[d + 3] = PP->bbox[d] + hi[d] * h;
    }
#else
#ifdef Cell
    for (int d = 0; d < 3; d++)
    {
      const double h = (PP->bbox[d + 3] - PP->bbox[d]) / PP->shape[d];
      box[d] = PP->bbox[d] + lo[d] * h;
      box[d + 3] = PP->bbox[d] + (hi[d] + 1) * h;
    }
#endif
#endif

    Block* half = new Block(dim, extent, box, owner_rank, ingfsi, fngfsi, PP->lev);
    if (!BlL)
      BlL = new MyList<Block>(half);
    else
      BlL->insert(half);

    return half;
  };

  // Left half is created (and inserted) before the right half.
  split_first_block = make_half(ib0_orig, x_split, r_left);
  split_last_block = make_half(x_split + 1, ib3_orig, r_right);
  return split_last_block;
}
|
|
||||||
|
|
||||||
/*
 * Create a block with the given shape / bounding box and append it to BlL.
 *
 * The owning rank defaults to block_id.  When INTERP_LB_NPROCS > 0 the remap
 * table interp_lb_remaps is scanned (first interp_lb_num_remaps entries); the
 * first entry whose column 0 equals block_id overrides the rank with its
 * column 1.  BlL is created when it is null.  Returns the new block.
 *
 * NOTE(review): the parameter _dim is not used; the Block constructor is
 * given the name `dim` that is in scope from elsewhere in the file.
 */
Block* Parallel::createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
                                   int block_id, int ingfsi, int fngfsi, int lev)
{
  int owner = block_id;

  if (INTERP_LB_NPROCS > 0)
  {
    // Find the first remap entry for this block id, if any.
    int ri = 0;
    while (ri < interp_lb_num_remaps && interp_lb_remaps[ri][0] != block_id)
      ri++;
    if (ri < interp_lb_num_remaps)
      owner = interp_lb_remaps[ri][1];
  }

  Block* fresh = new Block(dim, shape, bbox, owner, ingfsi, fngfsi, lev);
  if (!BlL)
    BlL = new MyList<Block>(fresh);
  else
    BlL->insert(fresh);

  return fresh;
}
|
|
||||||
#else
|
|
||||||
// When INTERP_LB_OPTIMIZE is not defined, distribute_optimize falls back to distribute
|
|
||||||
MyList<Block> *Parallel::distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
|
||||||
bool periodic, int nodes)
|
|
||||||
{
|
|
||||||
return distribute(PatchLIST, cpusize, ingfsi, fngfsi, periodic, nodes);
|
|
||||||
}
|
|
||||||
// Stub for builds without INTERP_LB_OPTIMIZE: performs no split, creates no
// block, leaves BlL and both output references untouched, and returns nullptr.
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
                                   int ib0_orig, int ib3_orig,
                                   int jb1_orig, int jb4_orig,
                                   int kb2_orig, int kb5_orig,
                                   Patch* PP, int r_left, int r_right,
                                   int ingfsi, int fngfsi, bool periodic,
                                   Block* &split_first_block, Block* &split_last_block)
{
  return nullptr;
}
|
|
||||||
// Stub for builds without INTERP_LB_OPTIMIZE: creates nothing, leaves BlL
// untouched, and returns nullptr.
Block* Parallel::createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
                                   int block_id, int ingfsi, int fngfsi, int lev)
{
  return nullptr;
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||||
bool periodic, int start_rank, int end_rank, int nodes)
|
bool periodic, int start_rank, int end_rank, int nodes)
|
||||||
@@ -3716,7 +3338,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
{
|
{
|
||||||
int myrank;
|
int myrank;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
// double time1 = omp_get_wtime();
|
||||||
int DIM = dim;
|
int DIM = dim;
|
||||||
|
|
||||||
if (dir != PACK && dir != UNPACK)
|
if (dir != PACK && dir != UNPACK)
|
||||||
@@ -3739,7 +3361,6 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
varls = varls->next;
|
varls = varls->next;
|
||||||
varld = varld->next;
|
varld = varld->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (varls || varld)
|
if (varls || varld)
|
||||||
{
|
{
|
||||||
cout << "error in short data packer, var lists does not match." << endl;
|
cout << "error in short data packer, var lists does not match." << endl;
|
||||||
@@ -3753,7 +3374,6 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
type = 2;
|
type = 2;
|
||||||
else
|
else
|
||||||
type = 3;
|
type = 3;
|
||||||
|
|
||||||
while (src && dst)
|
while (src && dst)
|
||||||
{
|
{
|
||||||
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
|
||||||
@@ -3763,6 +3383,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
varld = VarListd;
|
varld = VarListd;
|
||||||
while (varls && varld)
|
while (varls && varld)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (data)
|
if (data)
|
||||||
{
|
{
|
||||||
if (dir == PACK)
|
if (dir == PACK)
|
||||||
@@ -3783,6 +3404,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
|
||||||
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
|
||||||
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
|
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
|
||||||
|
|
||||||
}
|
}
|
||||||
if (dir == UNPACK) // from target data to corresponding grid
|
if (dir == UNPACK) // from target data to corresponding grid
|
||||||
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
|
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
|
||||||
@@ -3796,8 +3418,14 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
|
|||||||
}
|
}
|
||||||
dst = dst->next;
|
dst = dst->next;
|
||||||
src = src->next;
|
src = src->next;
|
||||||
}
|
|
||||||
|
|
||||||
|
}
|
||||||
|
// double time2 = omp_get_wtime();
|
||||||
|
// xxx += time2 - time1;
|
||||||
|
// if(myrank == 0){
|
||||||
|
// printf("prolong3 time = %lf\n", time2 - time1);
|
||||||
|
|
||||||
|
// }
|
||||||
return size_out;
|
return size_out;
|
||||||
}
|
}
|
||||||
int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir,
|
int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir,
|
||||||
@@ -3892,7 +3520,7 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
|
|||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
|
||||||
int node;
|
int node;
|
||||||
|
// double time1 = omp_get_wtime();
|
||||||
MPI_Request *reqs;
|
MPI_Request *reqs;
|
||||||
MPI_Status *stats;
|
MPI_Status *stats;
|
||||||
reqs = new MPI_Request[2 * cpusize];
|
reqs = new MPI_Request[2 * cpusize];
|
||||||
@@ -3961,7 +3589,9 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
|
|||||||
if (rec_data[node])
|
if (rec_data[node])
|
||||||
delete[] rec_data[node];
|
delete[] rec_data[node];
|
||||||
}
|
}
|
||||||
|
// double time2 = omp_get_wtime();
|
||||||
|
// if (myrank == 0)
|
||||||
|
// printf("transfer time = %lf\n", time2 - time1);
|
||||||
delete[] reqs;
|
delete[] reqs;
|
||||||
delete[] stats;
|
delete[] stats;
|
||||||
delete[] send_data;
|
delete[] send_data;
|
||||||
@@ -5664,203 +5294,6 @@ void Parallel::OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|||||||
delete[] transfer_src;
|
delete[] transfer_src;
|
||||||
delete[] transfer_dst;
|
delete[] transfer_dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restrict_cached: cache grid segment lists, reuse buffers via transfer_cached
|
|
||||||
void Parallel::Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
|
||||||
int Symmetry, SyncCache &cache)
|
|
||||||
{
|
|
||||||
if (!cache.valid)
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
|
||||||
cache.cpusize = cpusize;
|
|
||||||
|
|
||||||
if (!cache.combined_src)
|
|
||||||
{
|
|
||||||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
cache.send_lengths = new int[cpusize];
|
|
||||||
cache.recv_lengths = new int[cpusize];
|
|
||||||
cache.send_bufs = new double *[cpusize];
|
|
||||||
cache.recv_bufs = new double *[cpusize];
|
|
||||||
cache.send_buf_caps = new int[cpusize];
|
|
||||||
cache.recv_buf_caps = new int[cpusize];
|
|
||||||
for (int i = 0; i < cpusize; i++)
|
|
||||||
{
|
|
||||||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
|
||||||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
|
||||||
}
|
|
||||||
cache.max_reqs = 2 * cpusize;
|
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
|
||||||
}
|
|
||||||
|
|
||||||
MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL);
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatfL, node, 2, Symmetry);
|
|
||||||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
|
||||||
if (src_owned) src_owned->destroyList();
|
|
||||||
}
|
|
||||||
if (dst) dst->destroyList();
|
|
||||||
|
|
||||||
cache.valid = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
// OutBdLow2Hi_cached: cache grid segment lists, reuse buffers via transfer_cached
|
|
||||||
void Parallel::OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
|
||||||
int Symmetry, SyncCache &cache)
|
|
||||||
{
|
|
||||||
if (!cache.valid)
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
|
||||||
cache.cpusize = cpusize;
|
|
||||||
|
|
||||||
if (!cache.combined_src)
|
|
||||||
{
|
|
||||||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
cache.send_lengths = new int[cpusize];
|
|
||||||
cache.recv_lengths = new int[cpusize];
|
|
||||||
cache.send_bufs = new double *[cpusize];
|
|
||||||
cache.recv_bufs = new double *[cpusize];
|
|
||||||
cache.send_buf_caps = new int[cpusize];
|
|
||||||
cache.recv_buf_caps = new int[cpusize];
|
|
||||||
for (int i = 0; i < cpusize; i++)
|
|
||||||
{
|
|
||||||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
|
||||||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
|
||||||
}
|
|
||||||
cache.max_reqs = 2 * cpusize;
|
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
|
||||||
}
|
|
||||||
|
|
||||||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry);
|
|
||||||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
|
||||||
if (src_owned) src_owned->destroyList();
|
|
||||||
}
|
|
||||||
if (dst) dst->destroyList();
|
|
||||||
|
|
||||||
cache.valid = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
transfer_cached(cache.combined_src, cache.combined_dst, VarList1, VarList2, Symmetry, cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
// OutBdLow2Himix_cached: same as OutBdLow2Hi_cached but uses transfermix for unpacking
|
|
||||||
void Parallel::OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
|
||||||
int Symmetry, SyncCache &cache)
|
|
||||||
{
|
|
||||||
if (!cache.valid)
|
|
||||||
{
|
|
||||||
int cpusize;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cpusize);
|
|
||||||
cache.cpusize = cpusize;
|
|
||||||
|
|
||||||
if (!cache.combined_src)
|
|
||||||
{
|
|
||||||
cache.combined_src = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
cache.combined_dst = new MyList<Parallel::gridseg> *[cpusize];
|
|
||||||
cache.send_lengths = new int[cpusize];
|
|
||||||
cache.recv_lengths = new int[cpusize];
|
|
||||||
cache.send_bufs = new double *[cpusize];
|
|
||||||
cache.recv_bufs = new double *[cpusize];
|
|
||||||
cache.send_buf_caps = new int[cpusize];
|
|
||||||
cache.recv_buf_caps = new int[cpusize];
|
|
||||||
for (int i = 0; i < cpusize; i++)
|
|
||||||
{
|
|
||||||
cache.send_bufs[i] = cache.recv_bufs[i] = 0;
|
|
||||||
cache.send_buf_caps[i] = cache.recv_buf_caps[i] = 0;
|
|
||||||
}
|
|
||||||
cache.max_reqs = 2 * cpusize;
|
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
|
||||||
}
|
|
||||||
|
|
||||||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
MyList<Parallel::gridseg> *src_owned = build_owned_gsl(PatcL, node, 4, Symmetry);
|
|
||||||
build_gstl(src_owned, dst, &cache.combined_src[node], &cache.combined_dst[node]);
|
|
||||||
if (src_owned) src_owned->destroyList();
|
|
||||||
}
|
|
||||||
if (dst) dst->destroyList();
|
|
||||||
|
|
||||||
cache.valid = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use transfermix instead of transfer for mix-mode interpolation
|
|
||||||
int myrank;
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &cache.cpusize);
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
|
||||||
int cpusize = cache.cpusize;
|
|
||||||
|
|
||||||
int req_no = 0;
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
{
|
|
||||||
if (node == myrank)
|
|
||||||
{
|
|
||||||
int length = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
|
||||||
cache.recv_lengths[node] = length;
|
|
||||||
if (length > 0)
|
|
||||||
{
|
|
||||||
if (length > cache.recv_buf_caps[node])
|
|
||||||
{
|
|
||||||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
|
||||||
cache.recv_bufs[node] = new double[length];
|
|
||||||
cache.recv_buf_caps[node] = length;
|
|
||||||
}
|
|
||||||
data_packermix(cache.recv_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
|
||||||
cache.send_lengths[node] = slength;
|
|
||||||
if (slength > 0)
|
|
||||||
{
|
|
||||||
if (slength > cache.send_buf_caps[node])
|
|
||||||
{
|
|
||||||
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
|
|
||||||
cache.send_bufs[node] = new double[slength];
|
|
||||||
cache.send_buf_caps[node] = slength;
|
|
||||||
}
|
|
||||||
data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
|
|
||||||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
|
||||||
}
|
|
||||||
int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
|
||||||
cache.recv_lengths[node] = rlength;
|
|
||||||
if (rlength > 0)
|
|
||||||
{
|
|
||||||
if (rlength > cache.recv_buf_caps[node])
|
|
||||||
{
|
|
||||||
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
|
|
||||||
cache.recv_bufs[node] = new double[rlength];
|
|
||||||
cache.recv_buf_caps[node] = rlength;
|
|
||||||
}
|
|
||||||
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Waitall(req_no, cache.reqs, cache.stats);
|
|
||||||
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
|
||||||
if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
|
|
||||||
data_packermix(cache.recv_bufs[node], cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
|
|
||||||
}
|
|
||||||
|
|
||||||
// collect all buffer grid segments or blocks for given patch
|
// collect all buffer grid segments or blocks for given patch
|
||||||
MyList<Parallel::gridseg> *Parallel::build_buffer_gsl(Patch *Pat)
|
MyList<Parallel::gridseg> *Parallel::build_buffer_gsl(Patch *Pat)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -32,16 +32,6 @@ namespace Parallel
|
|||||||
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
||||||
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
||||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
||||||
MyList<Block> *distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0);
|
|
||||||
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
|
||||||
int ib0_orig, int ib3_orig,
|
|
||||||
int jb1_orig, int jb4_orig,
|
|
||||||
int kb2_orig, int kb5_orig,
|
|
||||||
Patch* PP, int r_left, int r_right,
|
|
||||||
int ingfsi, int fngfsi, bool periodic,
|
|
||||||
Block* &split_first_block, Block* &split_last_block);
|
|
||||||
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
|
|
||||||
int block_id, int ingfsi, int fngfsi, int lev);
|
|
||||||
void KillBlocks(MyList<Patch> *PatchLIST);
|
void KillBlocks(MyList<Patch> *PatchLIST);
|
||||||
|
|
||||||
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
||||||
@@ -140,15 +130,6 @@ namespace Parallel
|
|||||||
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
void OutBdLow2Himix(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
void Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
|
||||||
int Symmetry, SyncCache &cache);
|
|
||||||
void OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
|
||||||
int Symmetry, SyncCache &cache);
|
|
||||||
void OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|
||||||
MyList<var> *VarList1, MyList<var> *VarList2,
|
|
||||||
int Symmetry, SyncCache &cache);
|
|
||||||
void Prolong(Patch *Patc, Patch *Patf,
|
void Prolong(Patch *Patc, Patch *Patf,
|
||||||
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
MyList<var> *VarList1 /* source */, MyList<var> *VarList2 /* target */,
|
||||||
int Symmetry);
|
int Symmetry);
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ using namespace std;
|
|||||||
|
|
||||||
#include "derivatives.h"
|
#include "derivatives.h"
|
||||||
#include "ricci_gamma.h"
|
#include "ricci_gamma.h"
|
||||||
|
#include "xh_bssn_rhs_compute.h"
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|
||||||
// define bssn_class
|
// define bssn_class
|
||||||
@@ -2029,6 +2029,7 @@ void bssn_class::Read_Ansorg()
|
|||||||
void bssn_class::Evolve(int Steps)
|
void bssn_class::Evolve(int Steps)
|
||||||
{
|
{
|
||||||
clock_t prev_clock, curr_clock;
|
clock_t prev_clock, curr_clock;
|
||||||
|
double prev_time, curr_time;
|
||||||
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
|
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
|
||||||
LastAnas = 0;
|
LastAnas = 0;
|
||||||
#if 0
|
#if 0
|
||||||
@@ -2141,8 +2142,10 @@ void bssn_class::Evolve(int Steps)
|
|||||||
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
||||||
// { GH->levels=GH->movls; }
|
// { GH->levels=GH->movls; }
|
||||||
|
|
||||||
if (myrank == 0)
|
if (myrank == 0){
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
|
curr_time = omp_get_wtime();
|
||||||
|
}
|
||||||
#if (PSTR == 0)
|
#if (PSTR == 0)
|
||||||
RecursiveStep(0);
|
RecursiveStep(0);
|
||||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
@@ -2198,12 +2201,17 @@ void bssn_class::Evolve(int Steps)
|
|||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
{
|
{
|
||||||
prev_clock = curr_clock;
|
prev_clock = curr_clock;
|
||||||
|
prev_time = curr_time;
|
||||||
curr_clock = clock();
|
curr_clock = clock();
|
||||||
|
curr_time = omp_get_wtime();
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
// cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
|
||||||
|
// << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
||||||
|
// << " seconds! " << endl;
|
||||||
|
// // cout << endl;
|
||||||
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
|
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
|
||||||
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
|
<< " Computer used " << (curr_time - prev_time)
|
||||||
<< " seconds! " << endl;
|
<< " seconds! " << endl;
|
||||||
// cout << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PhysTime >= TotalTime)
|
if (PhysTime >= TotalTime)
|
||||||
@@ -2426,9 +2434,9 @@ void bssn_class::RecursiveStep(int lev)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (REGLEV == 0)
|
#if (REGLEV == 0)
|
||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -2605,9 +2613,9 @@ void bssn_class::ParallelStep()
|
|||||||
delete[] tporg;
|
delete[] tporg;
|
||||||
delete[] tporgo;
|
delete[] tporgo;
|
||||||
#if (REGLEV == 0)
|
#if (REGLEV == 0)
|
||||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -2772,9 +2780,9 @@ void bssn_class::ParallelStep()
|
|||||||
if (lev + 1 >= GH->movls)
|
if (lev + 1 >= GH->movls)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -2787,9 +2795,9 @@ void bssn_class::ParallelStep()
|
|||||||
// for this level
|
// for this level
|
||||||
if (YN == 1)
|
if (YN == 1)
|
||||||
{
|
{
|
||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -2806,9 +2814,9 @@ void bssn_class::ParallelStep()
|
|||||||
if (YN == 1)
|
if (YN == 1)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -2822,9 +2830,9 @@ void bssn_class::ParallelStep()
|
|||||||
if (i % 4 == 3)
|
if (i % 4 == 3)
|
||||||
{
|
{
|
||||||
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
|
||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
@@ -3092,7 +3100,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -3292,7 +3300,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||||
ERROR = 1;
|
ERROR = 1;
|
||||||
}
|
}
|
||||||
|
// cout<<"....................................."<<endl;
|
||||||
// rk4 substep and boundary
|
// rk4 substep and boundary
|
||||||
{
|
{
|
||||||
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
|
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
|
||||||
@@ -3457,7 +3465,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||||
@@ -3970,7 +3978,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -4312,7 +4320,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||||
@@ -4848,7 +4856,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -5048,7 +5056,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||||
@@ -5819,11 +5827,21 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -5870,11 +5888,21 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -5949,11 +5977,21 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -5971,11 +6009,21 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6036,11 +6084,21 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6060,11 +6118,21 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6101,11 +6169,21 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -6114,11 +6192,21 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
else // no time refinement levels and for all same time levels
|
else // no time refinement levels and for all same time levels
|
||||||
{
|
{
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
|
Ppc = GH->PatL[lev - 1];
|
||||||
|
while (Ppc)
|
||||||
|
{
|
||||||
|
Pp = GH->PatL[lev];
|
||||||
|
while (Pp)
|
||||||
|
{
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
|
Pp = Pp->next;
|
||||||
|
}
|
||||||
|
Ppc = Ppc->next;
|
||||||
|
}
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
|
||||||
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
|
||||||
@@ -7263,7 +7351,7 @@ void bssn_class::Constraint_Out()
|
|||||||
Block *cg = BP->data;
|
Block *cg = BP->data;
|
||||||
if (myrank == cg->rank)
|
if (myrank == cg->rank)
|
||||||
{
|
{
|
||||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -7766,7 +7854,7 @@ void bssn_class::Interp_Constraint(bool infg)
|
|||||||
Block *cg = BP->data;
|
Block *cg = BP->data;
|
||||||
if (myrank == cg->rank)
|
if (myrank == cg->rank)
|
||||||
{
|
{
|
||||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
@@ -8024,7 +8112,7 @@ void bssn_class::Compute_Constraint()
|
|||||||
Block *cg = BP->data;
|
Block *cg = BP->data;
|
||||||
if (myrank == cg->rank)
|
if (myrank == cg->rank)
|
||||||
{
|
{
|
||||||
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||||
|
|||||||
@@ -106,38 +106,6 @@
|
|||||||
call getpbh(BHN,Porg,Mass)
|
call getpbh(BHN,Porg,Mass)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
!!! sanity check (disabled in production builds for performance)
|
|
||||||
#ifdef DEBUG
|
|
||||||
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
|
|
||||||
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
|
|
||||||
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
|
|
||||||
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)
|
|
||||||
if(dX.ne.dX) then
|
|
||||||
if(sum(chi).ne.sum(chi))write(*,*)"bssn.f90: find NaN in chi"
|
|
||||||
if(sum(trK).ne.sum(trK))write(*,*)"bssn.f90: find NaN in trk"
|
|
||||||
if(sum(dxx).ne.sum(dxx))write(*,*)"bssn.f90: find NaN in dxx"
|
|
||||||
if(sum(gxy).ne.sum(gxy))write(*,*)"bssn.f90: find NaN in gxy"
|
|
||||||
if(sum(gxz).ne.sum(gxz))write(*,*)"bssn.f90: find NaN in gxz"
|
|
||||||
if(sum(dyy).ne.sum(dyy))write(*,*)"bssn.f90: find NaN in dyy"
|
|
||||||
if(sum(gyz).ne.sum(gyz))write(*,*)"bssn.f90: find NaN in gyz"
|
|
||||||
if(sum(dzz).ne.sum(dzz))write(*,*)"bssn.f90: find NaN in dzz"
|
|
||||||
if(sum(Axx).ne.sum(Axx))write(*,*)"bssn.f90: find NaN in Axx"
|
|
||||||
if(sum(Axy).ne.sum(Axy))write(*,*)"bssn.f90: find NaN in Axy"
|
|
||||||
if(sum(Axz).ne.sum(Axz))write(*,*)"bssn.f90: find NaN in Axz"
|
|
||||||
if(sum(Ayy).ne.sum(Ayy))write(*,*)"bssn.f90: find NaN in Ayy"
|
|
||||||
if(sum(Ayz).ne.sum(Ayz))write(*,*)"bssn.f90: find NaN in Ayz"
|
|
||||||
if(sum(Azz).ne.sum(Azz))write(*,*)"bssn.f90: find NaN in Azz"
|
|
||||||
if(sum(Gamx).ne.sum(Gamx))write(*,*)"bssn.f90: find NaN in Gamx"
|
|
||||||
if(sum(Gamy).ne.sum(Gamy))write(*,*)"bssn.f90: find NaN in Gamy"
|
|
||||||
if(sum(Gamz).ne.sum(Gamz))write(*,*)"bssn.f90: find NaN in Gamz"
|
|
||||||
if(sum(Lap).ne.sum(Lap))write(*,*)"bssn.f90: find NaN in Lap"
|
|
||||||
if(sum(betax).ne.sum(betax))write(*,*)"bssn.f90: find NaN in betax"
|
|
||||||
if(sum(betay).ne.sum(betay))write(*,*)"bssn.f90: find NaN in betay"
|
|
||||||
if(sum(betaz).ne.sum(betaz))write(*,*)"bssn.f90: find NaN in betaz"
|
|
||||||
gont = 1
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
PI = dacos(-ONE)
|
PI = dacos(-ONE)
|
||||||
|
|
||||||
@@ -634,7 +602,7 @@
|
|||||||
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
|
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
|
||||||
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
|
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
|
||||||
gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
|
gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
|
||||||
! now get physical second kind of connection
|
|
||||||
Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
|
Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
|
||||||
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
|
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
|
||||||
Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
|
Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
|
||||||
@@ -945,60 +913,103 @@
|
|||||||
SSA(2)=SYM
|
SSA(2)=SYM
|
||||||
SSA(3)=ANTI
|
SSA(3)=ANTI
|
||||||
|
|
||||||
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
|
!!!!!!!!!advection term part
|
||||||
! lopsided_kodis shares the symmetry_bd buffer between advection and
|
|
||||||
! dissipation, eliminating redundant full-grid copies. For metric variables
|
|
||||||
! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
|
|
||||||
! so the constant offset has no effect on dissipation.
|
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA)
|
||||||
|
!!
|
||||||
#if 1
|
|
||||||
!! bam does not apply dissipation on gauge variables
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
|
||||||
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
|
||||||
#endif
|
|
||||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
! No dissipation on gauge variables (advection only)
|
|
||||||
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
|
call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS)
|
||||||
|
|
||||||
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
||||||
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
|
call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS)
|
||||||
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
|
call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS)
|
||||||
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
|
call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||||
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
|
call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS)
|
||||||
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
|
call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS)
|
||||||
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
|
call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if(eps>0)then
|
||||||
|
! usual Kreiss-Oliger dissipation
|
||||||
|
call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps)
|
||||||
|
#if 0
|
||||||
|
#define i 42
|
||||||
|
#define j 40
|
||||||
|
#define k 40
|
||||||
|
if(Lev == 1)then
|
||||||
|
write(*,*) X(i),Y(j),Z(k)
|
||||||
|
write(*,*) "before",Axx_rhs(i,j,k)
|
||||||
|
endif
|
||||||
|
#undef i
|
||||||
|
#undef j
|
||||||
|
#undef k
|
||||||
|
!!stop
|
||||||
#endif
|
#endif
|
||||||
|
call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps)
|
||||||
|
#if 0
|
||||||
|
#define i 42
|
||||||
|
#define j 40
|
||||||
|
#define k 40
|
||||||
|
if(Lev == 1)then
|
||||||
|
write(*,*) X(i),Y(j),Z(k)
|
||||||
|
write(*,*) "after",Axx_rhs(i,j,k)
|
||||||
|
endif
|
||||||
|
#undef i
|
||||||
|
#undef j
|
||||||
|
#undef k
|
||||||
|
!!stop
|
||||||
|
#endif
|
||||||
|
call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps)
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
!! bam does not apply dissipation on gauge variables
|
||||||
|
call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps)
|
||||||
|
#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7)
|
||||||
|
call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps)
|
||||||
|
call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
if(co == 0)then
|
if(co == 0)then
|
||||||
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho
|
! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho
|
||||||
|
|||||||
@@ -130,11 +130,7 @@ void cgh::compose_cgh(int nprocs)
|
|||||||
for (int lev = 0; lev < levels; lev++)
|
for (int lev = 0; lev < levels; lev++)
|
||||||
{
|
{
|
||||||
checkPatchList(PatL[lev], false);
|
checkPatchList(PatL[lev], false);
|
||||||
#ifdef INTERP_LB_OPTIMIZE
|
|
||||||
Parallel::distribute_optimize(PatL[lev], nprocs, ingfs, fngfs, false);
|
|
||||||
#else
|
|
||||||
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
|
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
|
||||||
#endif
|
|
||||||
#if (RPB == 1)
|
#if (RPB == 1)
|
||||||
// we need distributed box of PatL[lev] and PatL[lev-1]
|
// we need distributed box of PatL[lev] and PatL[lev-1]
|
||||||
if (lev > 0)
|
if (lev > 0)
|
||||||
@@ -1305,13 +1301,13 @@ bool cgh::Interp_One_Point(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
void cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
||||||
monitor *ErrorMonitor)
|
monitor *ErrorMonitor)
|
||||||
{
|
{
|
||||||
if (lev < movls)
|
if (lev < movls)
|
||||||
return false;
|
return;
|
||||||
|
|
||||||
#if (0)
|
#if (0)
|
||||||
// #if (PSTR == 1 || PSTR == 2)
|
// #if (PSTR == 1 || PSTR == 2)
|
||||||
@@ -1400,7 +1396,7 @@ bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, do
|
|||||||
for (bhi = 0; bhi < BH_num; bhi++)
|
for (bhi = 0; bhi < BH_num; bhi++)
|
||||||
delete[] tmpPorg[bhi];
|
delete[] tmpPorg[bhi];
|
||||||
delete[] tmpPorg;
|
delete[] tmpPorg;
|
||||||
return false;
|
return;
|
||||||
}
|
}
|
||||||
// x direction
|
// x direction
|
||||||
rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX;
|
rr = (Porg0[bhi][0] - handle[lev][grd][0]) / dX;
|
||||||
@@ -1504,7 +1500,6 @@ bool cgh::Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, do
|
|||||||
for (int bhi = 0; bhi < BH_num; bhi++)
|
for (int bhi = 0; bhi < BH_num; bhi++)
|
||||||
delete[] tmpPorg[bhi];
|
delete[] tmpPorg[bhi];
|
||||||
delete[] tmpPorg;
|
delete[] tmpPorg;
|
||||||
return tot_flag;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ public:
|
|||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList,
|
MyList<var> *FutureList, MyList<var> *tmList,
|
||||||
int Symmetry, bool BB);
|
int Symmetry, bool BB);
|
||||||
bool Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
void Regrid_Onelevel(int lev, int Symmetry, int BH_num, double **Porgbr, double **Porg0,
|
||||||
MyList<var> *OldList, MyList<var> *StateList,
|
MyList<var> *OldList, MyList<var> *StateList,
|
||||||
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
MyList<var> *FutureList, MyList<var> *tmList, bool BB,
|
||||||
monitor *ErrorMonitor);
|
monitor *ErrorMonitor);
|
||||||
|
|||||||
@@ -69,12 +69,10 @@
|
|||||||
fy = ZEO
|
fy = ZEO
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
!DIR$ UNROLL PARTIAL(4)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
! x direction
|
! x direction
|
||||||
if(i+1 <= imax .and. i-1 >= imin)then
|
if(i+1 <= imax .and. i-1 >= imin)then
|
||||||
!
|
!
|
||||||
! - f(i-1) + f(i+1)
|
! - f(i-1) + f(i+1)
|
||||||
@@ -373,8 +371,6 @@
|
|||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
!DIR$ UNROLL PARTIAL(4)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
|
|||||||
26
AMSS_NCKU_source/extention/include/xh_bssn_rhs_compute.h
Normal file
26
AMSS_NCKU_source/extention/include/xh_bssn_rhs_compute.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
#include "xh_macrodef.h"
|
||||||
|
#include "xh_tool.h"
|
||||||
|
int f_compute_rhs_bssn(int *ex, double &T,
|
||||||
|
double *X, double *Y, double *Z,
|
||||||
|
double *chi, double *trK,
|
||||||
|
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
||||||
|
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
||||||
|
double *Gamx, double *Gamy, double *Gamz,
|
||||||
|
double *Lap, double *betax, double *betay, double *betaz,
|
||||||
|
double *dtSfx, double *dtSfy, double *dtSfz,
|
||||||
|
double *chi_rhs, double *trK_rhs,
|
||||||
|
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
||||||
|
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
||||||
|
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
||||||
|
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
||||||
|
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
||||||
|
double *rho, double *Sx, double *Sy, double *Sz,
|
||||||
|
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
||||||
|
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
||||||
|
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
||||||
|
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
||||||
|
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
||||||
|
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
||||||
|
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
||||||
|
int &Symmetry, int &Lev, double &eps, int &co
|
||||||
|
);
|
||||||
66
AMSS_NCKU_source/extention/include/xh_macrodef.h
Normal file
66
AMSS_NCKU_source/extention/include/xh_macrodef.h
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
/* tetrad notes
|
||||||
|
v:r; u: phi; w: theta
|
||||||
|
|
||||||
|
tetradtype 0
|
||||||
|
v^a = (x,y,z)
|
||||||
|
orthonormal order: v,u,w
|
||||||
|
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
|
||||||
|
|
||||||
|
tetradtype 1
|
||||||
|
orthonormal order: w,u,v
|
||||||
|
m = (theta + i phi)/sqrt(2) following Sperhake, Eq.(3.2) of PRD 85, 124062(2012)
|
||||||
|
|
||||||
|
tetradtype 2
|
||||||
|
v_a = (x,y,z)
|
||||||
|
orthonormal order: v,u,w
|
||||||
|
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
|
||||||
|
*/
|
||||||
|
#define tetradtype 2
|
||||||
|
|
||||||
|
/* Cell center or Vertex center */
|
||||||
|
#define Cell
|
||||||
|
|
||||||
|
/* ghost_width meaning:
|
||||||
|
2nd order: 2
|
||||||
|
4th order: 3
|
||||||
|
6th order: 4
|
||||||
|
8th order: 5
|
||||||
|
*/
|
||||||
|
#define ghost_width 3
|
||||||
|
|
||||||
|
/* use shell or not */
|
||||||
|
#define WithShell
|
||||||
|
|
||||||
|
/* use constraint preserving boundary condition or not
|
||||||
|
only affect Z4c
|
||||||
|
*/
|
||||||
|
#define CPBC
|
||||||
|
|
||||||
|
/* Gauge condition type
|
||||||
|
0: B^i gauge
|
||||||
|
1: David's puncture gauge
|
||||||
|
2: MB B^i gauge
|
||||||
|
3: RIT B^i gauge
|
||||||
|
4: MB beta gauge ("beta gauge" here does not mean Eq.(3) of PRD 84, 124006)
|
||||||
|
5: RIT beta gauge ("beta gauge" here does not mean Eq.(3) of PRD 84, 124006)
|
||||||
|
6: MGB1 B^i gauge
|
||||||
|
7: MGB2 B^i gauge
|
||||||
|
*/
|
||||||
|
#define GAUGE 2
|
||||||
|
|
||||||
|
/* buffer points for CPBC boundary */
|
||||||
|
#define CPBC_ghost_width (ghost_width)
|
||||||
|
|
||||||
|
/* using BSSN variable for constraint violation and psi4 calculation: 0
|
||||||
|
using ADM variable for constraint violation and psi4 calculation: 1
|
||||||
|
*/
|
||||||
|
#define ABV 0
|
||||||
|
|
||||||
|
/* Type of Potential and Scalar Distribution in F(R) Scalar-Tensor Theory
|
||||||
|
1: Case C of 1112.3928, V=0
|
||||||
|
2: shell with a2^2*phi0/(1+a2^2), f(R) = R+a2*R^2 induced V
|
||||||
|
3: ground state of Schrodinger-Newton system, f(R) = R+a2*R^2 induced V
|
||||||
|
4: a2 = infinity and phi(r) = phi0 * 0.5 * ( tanh((r+r0)/sigma) - tanh((r-r0)/sigma) )
|
||||||
|
5: shell with phi(r) = phi0*Exp(-(r-r0)**2/sigma), V = 0
|
||||||
|
*/
|
||||||
|
#define EScalar_CC 2
|
||||||
338
AMSS_NCKU_source/extention/include/xh_share_func.h
Normal file
338
AMSS_NCKU_source/extention/include/xh_share_func.h
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
#ifndef SHARE_FUNC_H
|
||||||
|
#define SHARE_FUNC_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <omp.h>
|
||||||
|
/* Main grid: map a 0-based (i0, j0, k0) triple to a flat 1D offset (i varies fastest). */
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
    const size_t stride_j = (size_t)ex[0];   /* elements per j step */
    const size_t span_y   = (size_t)ex[1];   /* j extent, used for the k stride */

    size_t offset = (size_t)i0;
    offset += (size_t)j0 * stride_j;
    offset += (size_t)k0 * stride_j * span_y;
    return offset;
}
|
||||||
|
|
||||||
|
/*
 * fh mirrors the Fortran array fh(-1:ex1, -1:ex2, -1:ex3)
 *   ord = 2  =>  shift = 1
 *   iF/jF/kF are Fortran indices (they may be -1, 0, 1..ex)
 * Returns the flat 0-based offset of (iF, jF, kF) inside that padded array.
 */
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
    enum { GHOST_SHIFT = 1, PAD = 2 };       /* PAD == ord */

    const int row_len   = ex[0] + PAD;       /* padded x extent: ex1 + ord */
    const int plane_len = ex[1] + PAD;       /* padded y extent: ex2 + ord */

    const int a = iF + GHOST_SHIFT;          /* 0..ex1+1 */
    const int b = jF + GHOST_SHIFT;          /* 0..ex2+1 */
    const int c = kF + GHOST_SHIFT;          /* 0..ex3+1 */

    size_t offset = (size_t)a;
    offset += (size_t)b * (size_t)row_len;
    offset += (size_t)c * (size_t)row_len * (size_t)plane_len;
    return offset;
}
|
||||||
|
|
||||||
|
/*
 * fh mirrors the Fortran array fh(-2:ex1, -2:ex2, -2:ex3)
 *   ord = 3  =>  shift = 2
 *   iF/jF/kF are Fortran indices (they may be negative)
 * Returns the flat 0-based offset of (iF, jF, kF) inside that padded array.
 */
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
    enum { GHOST_SHIFT = 2, PAD = 3 };       /* PAD == ord, range is -2..ex */

    const int row_len   = ex[0] + PAD;       /* padded x extent: ex1 + ord */
    const int plane_len = ex[1] + PAD;       /* padded y extent: ex2 + ord */

    const int a = iF + GHOST_SHIFT;          /* 0..ex1+2 */
    const int b = jF + GHOST_SHIFT;          /* 0..ex2+2 */
    const int c = kF + GHOST_SHIFT;          /* 0..ex3+2 */

    size_t offset = (size_t)a;
    offset += (size_t)b * (size_t)row_len;
    offset += (size_t)c * (size_t)row_len * (size_t)plane_len;
    return offset;
}
|
||||||
|
|
||||||
|
/*
 * func : (1..extc1, 1..extc2, 1..extc3), 1-based in Fortran
 * funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
 *
 * On the C side:
 *   func  is treated as 0-based: i0=0..extc1-1, j0=0..extc2-1, k0=0..extc3-1
 *   funcc is stored as a flat array addressed with shifted subscripts:
 *     iF in [-ord+1..extc1] -> ii = iF + (ord-1) in [0..extc1+ord-1]
 *     total length nx = extc1 + ord
 *     likewise ny = extc2 + ord, nz = extc3 + ord
 */

/* Flat offset of the 0-based point (i0, j0, k0) in the unpadded array func. */
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
    const size_t stride_j = (size_t)extc[0];
    const size_t span_y   = (size_t)extc[1];

    size_t offset = (size_t)i0;
    offset += (size_t)j0 * stride_j;
    offset += (size_t)k0 * stride_j * span_y;
    return offset;
}
|
||||||
|
|
||||||
|
/*
 * Flat offset of the Fortran-indexed point (iF, jF, kF) in the padded array
 * funcc, whose indices run from -(ord-1) to extc in every direction.
 */
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
    const int ghost = ord - 1;               /* iF = -ghost .. extc1 */
    const int row_len   = extc[0] + ord;     /* [-ghost..extc1] holds extc1+ord entries */
    const int plane_len = extc[1] + ord;

    const int a = iF + ghost;                /* 0..extc1+ghost */
    const int b = jF + ghost;                /* 0..extc2+ghost */
    const int c = kF + ghost;                /* 0..extc3+ghost */

    size_t offset = (size_t)a;
    offset += (size_t)b * (size_t)row_len;
    offset += (size_t)c * (size_t)row_len * (size_t)plane_len;
    return offset;
}
|
||||||
|
|
||||||
|
/*
 * Fill the padded array funcc from func and mirror ord layers across the
 * lower faces. Equivalent to the Fortran:
 *   funcc(1:extc1,1:extc2,1:extc3)=func
 *   do i=0,ord-1
 *     funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
 *   enddo
 *
 * ord   : number of ghost layers written on each lower face
 * extc  : interior extents (extc1, extc2, extc3)
 * func  : unpadded input, addressed through idx_func0
 * funcc : padded output, addressed through idx_funcc_F
 * SoA   : per-direction multipliers applied to the mirrored values
 */
static inline void symmetry_bd(int ord,
                               const int extc[3],
                               const double *func,
                               double *funcc,
                               const double SoA[3])
{
    const int n1 = extc[0], n2 = extc[1], n3 = extc[2];
    const int lowest = 1 - ord;   /* smallest Fortran index held by funcc */

    /* 1) Interior copy: Fortran index iF=1..extc1 pairs with C index iF-1. */
    for (int kF = 1; kF <= n3; ++kF) {
        for (int jF = 1; jF <= n2; ++jF) {
            for (int iF = 1; iF <= n1; ++iF) {
                funcc[idx_funcc_F(iF, jF, kF, ord, extc)] =
                    func[idx_func0(iF - 1, jF - 1, kF - 1, extc)];
            }
        }
    }

    /* 2) x face: layer -g takes layer g+1 scaled by SoA[0] (interior j,k only). */
    for (int g = 0; g < ord; ++g) {
        for (int kF = 1; kF <= n3; ++kF) {
            for (int jF = 1; jF <= n2; ++jF) {
                const double mirrored = funcc[idx_funcc_F(g + 1, jF, kF, ord, extc)];
                funcc[idx_funcc_F(-g, jF, kF, ord, extc)] = mirrored * SoA[0];
            }
        }
    }

    /* 3) y face: the Fortran ":" means every iF, including the x ghosts just written. */
    for (int g = 0; g < ord; ++g) {
        for (int kF = 1; kF <= n3; ++kF) {
            for (int iF = lowest; iF <= n1; ++iF) {
                const double mirrored = funcc[idx_funcc_F(iF, g + 1, kF, ord, extc)];
                funcc[idx_funcc_F(iF, -g, kF, ord, extc)] = mirrored * SoA[1];
            }
        }
    }

    /* 4) z face: every iF and jF, including both sets of ghosts. */
    for (int g = 0; g < ord; ++g) {
        for (int jF = lowest; jF <= n2; ++jF) {
            for (int iF = lowest; iF <= n1; ++iF) {
                const double mirrored = funcc[idx_funcc_F(iF, jF, g + 1, ord, extc)];
                funcc[idx_funcc_F(iF, jF, -g, ord, extc)] = mirrored * SoA[2];
            }
        }
    }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * fdderivs_xh -- second-derivative stencils at a single grid point.
 *
 * Relies on helpers defined earlier in this header (idx_ex, idx_fh_F_ord2)
 * and on the padded fh layout that idx_fh_F_ord2 addresses.
 *
 * Parameters
 *   i0, j0, k0        0-based point; all six outputs are written at
 *                     idx_ex(i0, j0, k0, ex).
 *   ex                grid extents.
 *   fh                padded input, read through idx_fh_F_ord2 with
 *                     Fortran indices (iF = i0 + 1, etc.).
 *   iminF..kmaxF      inclusive Fortran index bounds the stencil may touch.
 *   Fdxdx..Fdydz      scale factors applied to the wide (+-2) stencils.
 *   Sdxdx..Sdydz      scale factors applied to the narrow (+-1) stencils.
 *   fxx..fzz          output arrays.
 *
 * Branch selection
 *   - wide   : i+-2, j+-2 and k+-2 are all inside the bounds
 *              pure  : -f(-2) + 16 f(-1) - 30 f(0) - f(+2) + 16 f(+1)
 *              mixed :  f(-2) -  8 f(-1) +  8 f(+1) - f(+2), applied in both
 *                       directions
 *   - narrow : otherwise, if i+-1, j+-1 and k+-1 are all inside the bounds
 *              pure  : f(-1) - 2 f(0) + f(+1)
 *              mixed : f(-,-) - f(+,-) - f(-,+) + f(+,+)
 *   - else   : all six outputs are set to 0.
 *
 * The definition is wrapped in its own guard: it is placed after the #endif
 * that closes SHARE_FUNC_H, so without this a second inclusion of the header
 * in one translation unit would redefine the function.
 */
#ifndef XH_FDDERIVS_XH_DEFINED
#define XH_FDDERIVS_XH_DEFINED

/* Local shorthands; all are #undef'd after the function body. */
#define XH_FH(a, b, c) fh[idx_fh_F_ord2((a), (b), (c), ex)]
#define XH_D2_WIDE(m2, m1, c0, p1, p2) \
    (-(m2) + F16 * (m1) - F30 * (c0) - (p2) + F16 * (p1))
#define XH_D1_WIDE(m2, m1, p1, p2) \
    ((m2) - F8 * (m1) + F8 * (p1) - (p2))
#define XH_D2_NARROW(m1, c0, p1) \
    ((m1) - TWO * (c0) + (p1))
#define XH_DX_NARROW(mm, pm, mp, pp) \
    ((mm) - (pm) - (mp) + (pp))
/* wide first-derivative combination along x at fixed (b, c) */
#define XH_ROW_X(b, c) \
    XH_D1_WIDE(XH_FH(iF - 2, (b), (c)), XH_FH(iF - 1, (b), (c)), \
               XH_FH(iF + 1, (b), (c)), XH_FH(iF + 2, (b), (c)))
/* wide first-derivative combination along y at fixed (iF, c) */
#define XH_ROW_Y(c) \
    XH_D1_WIDE(XH_FH(iF, jF - 2, (c)), XH_FH(iF, jF - 1, (c)), \
               XH_FH(iF, jF + 1, (c)), XH_FH(iF, jF + 2, (c)))

static inline void fdderivs_xh(
    int i0, int j0, int k0,
    const int ex[3],
    const double *fh,
    int iminF, int jminF, int kminF,
    int imaxF, int jmaxF, int kmaxF,
    double Fdxdx, double Fdydy, double Fdzdz,
    double Fdxdy, double Fdxdz, double Fdydz,
    double Sdxdx, double Sdydy, double Sdzdz,
    double Sdxdy, double Sdxdz, double Sdydz,
    double *fxx, double *fxy, double *fxz,
    double *fyy, double *fyz, double *fzz
){
    const double F8  = 8.0;
    const double F16 = 16.0;
    const double F30 = 30.0;
    const double TWO = 2.0;

    /* Fortran (1-based) indices of the point being evaluated. */
    const int iF = i0 + 1;
    const int jF = j0 + 1;
    const int kF = k0 + 1;

    const size_t p = idx_ex(i0, j0, k0, ex);

    /* Wide branch: i+-2, j+-2, k+-2 are all in range. */
    if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
        (jF + 2) <= jmaxF && (jF - 2) >= jminF &&
        (kF + 2) <= kmaxF && (kF - 2) >= kminF)
    {
        fxx[p] = Fdxdx * XH_D2_WIDE(XH_FH(iF - 2, jF, kF), XH_FH(iF - 1, jF, kF),
                                    XH_FH(iF, jF, kF),
                                    XH_FH(iF + 1, jF, kF), XH_FH(iF + 2, jF, kF));

        fyy[p] = Fdydy * XH_D2_WIDE(XH_FH(iF, jF - 2, kF), XH_FH(iF, jF - 1, kF),
                                    XH_FH(iF, jF, kF),
                                    XH_FH(iF, jF + 1, kF), XH_FH(iF, jF + 2, kF));

        fzz[p] = Fdzdz * XH_D2_WIDE(XH_FH(iF, jF, kF - 2), XH_FH(iF, jF, kF - 1),
                                    XH_FH(iF, jF, kF),
                                    XH_FH(iF, jF, kF + 1), XH_FH(iF, jF, kF + 2));

        /* fxy: x-combination on rows j-2, j-1, j+1, j+2, then combined in y. */
        {
            const double t_jm2 = XH_ROW_X(jF - 2, kF);
            const double t_jm1 = XH_ROW_X(jF - 1, kF);
            const double t_jp1 = XH_ROW_X(jF + 1, kF);
            const double t_jp2 = XH_ROW_X(jF + 2, kF);

            fxy[p] = Fdxdy * XH_D1_WIDE(t_jm2, t_jm1, t_jp1, t_jp2);
        }

        /* fxz: x-combination on planes k-2, k-1, k+1, k+2, then combined in z. */
        {
            const double t_km2 = XH_ROW_X(jF, kF - 2);
            const double t_km1 = XH_ROW_X(jF, kF - 1);
            const double t_kp1 = XH_ROW_X(jF, kF + 1);
            const double t_kp2 = XH_ROW_X(jF, kF + 2);

            fxz[p] = Fdxdz * XH_D1_WIDE(t_km2, t_km1, t_kp1, t_kp2);
        }

        /* fyz: y-combination on planes k-2, k-1, k+1, k+2, then combined in z. */
        {
            const double t_km2 = XH_ROW_Y(kF - 2);
            const double t_km1 = XH_ROW_Y(kF - 1);
            const double t_kp1 = XH_ROW_Y(kF + 1);
            const double t_kp2 = XH_ROW_Y(kF + 2);

            fyz[p] = Fdydz * XH_D1_WIDE(t_km2, t_km1, t_kp1, t_kp2);
        }
    }
    /* Narrow branch: i+-1, j+-1, k+-1 are all in range. */
    else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
             (jF + 1) <= jmaxF && (jF - 1) >= jminF &&
             (kF + 1) <= kmaxF && (kF - 1) >= kminF)
    {
        fxx[p] = Sdxdx * XH_D2_NARROW(XH_FH(iF - 1, jF, kF),
                                      XH_FH(iF, jF, kF),
                                      XH_FH(iF + 1, jF, kF));

        fyy[p] = Sdydy * XH_D2_NARROW(XH_FH(iF, jF - 1, kF),
                                      XH_FH(iF, jF, kF),
                                      XH_FH(iF, jF + 1, kF));

        fzz[p] = Sdzdz * XH_D2_NARROW(XH_FH(iF, jF, kF - 1),
                                      XH_FH(iF, jF, kF),
                                      XH_FH(iF, jF, kF + 1));

        fxy[p] = Sdxdy * XH_DX_NARROW(XH_FH(iF - 1, jF - 1, kF),
                                      XH_FH(iF + 1, jF - 1, kF),
                                      XH_FH(iF - 1, jF + 1, kF),
                                      XH_FH(iF + 1, jF + 1, kF));

        fxz[p] = Sdxdz * XH_DX_NARROW(XH_FH(iF - 1, jF, kF - 1),
                                      XH_FH(iF + 1, jF, kF - 1),
                                      XH_FH(iF - 1, jF, kF + 1),
                                      XH_FH(iF + 1, jF, kF + 1));

        fyz[p] = Sdydz * XH_DX_NARROW(XH_FH(iF, jF - 1, kF - 1),
                                      XH_FH(iF, jF + 1, kF - 1),
                                      XH_FH(iF, jF - 1, kF + 1),
                                      XH_FH(iF, jF + 1, kF + 1));
    }
    /* Neither stencil fits inside the bounds: report zero derivatives. */
    else {
        fxx[p] = 0.0; fyy[p] = 0.0; fzz[p] = 0.0;
        fxy[p] = 0.0; fxz[p] = 0.0; fyz[p] = 0.0;
    }
}

#undef XH_ROW_Y
#undef XH_ROW_X
#undef XH_DX_NARROW
#undef XH_D2_NARROW
#undef XH_D1_WIDE
#undef XH_D2_WIDE
#undef XH_FH

#endif /* XH_FDDERIVS_XH_DEFINED */
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
#include "share_func.h"
|
#include "xh_share_func.h"
|
||||||
void fdderivs(const int ex[3],
|
void fdderivs(const int ex[3],
|
||||||
const double *f,
|
const double *f,
|
||||||
double *fxx, double *fxy, double *fxz,
|
double *fxx, double *fxy, double *fxz,
|
||||||
1980
AMSS_NCKU_source/extention/src/bssn_rhs copy.c
Normal file
1980
AMSS_NCKU_source/extention/src/bssn_rhs copy.c
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1961
AMSS_NCKU_source/extention/src/bssn_rhs-try.c
Normal file
1961
AMSS_NCKU_source/extention/src/bssn_rhs-try.c
Normal file
File diff suppressed because it is too large
Load Diff
311
AMSS_NCKU_source/extention/src/fdderivs-fast.c
Normal file
311
AMSS_NCKU_source/extention/src/fdderivs-fast.c
Normal file
@@ -0,0 +1,311 @@
|
|||||||
|
#include "../include/tool.h"
|
||||||
|
void fdderivs(const int ex[3],
|
||||||
|
const double *f,
|
||||||
|
double *fxx, double *fxy, double *fxz,
|
||||||
|
double *fyy, double *fyz, double *fzz,
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
double SYM1, double SYM2, double SYM3,
|
||||||
|
int Symmetry, int onoff)
|
||||||
|
{
|
||||||
|
(void)onoff;
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
|
||||||
|
const double F1o4 = 2.5e-1; // 1/4
|
||||||
|
const double F8 = 8.0;
|
||||||
|
const double F16 = 16.0;
|
||||||
|
const double F30 = 30.0;
|
||||||
|
const double F1o12 = ONE / 12.0;
|
||||||
|
const double F1o144 = ONE / 144.0;
|
||||||
|
|
||||||
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
|
const double dX = X[1] - X[0];
|
||||||
|
const double dY = Y[1] - Y[0];
|
||||||
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
|
const int imaxF = ex1;
|
||||||
|
const int jmaxF = ex2;
|
||||||
|
const int kmaxF = ex3;
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
|
|
||||||
|
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
|
||||||
|
const size_t nx = (size_t)ex1 + 2;
|
||||||
|
const size_t ny = (size_t)ex2 + 2;
|
||||||
|
const size_t nz = (size_t)ex3 + 2;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
/* 系数:按 Fortran 原式 */
|
||||||
|
const double Sdxdx = ONE / (dX * dX);
|
||||||
|
const double Sdydy = ONE / (dY * dY);
|
||||||
|
const double Sdzdz = ONE / (dZ * dZ);
|
||||||
|
|
||||||
|
const double Fdxdx = F1o12 / (dX * dX);
|
||||||
|
const double Fdydy = F1o12 / (dY * dY);
|
||||||
|
const double Fdzdz = F1o12 / (dZ * dZ);
|
||||||
|
|
||||||
|
const double Sdxdy = F1o4 / (dX * dY);
|
||||||
|
const double Sdxdz = F1o4 / (dX * dZ);
|
||||||
|
const double Sdydz = F1o4 / (dY * dZ);
|
||||||
|
|
||||||
|
const double Fdxdy = F1o144 / (dX * dY);
|
||||||
|
const double Fdxdz = F1o144 / (dX * dZ);
|
||||||
|
const double Fdydz = F1o144 / (dY * dZ);
|
||||||
|
|
||||||
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
// symmetry_bd(2, ex, f, fh, SoA);
|
||||||
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 < ex[2]; ++k0) {
|
||||||
|
for (int j0 = 0; j0 < ex[1]; ++j0) {
|
||||||
|
for (int i0 = 0; i0 < ex[0]; ++i0) {
|
||||||
|
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
|
||||||
|
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = f[idx_func0(i0, j0, k0, ex)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
|
||||||
|
for (int ii = 0; ii <= 2 - 1; ++ii) {
|
||||||
|
const int iF_dst = -ii; // 0, -1, -2, ...
|
||||||
|
const int iF_src = ii + 1; // 1, 2, 3, ...
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int jF = 1; jF <= ex[1]; ++jF) {
|
||||||
|
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
|
||||||
|
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
|
||||||
|
for (int jj = 0; jj <= 2 - 1; ++jj) {
|
||||||
|
const int jF_dst = -jj;
|
||||||
|
const int jF_src = jj + 1;
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
|
||||||
|
for (int kk = 0; kk <= 2 - 1; ++kk) {
|
||||||
|
const int kF_dst = -kk;
|
||||||
|
const int kF_src = kk + 1;
|
||||||
|
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
||||||
|
// const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
// for (size_t p = 0; p < all; ++p) {
|
||||||
|
// fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
|
||||||
|
// fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
|
||||||
|
// }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fortran:
|
||||||
|
* do k=1,ex3-1
|
||||||
|
* do j=1,ex2-1
|
||||||
|
* do i=1,ex1-1
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
/* 高阶分支:i±2,j±2,k±2 都在范围内 */
|
||||||
|
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
||||||
|
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
||||||
|
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
||||||
|
{
|
||||||
|
fxx[p] = Fdxdx * (
|
||||||
|
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyy[p] = Fdydy * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fzz[p] = Fdzdz * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
/* fxy 高阶:完全照搬 Fortran 的括号结构 */
|
||||||
|
{
|
||||||
|
const double t_jm2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jm1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
|
||||||
|
|
||||||
|
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fxz 高阶 */
|
||||||
|
{
|
||||||
|
const double t_km2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
|
||||||
|
|
||||||
|
const double t_km1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
|
||||||
|
|
||||||
|
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fyz 高阶 */
|
||||||
|
{
|
||||||
|
const double t_km2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
|
||||||
|
|
||||||
|
const double t_km1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
|
||||||
|
|
||||||
|
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* 二阶分支:i±1,j±1,k±1 在范围内 */
|
||||||
|
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
||||||
|
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
||||||
|
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
||||||
|
{
|
||||||
|
fxx[p] = Sdxdx * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyy[p] = Sdydy * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fzz[p] = Sdzdz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fxy[p] = Sdxdy * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fxz[p] = Sdxdz * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyz[p] = Sdydz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
}else{
|
||||||
|
fxx[p] = 0.0;
|
||||||
|
fyy[p] = 0.0;
|
||||||
|
fzz[p] = 0.0;
|
||||||
|
fxy[p] = 0.0;
|
||||||
|
fxz[p] = 0.0;
|
||||||
|
fyz[p] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// free(fh);
|
||||||
|
}
|
||||||
7
AMSS_NCKU_source/extention/src/main.c
Normal file
7
AMSS_NCKU_source/extention/src/main.c
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
#include "include/bssn_rhs_compute.h"
|
||||||
|
|
||||||
|
/*
 * Placeholder entry point for the BSSN right-hand-side extension.
 *
 * This is the place for ad-hoc test code that calls f_compute_rhs_bssn to
 * check its correctness: for example, set up a small grid with known initial
 * data, call the routine, and check that the output looks reasonable.
 * At present it does nothing and reports success.
 */
int main(void)
{
    return 0;
}
|
||||||
65
AMSS_NCKU_source/extention/src/new.c
Normal file
65
AMSS_NCKU_source/extention/src/new.c
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
SoA[0] = SYM, SoA[1] = SYM, SoA[2] = SYM;
|
||||||
|
#pragma omp for collapse(3)
|
||||||
|
for (int k0 = 0; k0 < ex[2]; ++k0) {
|
||||||
|
for (int j0 = 0; j0 < ex[1]; ++j0) {
|
||||||
|
for (int i0 = 0; i0 < ex[0]; ++i0) {
|
||||||
|
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
|
||||||
|
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = Lap[idx_func0(i0, j0, k0, ex)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
|
||||||
|
#pragma omp for collapse(3)
|
||||||
|
for (int ii = 0; ii <= 2 - 1; ++ii) {
|
||||||
|
const int iF_dst = -ii; // 0, -1, -2, ...
|
||||||
|
const int iF_src = ii + 1; // 1, 2, 3, ...
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int jF = 1; jF <= ex[1]; ++jF) {
|
||||||
|
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
|
||||||
|
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
|
||||||
|
#pragma omp for collapse(3)
|
||||||
|
for (int jj = 0; jj <= 2 - 1; ++jj) {
|
||||||
|
const int jF_dst = -jj;
|
||||||
|
const int jF_src = jj + 1;
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
|
||||||
|
#pragma omp for collapse(3)
|
||||||
|
for (int kk = 0; kk <= 2 - 1; ++kk) {
|
||||||
|
const int kF_dst = -kk;
|
||||||
|
const int kF_src = kk + 1;
|
||||||
|
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma omp for collapse(3)
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
fdderivs_xh(i0, j0, k0, ex, fh, iminF, jminF, kminF, ex1, ex2, ex3,
|
||||||
|
Fdxdx, Fdydy, Fdzdz, Fdxdy, Fdxdz, Fdydz,
|
||||||
|
Sdxdx, Sdydy, Sdzdz, Sdxdy, Sdxdz, Sdydz,
|
||||||
|
fxx,fxy,fxz,fyy,fyz,fzz
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
1980
AMSS_NCKU_source/extention/src/xh_bssn_rhs.c
Normal file
1980
AMSS_NCKU_source/extention/src/xh_bssn_rhs.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
|||||||
#include "tool.h"
|
#include "xh_tool.h"
|
||||||
void fdderivs(const int ex[3],
|
void fdderivs(const int ex[3],
|
||||||
const double *f,
|
const double *f,
|
||||||
double *fxx, double *fxy, double *fxz,
|
double *fxx, double *fxy, double *fxz,
|
||||||
@@ -8,7 +8,6 @@ void fdderivs(const int ex[3],
|
|||||||
int Symmetry, int onoff)
|
int Symmetry, int onoff)
|
||||||
{
|
{
|
||||||
(void)onoff;
|
(void)onoff;
|
||||||
|
|
||||||
const int NO_SYMM = 0, EQ_SYMM = 1;
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
|
||||||
const double F1o4 = 2.5e-1; // 1/4
|
const double F1o4 = 2.5e-1; // 1/4
|
||||||
@@ -33,7 +32,6 @@ void fdderivs(const int ex[3],
|
|||||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
|
||||||
|
|
||||||
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
|
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
|
||||||
const size_t nx = (size_t)ex1 + 2;
|
const size_t nx = (size_t)ex1 + 2;
|
||||||
@@ -41,19 +39,6 @@ void fdderivs(const int ex[3],
|
|||||||
const size_t nz = (size_t)ex3 + 2;
|
const size_t nz = (size_t)ex3 + 2;
|
||||||
const size_t fh_size = nx * ny * nz;
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
static double *fh = NULL;
|
|
||||||
static size_t cap = 0;
|
|
||||||
|
|
||||||
if (fh_size > cap) {
|
|
||||||
free(fh);
|
|
||||||
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
|
||||||
cap = fh_size;
|
|
||||||
}
|
|
||||||
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
|
||||||
if (!fh) return;
|
|
||||||
|
|
||||||
symmetry_bd(2, ex, f, fh, SoA);
|
|
||||||
|
|
||||||
/* 系数:按 Fortran 原式 */
|
/* 系数:按 Fortran 原式 */
|
||||||
const double Sdxdx = ONE / (dX * dX);
|
const double Sdxdx = ONE / (dX * dX);
|
||||||
const double Sdydy = ONE / (dY * dY);
|
const double Sdydy = ONE / (dY * dY);
|
||||||
@@ -71,12 +56,71 @@ void fdderivs(const int ex[3],
|
|||||||
const double Fdxdz = F1o144 / (dX * dZ);
|
const double Fdxdz = F1o144 / (dX * dZ);
|
||||||
const double Fdydz = F1o144 / (dY * dZ);
|
const double Fdydz = F1o144 / (dY * dZ);
|
||||||
|
|
||||||
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
static thread_local double *fh = NULL;
|
||||||
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
static thread_local size_t cap = 0;
|
||||||
for (size_t p = 0; p < all; ++p) {
|
|
||||||
fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
|
if (fh_size > cap) {
|
||||||
fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
}
|
}
|
||||||
|
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
// symmetry_bd(2, ex, f, fh, SoA);
|
||||||
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 < ex[2]; ++k0) {
|
||||||
|
for (int j0 = 0; j0 < ex[1]; ++j0) {
|
||||||
|
for (int i0 = 0; i0 < ex[0]; ++i0) {
|
||||||
|
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
|
||||||
|
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = f[idx_func0(i0, j0, k0, ex)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
|
||||||
|
for (int ii = 0; ii <= 2 - 1; ++ii) {
|
||||||
|
const int iF_dst = -ii; // 0, -1, -2, ...
|
||||||
|
const int iF_src = ii + 1; // 1, 2, 3, ...
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int jF = 1; jF <= ex[1]; ++jF) {
|
||||||
|
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
|
||||||
|
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
|
||||||
|
for (int jj = 0; jj <= 2 - 1; ++jj) {
|
||||||
|
const int jF_dst = -jj;
|
||||||
|
const int jF_src = jj + 1;
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
|
||||||
|
for (int kk = 0; kk <= 2 - 1; ++kk) {
|
||||||
|
const int kF_dst = -kk;
|
||||||
|
const int kF_src = kk + 1;
|
||||||
|
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
||||||
|
// const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
// for (size_t p = 0; p < all; ++p) {
|
||||||
|
// fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
|
||||||
|
// fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
|
||||||
|
// }
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fortran:
|
* Fortran:
|
||||||
@@ -263,6 +307,5 @@ void fdderivs(const int ex[3],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// free(fh);
|
// free(fh);
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
#include "tool.h"
|
#include "xh_tool.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* C 版 fderivs
|
* C 版 fderivs
|
||||||
@@ -32,11 +32,6 @@ void fderivs(const int ex[3],
|
|||||||
const double dY = Y[1] - Y[0];
|
const double dY = Y[1] - Y[0];
|
||||||
const double dZ = Z[1] - Z[0];
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
// Fortran 1-based bounds
|
|
||||||
const int imaxF = ex1;
|
|
||||||
const int jmaxF = ex2;
|
|
||||||
const int kmaxF = ex3;
|
|
||||||
|
|
||||||
int iminF = 1, jminF = 1, kminF = 1;
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
@@ -50,8 +45,8 @@ void fderivs(const int ex[3],
|
|||||||
const size_t ny = (size_t)ex2 + 2;
|
const size_t ny = (size_t)ex2 + 2;
|
||||||
const size_t nz = (size_t)ex3 + 2;
|
const size_t nz = (size_t)ex3 + 2;
|
||||||
const size_t fh_size = nx * ny * nz;
|
const size_t fh_size = nx * ny * nz;
|
||||||
static double *fh = NULL;
|
static thread_local double *fh = NULL;
|
||||||
static size_t cap = 0;
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
if (fh_size > cap) {
|
if (fh_size > cap) {
|
||||||
free(fh);
|
free(fh);
|
||||||
@@ -97,9 +92,9 @@ void fderivs(const int ex[3],
|
|||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
|
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
|
||||||
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
if ((iF + 2) <= ex1 && (iF - 2) >= iminF &&
|
||||||
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
(jF + 2) <= ex2 && (jF - 2) >= jminF &&
|
||||||
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
(kF + 2) <= ex3 && (kF - 2) >= kminF)
|
||||||
{
|
{
|
||||||
fx[p] = d12dx * (
|
fx[p] = d12dx * (
|
||||||
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
||||||
@@ -123,9 +118,9 @@ void fderivs(const int ex[3],
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
// elseif(i+1 <= imax .and. i-1 >= imin ...)
|
// elseif(i+1 <= imax .and. i-1 >= imin ...)
|
||||||
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
else if ((iF + 1) <= ex1 && (iF - 1) >= iminF &&
|
||||||
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
(jF + 1) <= ex2 && (jF - 1) >= jminF &&
|
||||||
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
(kF + 1) <= ex3 && (kF - 1) >= kminF)
|
||||||
{
|
{
|
||||||
fx[p] = d2dx * (
|
fx[p] = d2dx * (
|
||||||
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
#include "tool.h"
|
#include "xh_tool.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* C 版 kodis
|
* C 版 kodis
|
||||||
@@ -48,7 +48,14 @@ void kodis(const int ex[3],
|
|||||||
const size_t nz = (size_t)ex3 + 3;
|
const size_t nz = (size_t)ex3 + 3;
|
||||||
const size_t fh_size = nx * ny * nz;
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
if (!fh) return;
|
if (!fh) return;
|
||||||
|
|
||||||
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
@@ -105,5 +112,5 @@ void kodis(const int ex[3],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
free(fh);
|
// free(fh);
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
#include "tool.h"
|
#include "xh_tool.h"
|
||||||
/*
|
/*
|
||||||
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
|
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
|
||||||
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
@@ -60,7 +60,14 @@ void lopsided(const int ex[3],
|
|||||||
const size_t nz = (size_t)ex3 + 3;
|
const size_t nz = (size_t)ex3 + 3;
|
||||||
const size_t fh_size = nx * ny * nz;
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
double *fh = (double*)malloc(fh_size * sizeof(double));
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
|
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
|
||||||
|
|
||||||
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
@@ -246,7 +253,7 @@ void lopsided(const int ex[3],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(fh);
|
// free(fh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -883,17 +883,13 @@ subroutine symmetry_bd(ord,extc,func,funcc,SoA)
|
|||||||
|
|
||||||
integer::i
|
integer::i
|
||||||
|
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
funcc(1:extc(1),1:extc(2),1:extc(3)) = func
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
|
funcc(-i,1:extc(2),1:extc(3)) = funcc(i+1,1:extc(2),1:extc(3))*SoA(1)
|
||||||
enddo
|
enddo
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
|
funcc(:,-i,1:extc(3)) = funcc(:,i+1,1:extc(3))*SoA(2)
|
||||||
enddo
|
enddo
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
do i=0,ord-1
|
do i=0,ord-1
|
||||||
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
|
funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
|
||||||
enddo
|
enddo
|
||||||
@@ -1116,7 +1112,6 @@ end subroutine d2dump
|
|||||||
! Lagrangian polynomial interpolation
|
! Lagrangian polynomial interpolation
|
||||||
!------------------------------------------------------------------------------
|
!------------------------------------------------------------------------------
|
||||||
|
|
||||||
!DIR$ ATTRIBUTES FORCEINLINE :: polint
|
|
||||||
subroutine polint(xa, ya, x, y, dy, ordn)
|
subroutine polint(xa, ya, x, y, dy, ordn)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
|
|||||||
@@ -1,107 +0,0 @@
|
|||||||
#include "interp_lb_profile.h"
|
|
||||||
#include <cstdio>
|
|
||||||
#include <cstring>
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
namespace InterpLBProfile {
|
|
||||||
|
|
||||||
bool write_profile(const char *filepath, int nprocs,
|
|
||||||
const double *rank_times,
|
|
||||||
const int *heavy_ranks, int num_heavy,
|
|
||||||
double threshold_ratio)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(filepath, "wb");
|
|
||||||
if (!fp) return false;
|
|
||||||
|
|
||||||
ProfileHeader hdr;
|
|
||||||
hdr.magic = MAGIC;
|
|
||||||
hdr.version = VERSION;
|
|
||||||
hdr.nprocs = nprocs;
|
|
||||||
hdr.num_heavy = num_heavy;
|
|
||||||
hdr.threshold_ratio = threshold_ratio;
|
|
||||||
|
|
||||||
fwrite(&hdr, sizeof(hdr), 1, fp);
|
|
||||||
fwrite(rank_times, sizeof(double), nprocs, fp);
|
|
||||||
fwrite(heavy_ranks, sizeof(int), num_heavy, fp);
|
|
||||||
fclose(fp);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool read_profile(const char *filepath, int current_nprocs,
|
|
||||||
int *heavy_ranks, int &num_heavy,
|
|
||||||
double *rank_times, MPI_Comm comm)
|
|
||||||
{
|
|
||||||
int myrank;
|
|
||||||
MPI_Comm_rank(comm, &myrank);
|
|
||||||
|
|
||||||
int valid = 0;
|
|
||||||
ProfileHeader hdr;
|
|
||||||
memset(&hdr, 0, sizeof(hdr));
|
|
||||||
|
|
||||||
if (myrank == 0) {
|
|
||||||
FILE *fp = fopen(filepath, "rb");
|
|
||||||
if (fp) {
|
|
||||||
if (fread(&hdr, sizeof(hdr), 1, fp) == 1 &&
|
|
||||||
hdr.magic == MAGIC && hdr.version == VERSION &&
|
|
||||||
hdr.nprocs == current_nprocs)
|
|
||||||
{
|
|
||||||
if (fread(rank_times, sizeof(double), current_nprocs, fp)
|
|
||||||
== (size_t)current_nprocs &&
|
|
||||||
fread(heavy_ranks, sizeof(int), hdr.num_heavy, fp)
|
|
||||||
== (size_t)hdr.num_heavy)
|
|
||||||
{
|
|
||||||
num_heavy = hdr.num_heavy;
|
|
||||||
valid = 1;
|
|
||||||
}
|
|
||||||
} else if (fp) {
|
|
||||||
printf("[InterpLB] Profile rejected: magic=0x%X version=%u "
|
|
||||||
"nprocs=%d (current=%d)\n",
|
|
||||||
hdr.magic, hdr.version, hdr.nprocs, current_nprocs);
|
|
||||||
}
|
|
||||||
fclose(fp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Bcast(&valid, 1, MPI_INT, 0, comm);
|
|
||||||
if (!valid) return false;
|
|
||||||
|
|
||||||
MPI_Bcast(&num_heavy, 1, MPI_INT, 0, comm);
|
|
||||||
MPI_Bcast(heavy_ranks, num_heavy, MPI_INT, 0, comm);
|
|
||||||
MPI_Bcast(rank_times, current_nprocs, MPI_DOUBLE, 0, comm);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int identify_heavy_ranks(const double *rank_times, int nprocs,
|
|
||||||
double threshold_ratio,
|
|
||||||
int *heavy_ranks, int max_heavy)
|
|
||||||
{
|
|
||||||
double sum = 0;
|
|
||||||
for (int i = 0; i < nprocs; i++) sum += rank_times[i];
|
|
||||||
double mean = sum / nprocs;
|
|
||||||
double threshold = threshold_ratio * mean;
|
|
||||||
|
|
||||||
// Collect candidates
|
|
||||||
struct RankTime { int rank; double time; };
|
|
||||||
RankTime *candidates = new RankTime[nprocs];
|
|
||||||
int ncand = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < nprocs; i++) {
|
|
||||||
if (rank_times[i] > threshold)
|
|
||||||
candidates[ncand++] = {i, rank_times[i]};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort descending by time
|
|
||||||
std::sort(candidates, candidates + ncand,
|
|
||||||
[](const RankTime &a, const RankTime &b) {
|
|
||||||
return a.time > b.time;
|
|
||||||
});
|
|
||||||
|
|
||||||
int count = (ncand < max_heavy) ? ncand : max_heavy;
|
|
||||||
for (int i = 0; i < count; i++)
|
|
||||||
heavy_ranks[i] = candidates[i].rank;
|
|
||||||
|
|
||||||
delete[] candidates;
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace InterpLBProfile
|
|
||||||
Binary file not shown.
@@ -1,38 +0,0 @@
|
|||||||
#ifndef INTERP_LB_PROFILE_H
|
|
||||||
#define INTERP_LB_PROFILE_H
|
|
||||||
|
|
||||||
#include <mpi.h>
|
|
||||||
|
|
||||||
namespace InterpLBProfile {
|
|
||||||
|
|
||||||
static const unsigned int MAGIC = 0x494C4250; // "ILBP"
|
|
||||||
static const unsigned int VERSION = 1;
|
|
||||||
|
|
||||||
struct ProfileHeader {
|
|
||||||
unsigned int magic;
|
|
||||||
unsigned int version;
|
|
||||||
int nprocs;
|
|
||||||
int num_heavy;
|
|
||||||
double threshold_ratio;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Write profile file (rank 0 only)
|
|
||||||
bool write_profile(const char *filepath, int nprocs,
|
|
||||||
const double *rank_times,
|
|
||||||
const int *heavy_ranks, int num_heavy,
|
|
||||||
double threshold_ratio);
|
|
||||||
|
|
||||||
// Read profile file (rank 0 reads, then broadcasts to all)
|
|
||||||
// Returns true if file found and valid for current nprocs
|
|
||||||
bool read_profile(const char *filepath, int current_nprocs,
|
|
||||||
int *heavy_ranks, int &num_heavy,
|
|
||||||
double *rank_times, MPI_Comm comm);
|
|
||||||
|
|
||||||
// Identify heavy ranks: those with time > threshold_ratio * mean
|
|
||||||
int identify_heavy_ranks(const double *rank_times, int nprocs,
|
|
||||||
double threshold_ratio,
|
|
||||||
int *heavy_ranks, int max_heavy);
|
|
||||||
|
|
||||||
} // namespace InterpLBProfile
|
|
||||||
|
|
||||||
#endif /* INTERP_LB_PROFILE_H */
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
/* Auto-generated from interp_lb_profile.bin — do not edit */
|
|
||||||
#ifndef INTERP_LB_PROFILE_DATA_H
|
|
||||||
#define INTERP_LB_PROFILE_DATA_H
|
|
||||||
|
|
||||||
#define INTERP_LB_NPROCS 64
|
|
||||||
#define INTERP_LB_NUM_HEAVY 4
|
|
||||||
|
|
||||||
static const int interp_lb_heavy_blocks[4] = {27, 35, 28, 36};
|
|
||||||
|
|
||||||
/* Split table: {block_id, r_left, r_right} */
|
|
||||||
static const int interp_lb_splits[4][3] = {
|
|
||||||
{27, 26, 27},
|
|
||||||
{35, 34, 35},
|
|
||||||
{28, 28, 29},
|
|
||||||
{36, 36, 37},
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Rank remap for displaced neighbor blocks */
|
|
||||||
static const int interp_lb_num_remaps = 4;
|
|
||||||
static const int interp_lb_remaps[][2] = {
|
|
||||||
{26, 25},
|
|
||||||
{29, 30},
|
|
||||||
{34, 33},
|
|
||||||
{37, 38},
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* INTERP_LB_PROFILE_DATA_H */
|
|
||||||
@@ -65,8 +65,6 @@ real*8,intent(in) :: eps
|
|||||||
! dx^4
|
! dx^4
|
||||||
|
|
||||||
! note the sign (-1)^r-1, now r=2
|
! note the sign (-1)^r-1, now r=2
|
||||||
!DIR$ SIMD VECTORLENGTHFOR(KNOWN_INTEGER=8)
|
|
||||||
!DIR$ UNROLL PARTIAL(4)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
|||||||
@@ -487,201 +487,6 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
|
|||||||
|
|
||||||
end subroutine lopsided
|
end subroutine lopsided
|
||||||
|
|
||||||
!-----------------------------------------------------------------------------
! lopsided_kodis
!
! Adds two contributions to f_rhs using one shared ghost-padded copy of f:
!   1. advection, Sfx*df/dx + Sfy*df/dy + Sfz*df/dz, with one-sided
!      ("lopsided") five-point differences selected by the sign of each
!      velocity component and by the distance to the grid edge;
!   2. Kreiss-Oliger dissipation (only when eps > 0), from the seven-point
!      stencil in each direction, scaled by eps/64.
! The padded copy fh is filled by a single symmetry_bd call, which saves one
! full-grid copy compared with separate routines.  The original header states
! that the result is mathematically identical to calling lopsided then kodis.
!
! Arguments
!   ex           number of grid points in each direction
!   X, Y, Z      coordinates; only entries 1 and 2 of each array are read
!                (grid spacing, and whether the first point lies within one
!                spacing of zero)
!   f            field to advect / dissipate
!   f_rhs        right-hand side, updated in place
!   Sfx,Sfy,Sfz  advection velocity components
!   Symmetry     compared against NO_SYMM (0) and EQ_SYMM (1); larger values
!                (OCTANT = 2 in the original) enable the x and y mirrors too
!   SoA          factors handed to symmetry_bd
!   eps          dissipation strength
!-----------------------------------------------------------------------------
subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps)
  implicit none

!~~~~~~> Input parameters:

  integer, intent(in) :: ex(1:3), Symmetry
  real*8,  intent(in) :: X(1:ex(1)), Y(1:ex(2)), Z(1:ex(3))
  real*8,  dimension(ex(1),ex(2),ex(3)), intent(in)    :: f, Sfx, Sfy, Sfz
  real*8,  dimension(ex(1),ex(2),ex(3)), intent(inout) :: f_rhs
  real*8,  dimension(3), intent(in) :: SoA
  real*8,  intent(in) :: eps

!~~~~~~> local variables:

  ! padded copy of f: indices -2,-1,0 are the three extra points per direction
  real*8, dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh
  integer :: ilo, jlo, klo, ihi, jhi, khi
  integer :: i, j, k
  real*8  :: hx, hy, hz          ! grid spacings
  real*8  :: cx, cy, cz          ! 1/(12*spacing)
  real*8  :: vx, vy, vz          ! velocity components at the current point
  real*8  :: kfac                ! eps/cof
  real*8, parameter :: ZEO=0.d0, ONE=1.d0, F3=3.d0
  real*8, parameter :: F6=6.0d0, F18=1.8d1
  real*8, parameter :: F12=1.2d1, F10=1.d1, EIT=8.d0
  integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1
  ! dissipation coefficients
  real*8, parameter :: SIX=6.d0, FIT=1.5d1, TWT=2.d1
  real*8, parameter :: cof=6.4d1   ! 2^6

  hx = X(2)-X(1)
  hy = Y(2)-Y(1)
  hz = Z(2)-Z(1)

  cx = ONE/F12/hx
  cy = ONE/F12/hy
  cz = ONE/F12/hz

  ihi = ex(1)
  jhi = ex(2)
  khi = ex(3)

  ! The lower bound drops to -2 where the padded points are usable: the
  ! symmetry is switched on and the first grid point is within one spacing
  ! of zero.
  ilo = 1
  jlo = 1
  klo = 1
  if(Symmetry > NO_SYMM .and. dabs(Z(1)) < hz) klo = -2
  if(Symmetry > EQ_SYMM .and. dabs(X(1)) < hx) ilo = -2
  if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < hy) jlo = -2

  ! one padded copy serves both the advection and the dissipation part
  call symmetry_bd(3,ex,f,fh,SoA)

  ! ---- Advection (lopsided) ----
  ! The loops stop at ex-1, so points with index ex get no advection term.
  ! NOTE(review): the original comment says this bound is "only for
  ! efficiency" -- confirm.
  do k=1,ex(3)-1
  do j=1,ex(2)-1
  do i=1,ex(1)-1

    ! x direction
    vx = Sfx(i,j,k)
    if(vx > ZEO)then
      if(i+3 <= ihi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vx*cx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
                 -F6*fh(i+2,j,k)+ fh(i+3,j,k))
      elseif(i+2 <= ihi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vx*cx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
      elseif(i+1 <= ihi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)- &
          vx*cx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
                 -F6*fh(i-2,j,k)+ fh(i-3,j,k))
      endif
    elseif(vx < ZEO)then
      if(i-3 >= ilo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)- &
          vx*cx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) &
                 -F6*fh(i-2,j,k)+ fh(i-3,j,k))
      elseif(i-2 >= ilo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vx*cx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
      elseif(i-1 >= ilo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vx*cx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) &
                 -F6*fh(i+2,j,k)+ fh(i+3,j,k))
      endif
    endif

    ! y direction
    vy = Sfy(i,j,k)
    if(vy > ZEO)then
      if(j+3 <= jhi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vy*cy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
                 -F6*fh(i,j+2,k)+ fh(i,j+3,k))
      elseif(j+2 <= jhi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vy*cy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
      elseif(j+1 <= jhi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)- &
          vy*cy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
                 -F6*fh(i,j-2,k)+ fh(i,j-3,k))
      endif
    elseif(vy < ZEO)then
      if(j-3 >= jlo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)- &
          vy*cy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) &
                 -F6*fh(i,j-2,k)+ fh(i,j-3,k))
      elseif(j-2 >= jlo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vy*cy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
      elseif(j-1 >= jlo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vy*cy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) &
                 -F6*fh(i,j+2,k)+ fh(i,j+3,k))
      endif
    endif

    ! z direction
    vz = Sfz(i,j,k)
    if(vz > ZEO)then
      if(k+3 <= khi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vz*cz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
                 -F6*fh(i,j,k+2)+ fh(i,j,k+3))
      elseif(k+2 <= khi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vz*cz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
      elseif(k+1 <= khi)then
        f_rhs(i,j,k)=f_rhs(i,j,k)- &
          vz*cz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
                 -F6*fh(i,j,k-2)+ fh(i,j,k-3))
      endif
    elseif(vz < ZEO)then
      if(k-3 >= klo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)- &
          vz*cz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) &
                 -F6*fh(i,j,k-2)+ fh(i,j,k-3))
      elseif(k-2 >= klo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vz*cz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
      elseif(k-1 >= klo)then
        f_rhs(i,j,k)=f_rhs(i,j,k)+ &
          vz*cz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) &
                 -F6*fh(i,j,k+2)+ fh(i,j,k+3))
      endif
    endif

  enddo
  enddo
  enddo

  ! ---- Dissipation (kodis) ----
  ! Applied only where the full seven-point stencil fits in all three
  ! directions; other points are left unchanged.
  if(eps > ZEO) then
    kfac = eps/cof
    do k=1,ex(3)
    do j=1,ex(2)
    do i=1,ex(1)

      if(i-3 >= ilo .and. i+3 <= ihi .and. &
         j-3 >= jlo .and. j+3 <= jhi .and. &
         k-3 >= klo .and. k+3 <= khi) then
        f_rhs(i,j,k) = f_rhs(i,j,k) + kfac *( ( &
                       (fh(i-3,j,k)+fh(i+3,j,k)) - &
                   SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + &
                   FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - &
                   TWT* fh(i,j,k) )/hx + &
                     ( &
                       (fh(i,j-3,k)+fh(i,j+3,k)) - &
                   SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + &
                   FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - &
                   TWT* fh(i,j,k) )/hy + &
                     ( &
                       (fh(i,j,k-3)+fh(i,j,k+3)) - &
                   SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + &
                   FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - &
                   TWT* fh(i,j,k) )/hz )
      endif

    enddo
    enddo
    enddo
  endif

  return

end subroutine lopsided_kodis
|
|
||||||
|
|
||||||
#elif (ghost_width == 4)
|
#elif (ghost_width == 4)
|
||||||
! sixth order code
|
! sixth order code
|
||||||
! Compute advection terms in right hand sides of field equations
|
! Compute advection terms in right hand sides of field equations
|
||||||
|
|||||||
@@ -1,77 +1,83 @@
|
|||||||
|
|
||||||
#define tetradtype 2
|
|
||||||
|
#if 0
|
||||||
#define Cell
|
note here
|
||||||
|
v:r; u: phi; w: theta
|
||||||
#define ghost_width 3
|
tetradtype 0
|
||||||
|
v^a = (x,y,z)
|
||||||
|
orthonormal order: v,u,w
|
||||||
|
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
|
||||||
#define GAUGE 0
|
tetradtype 1
|
||||||
|
orthonormal order: w,u,v
|
||||||
#define CPBC_ghost_width (ghost_width)
|
m = (theta + i phi)/sqrt(2) following Sperhake, Eq.(3.2) of PRD 85, 124062(2012)
|
||||||
|
tetradtype 2
|
||||||
#define ABV 0
|
v_a = (x,y,z)
|
||||||
|
orthonormal order: v,u,w
|
||||||
#define EScalar_CC 2
|
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
|
||||||
|
#endif
|
||||||
#if 0
|
#define tetradtype 2
|
||||||
|
|
||||||
define tetradtype
|
#if 0
|
||||||
v:r; u: phi; w: theta
|
note here
|
||||||
tetradtype 0
|
Cell center or Vertex center
|
||||||
v^a = (x,y,z)
|
#endif
|
||||||
orthonormal order: v,u,w
|
#define Cell
|
||||||
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
|
|
||||||
tetradtype 1
|
#if 0
|
||||||
orthonormal order: w,u,v
|
note here
|
||||||
m = (theta + i phi)/sqrt(2) following Sperhake, Eq.(3.2) of PRD 85, 124062(2012)
|
2nd order: 2
|
||||||
tetradtype 2
|
4th order: 3
|
||||||
v_a = (x,y,z)
|
6th order: 4
|
||||||
orthonormal order: v,u,w
|
8th order: 5
|
||||||
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
|
#endif
|
||||||
|
#define ghost_width 3
|
||||||
define Cell or Vertex
|
|
||||||
Cell center or Vertex center
|
#if 0
|
||||||
|
note here
|
||||||
define ghost_width
|
use shell or not
|
||||||
2nd order: 2
|
#endif
|
||||||
4th order: 3
|
#define WithShell
|
||||||
6th order: 4
|
|
||||||
8th order: 5
|
#if 0
|
||||||
|
note here
|
||||||
define WithShell
|
use constraint preserving boundary condition or not
|
||||||
use shell or not
|
only affect Z4c
|
||||||
|
#endif
|
||||||
define CPBC
|
#define CPBC
|
||||||
use constraint preserving boundary condition or not
|
|
||||||
only affect Z4c
|
#if 0
|
||||||
CPBC only supports WithShell
|
note here
|
||||||
|
Gauge condition type
|
||||||
define GAUGE
|
0: B^i gauge
|
||||||
0: B^i gauge
|
1: David's puncture gauge
|
||||||
1: David puncture gauge
|
2: MB B^i gauge
|
||||||
2: MB B^i gauge
|
3: RIT B^i gauge
|
||||||
3: RIT B^i gauge
|
4: MB beta gauge (beta gauge not means Eq.(3) of PRD 84, 124006)
|
||||||
4: MB beta gauge (beta gauge not means Eq.(3) of PRD 84, 124006)
|
5: RIT beta gauge (beta gauge not means Eq.(3) of PRD 84, 124006)
|
||||||
5: RIT beta gauge (beta gauge not means Eq.(3) of PRD 84, 124006)
|
6: MGB1 B^i gauge
|
||||||
6: MGB1 B^i gauge
|
7: MGB2 B^i gauge
|
||||||
7: MGB2 B^i gauge
|
#endif
|
||||||
|
#define GAUGE 2
|
||||||
define CPBC_ghost_width (ghost_width)
|
|
||||||
buffer points for CPBC boundary
|
#if 0
|
||||||
|
buffer points for CPBC boundary
|
||||||
define ABV
|
#endif
|
||||||
0: using BSSN variable for constraint violation and psi4 calculation
|
#define CPBC_ghost_width (ghost_width)
|
||||||
1: using ADM variable for constraint violation and psi4 calculation
|
|
||||||
|
#if 0
|
||||||
define EScalar_CC
|
using BSSN variable for constraint violation and psi4 calculation: 0
|
||||||
Type of Potential and Scalar Distribution in F(R) Scalar-Tensor Theory
|
using ADM variable for constraint violation and psi4 calculation: 1
|
||||||
1: Case C of 1112.3928, V=0
|
#endif
|
||||||
2: shell with phi(r) = phi0 * a2^2/(1+a2^2), f(R) = R+a2*R^2 induced V
|
#define ABV 0
|
||||||
3: ground state of Schrodinger-Newton system, f(R) = R+a2*R^2 induced V
|
|
||||||
4: a2 = +oo and phi(r) = phi0 * 0.5 * ( tanh((r+r0)/sigma) - tanh((r-r0)/sigma) )
|
#if 0
|
||||||
5: shell with phi(r) = phi0 * Exp(-(r-r0)**2/sigma), V = 0
|
Type of Potential and Scalar Distribution in F(R) Scalar-Tensor Theory
|
||||||
|
1: Case C of 1112.3928, V=0
|
||||||
#endif
|
2: shell with a2^2*phi0/(1+a2^2), f(R) = R+a2*R^2 induced V
|
||||||
|
3: ground state of Schrodinger-Newton system, f(R) = R+a2*R^2 induced V
|
||||||
|
4: a2 = oo and phi(r) = phi0 * 0.5 * ( tanh((r+r0)/sigma) - tanh((r-r0)/sigma) )
|
||||||
|
5: shell with phi(r) = phi0*Exp(-(r-r0)**2/sigma), V = 0
|
||||||
|
#endif
|
||||||
|
#define EScalar_CC 2
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,145 +1,112 @@
|
|||||||
|
|
||||||
#ifndef MICRODEF_H
|
#ifndef MICRODEF_H
|
||||||
#define MICRODEF_H
|
#define MICRODEF_H
|
||||||
|
|
||||||
#include "macrodef.fh"
|
#include "macrodef.fh"
|
||||||
|
|
||||||
// application parameters
|
// application parameters
|
||||||
|
|
||||||
#define SommerType 0
|
/// ****
|
||||||
|
// sommerfeld boundary type
|
||||||
#define GaussInt
|
// 0: bam, 1: shibata
|
||||||
|
#define SommerType 0
|
||||||
#define ABEtype 0
|
|
||||||
|
/// ****
|
||||||
//#define With_AHF
|
// for Using Gauss-Legendre quadrature in theta direction
|
||||||
#define Psi4type 0
|
#define GaussInt
|
||||||
|
|
||||||
//#define Point_Psi4
|
/// ****
|
||||||
|
// 0: BSSN vacuum
|
||||||
#define RPS 1
|
// 1: coupled to scalar field
|
||||||
|
// 2: Z4c vacuum
|
||||||
#define AGM 0
|
// 3: coupled to Maxwell field
|
||||||
|
//
|
||||||
#define RPB 0
|
#define ABEtype 2
|
||||||
|
|
||||||
#define MAPBH 1
|
/// ****
|
||||||
|
// using Apparent Horizon Finder
|
||||||
#define PSTR 0
|
//#define With_AHF
|
||||||
|
|
||||||
#define REGLEV 0
|
/// ****
|
||||||
|
// Psi4 calculation method
|
||||||
//#define USE_GPU
|
// 0: EB method
|
||||||
|
// 1: 4-D method
|
||||||
//#define CHECKDETAIL
|
//
|
||||||
|
#define Psi4type 0
|
||||||
//#define FAKECHECK
|
|
||||||
|
/// ****
|
||||||
//
|
// for Using point psi4 or not
|
||||||
// define SommerType
|
//#define Point_Psi4
|
||||||
// sommerfeld boundary type
|
|
||||||
// 0: bam
|
/// ****
|
||||||
// 1: shibata
|
// RestrictProlong in Step (0) or after Step (1)
|
||||||
//
|
#define RPS 1
|
||||||
// define GaussInt
|
|
||||||
// for Using Gauss-Legendre quadrature in theta direction
|
/// ****
|
||||||
//
|
// Enforce algebra constraint
|
||||||
// define ABEtype
|
// for every RK4 sub step: 0
|
||||||
// 0: BSSN vacuum
|
// only when iter_count == 3: 1
|
||||||
// 1: coupled to scalar field
|
// after routine Step: 2
|
||||||
// 2: Z4c vacuum
|
#define AGM 0
|
||||||
// 3: coupled to Maxwell field
|
|
||||||
//
|
/// ****
|
||||||
// define With_AHF
|
// Restrict Prolong using BAM style 1 or old style 0
|
||||||
// using Apparent Horizon Finder
|
#define RPB 0
|
||||||
//
|
|
||||||
// define Psi4type
|
/// ****
|
||||||
// Psi4 calculation method
|
// 1: move Analysis out of 4 sub steps and treat PBH with Euler method
|
||||||
// 0: EB method
|
#define MAPBH 1
|
||||||
// 1: 4-D method
|
|
||||||
//
|
/// ****
|
||||||
// define Point_Psi4
|
// parallel structure, 0: level by level, 1: considering all levels, 2: as 1 but reverse the CPU order, 3: Frank's scheme
|
||||||
// for Using point psi4 or not
|
#define PSTR 0
|
||||||
//
|
|
||||||
// define RPS
|
/// ****
|
||||||
// RestrictProlong in Step (0) or after Step (1)
|
// regrid for every level or for all levels at a time
|
||||||
//
|
// 0: for every level; 1: for all
|
||||||
// define AGM
|
#define REGLEV 0
|
||||||
// Enforce algebra constraint
|
|
||||||
// for every RK4 sub step: 0
|
/// ****
|
||||||
// only when iter_count == 3: 1
|
// use gpu or not
|
||||||
// after routine Step: 2
|
//#define USE_GPU
|
||||||
//
|
|
||||||
// define RPB
|
/// ****
|
||||||
// Restrict Prolong using BAM style 1 or old style 0
|
// use checkpoint for every process
|
||||||
//
|
//#define CHECKDETAIL
|
||||||
// define MAPBH
|
|
||||||
// 1: move Analysis out of 4 sub steps and treat PBH with Euler method
|
/// ****
|
||||||
//
|
// use FakeCheckPrepare to write CheckPoint
|
||||||
// define PSTR
|
//#define FAKECHECK
|
||||||
// parallel structure
|
////================================================================
|
||||||
// 0: level by level
|
// some basic parameters for numerical calculation
|
||||||
// 1: considering all levels
|
#define dim 3
|
||||||
// 2: as 1 but reverse the CPU order
|
|
||||||
// 3: Frank's scheme
|
//#define Cell or Vertex in "macrodef.fh"
|
||||||
//
|
|
||||||
// define REGLEV
|
// ******
|
||||||
// regrid for every level or for all levels at a time
|
// buffer point number for mesh refinement interface
|
||||||
// 0: for every level;
|
#define buffer_width 6
|
||||||
// 1: for all
|
|
||||||
//
|
// ******
|
||||||
// define USE_GPU
|
// buffer point number shell-box interface, on shell
|
||||||
// use gpu or not
|
#define SC_width buffer_width
|
||||||
//
|
// buffer point number shell-box interface, on box
|
||||||
// define CHECKDETAIL
|
#define CS_width (2*buffer_width)
|
||||||
// use checkpoint for every process
|
|
||||||
//
|
#if(buffer_width < ghost_width)
|
||||||
// define FAKECHECK
|
#error we always assume buffer_width>ghost_width
|
||||||
// use FakeCheckPrepare to write CheckPoint
|
#endif
|
||||||
//
|
|
||||||
|
#define PACK 1
|
||||||
////================================================================
|
#define UNPACK 2
|
||||||
// some basic parameters for numerical calculation
|
|
||||||
////================================================================
|
#define Mymax(a,b) (((a) > (b)) ? (a) : (b))
|
||||||
|
#define Mymin(a,b) (((a) < (b)) ? (a) : (b))
|
||||||
#define dim 3
|
|
||||||
|
#define feq(a,b,d) (fabs(a-b)<d)
|
||||||
//#define Cell or Vertex in "macrodef.fh"
|
#define flt(a,b,d) ((a-b)<d)
|
||||||
|
#define fgt(a,b,d) ((a-b)>d)
|
||||||
#define buffer_width 6
|
|
||||||
|
#define TINY 1e-10
|
||||||
#define SC_width buffer_width
|
|
||||||
|
#endif /* MICRODEF_H */
|
||||||
#define CS_width (2*buffer_width)
|
|
||||||
|
|
||||||
//
|
|
||||||
// define Cell or Vertex in "macrodef.fh"
|
|
||||||
//
|
|
||||||
// define buffer_width
|
|
||||||
// buffer point number for mesh refinement interface
|
|
||||||
//
|
|
||||||
// define SC_width buffer_width
|
|
||||||
// buffer point number shell-box interface, on shell
|
|
||||||
//
|
|
||||||
// define CS_width
|
|
||||||
// buffer point number shell-box interface, on box
|
|
||||||
//
|
|
||||||
|
|
||||||
#if(buffer_width < ghost_width)
|
|
||||||
# error we always assume buffer_width>ghost_width
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define PACK 1
|
|
||||||
#define UNPACK 2
|
|
||||||
|
|
||||||
#define Mymax(a,b) (((a) > (b)) ? (a) : (b))
|
|
||||||
#define Mymin(a,b) (((a) < (b)) ? (a) : (b))
|
|
||||||
|
|
||||||
#define feq(a,b,d) (fabs(a-b)<d)
|
|
||||||
#define flt(a,b,d) ((a-b)<d)
|
|
||||||
#define fgt(a,b,d) ((a-b)>d)
|
|
||||||
|
|
||||||
#define TINY 1e-10
|
|
||||||
|
|
||||||
#endif /* MICRODEF_H */
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,34 +2,13 @@
|
|||||||
|
|
||||||
include makefile.inc
|
include makefile.inc
|
||||||
|
|
||||||
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
|
||||||
## make -> opt (PGO-guided, maximum performance)
|
|
||||||
## make PGO_MODE=instrument -> instrument (Phase 1: collect fresh profile data)
|
|
||||||
PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
|
||||||
|
|
||||||
ifeq ($(PGO_MODE),instrument)
|
|
||||||
## Phase 1: instrumentation — omit -fp-model fast=2 for numerical stability (-ipo is still enabled)
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
|
||||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
|
||||||
else
|
|
||||||
## opt (default): maximum performance with PGO profile data
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
|
||||||
endif
|
|
||||||
|
|
||||||
.SUFFIXES: .o .f90 .C .for .cu
|
.SUFFIXES: .o .f90 .C .for .cu
|
||||||
|
|
||||||
.f90.o:
|
.f90.o:
|
||||||
$(f90) $(f90appflags) -c $< -o $@
|
$(f90) $(f90appflags) -c $< -o $@
|
||||||
|
|
||||||
.C.o:
|
.C.o:
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@
|
||||||
|
|
||||||
.for.o:
|
.for.o:
|
||||||
$(f77) -c $< -o $@
|
$(f77) -c $< -o $@
|
||||||
@@ -37,54 +16,20 @@ endif
|
|||||||
.cu.o:
|
.cu.o:
|
||||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||||
|
|
||||||
# C rewrite of BSSN RHS kernel and helpers
|
|
||||||
bssn_rhs_c.o: bssn_rhs_c.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
|
||||||
|
|
||||||
fderivs_c.o: fderivs_c.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
|
||||||
|
|
||||||
fdderivs_c.o: fdderivs_c.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
|
||||||
|
|
||||||
kodiss_c.o: kodiss_c.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
|
||||||
|
|
||||||
lopsided_c.o: lopsided_c.C
|
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
|
||||||
|
|
||||||
interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
|
||||||
|
|
||||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
|
||||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
|
||||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=$(TP_PROFDATA) \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
|
||||||
|
|
||||||
TwoPunctures.o: TwoPunctures.C
|
TwoPunctures.o: TwoPunctures.C
|
||||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
|
||||||
|
|
||||||
TwoPunctureABE.o: TwoPunctureABE.C
|
TwoPunctureABE.o: TwoPunctureABE.C
|
||||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< -o $@
|
||||||
|
|
||||||
# Input files
|
# Input files
|
||||||
|
|
||||||
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
|
||||||
ifeq ($(USE_CXX_KERNELS),0)
|
|
||||||
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
|
||||||
CFILES =
|
|
||||||
else
|
|
||||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
|
||||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o
|
|
||||||
endif
|
|
||||||
|
|
||||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||||
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
|
NullShellPatch2_Evo.o writefile_f.o xh_bssn_rhs.o xh_fdderivs.o xh_fderivs.o xh_kodiss.o xh_lopsided.o \
|
||||||
|
xh_global_interp.o xh_polint3.o
|
||||||
|
|
||||||
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||||
cgh.o surface_integral.o ShellPatch.o\
|
cgh.o surface_integral.o ShellPatch.o\
|
||||||
@@ -94,9 +39,9 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o
|
|||||||
NullShellPatch2_Evo.o \
|
NullShellPatch2_Evo.o \
|
||||||
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
||||||
|
|
||||||
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
F90FILES = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||||
rungekutta4_rout.o diff_new.o kodiss.o kodiss_sh.o\
|
rungekutta4_rout.o bssn_rhs.o diff_new.o kodiss.o kodiss_sh.o\
|
||||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||||
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
||||||
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
||||||
@@ -107,14 +52,6 @@ F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
|||||||
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
|
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
|
||||||
NullNews2.o tool_f.o
|
NullNews2.o tool_f.o
|
||||||
|
|
||||||
ifeq ($(USE_CXX_KERNELS),0)
|
|
||||||
# Fortran mode: include original bssn_rhs.o
|
|
||||||
F90FILES = $(F90FILES_BASE) bssn_rhs.o
|
|
||||||
else
|
|
||||||
# C++ mode (default): bssn_rhs.o replaced by C++ kernel
|
|
||||||
F90FILES = $(F90FILES_BASE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
F77FILES = zbesh.o
|
F77FILES = zbesh.o
|
||||||
|
|
||||||
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
|
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
|
||||||
@@ -127,7 +64,7 @@ TwoPunctureFILES = TwoPunctureABE.o TwoPunctures.o
|
|||||||
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
CUDAFILES = bssn_gpu.o bssn_gpu_rhs_ss.o
|
||||||
|
|
||||||
# file dependences
|
# file dependences
|
||||||
$(C++FILES) $(C++FILES_GPU) $(F90FILES) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
$(C++FILES) $(C++FILESGPU) $(F90FILES) $(AHFDOBJS) $(CUDAFILES): macrodef.fh
|
||||||
|
|
||||||
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
$(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||||
@@ -136,7 +73,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
|||||||
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
|
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
|
||||||
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
|
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
|
||||||
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
|
||||||
initial_null2.h NullShellPatch2.h
|
initial_null2.h NullShellPatch2.h xh_bssn_rhs_compute.h xh_global_interp.h
|
||||||
|
|
||||||
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
|
||||||
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
|
||||||
@@ -150,7 +87,7 @@ $(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h
|
|||||||
|
|
||||||
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
$(AHFDOBJS): cctk.h cctk_Config.h cctk_Types.h cctk_Constants.h myglobal.h
|
||||||
|
|
||||||
$(C++FILES) $(C++FILES_GPU) $(CFILES) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
$(C++FILES) $(C++FILES_GPU) $(AHFDOBJS) $(CUDAFILES): macrodef.h
|
||||||
|
|
||||||
TwoPunctureFILES: TwoPunctures.h
|
TwoPunctureFILES: TwoPunctures.h
|
||||||
|
|
||||||
@@ -159,14 +96,14 @@ $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
|
|||||||
misc.o : zbesh.o
|
misc.o : zbesh.o
|
||||||
|
|
||||||
# projects
|
# projects
|
||||||
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
ABE: $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
||||||
|
|
||||||
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||||
|
|
||||||
TwoPunctureABE: $(TwoPunctureFILES)
|
TwoPunctureABE: $(TwoPunctureFILES)
|
||||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||||
|
|||||||
@@ -1,45 +1,32 @@
|
|||||||
## GCC version (commented out)
|
## GCC version (commented out)
|
||||||
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||||
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
|
||||||
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
|
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
|
||||||
|
|
||||||
## Intel oneAPI version with oneMKL (Optimized for performance)
|
## Intel oneAPI version with oneMKL (Optimized for performance)
|
||||||
filein = -I/usr/include/ -I${MKLROOT}/include
|
filein = -I/usr/include/ -I${MKLROOT}/include
|
||||||
|
|
||||||
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
||||||
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
||||||
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -liomp5
|
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
|
||||||
|
|
||||||
## PGO build mode switch (ABE only; TwoPunctureABE always uses opt flags)
|
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
|
||||||
## opt : (default) maximum performance with PGO profile-guided optimization
|
## -fprofile-instr-use: use collected profile data to guide optimization decisions
|
||||||
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
## (branch prediction, basic block layout, inlining, loop unrolling)
|
||||||
PGO_MODE ?= opt
|
PROFDATA = /home/hxh/AMSS-NCKU/pgo_profile/default.profdata
|
||||||
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
## Interp_Points load balance profiling mode
|
-fprofile-instr-use=$(PROFDATA) \
|
||||||
## off : (default) no load balance instrumentation
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
## profile : Pass 1 — instrument Interp_Points to collect timing profile
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
## optimize : Pass 2 — read profile and apply block rebalancing
|
-fprofile-instr-use=$(PROFDATA) \
|
||||||
INTERP_LB_MODE ?= off
|
-align array64byte -fpp -I${MKLROOT}/include
|
||||||
|
f90 = ifx
|
||||||
ifeq ($(INTERP_LB_MODE),profile)
|
f77 = ifx
|
||||||
INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
|
CXX = icpx
|
||||||
else ifeq ($(INTERP_LB_MODE),optimize)
|
CC = icx
|
||||||
INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
|
CLINKER = mpiicpx
|
||||||
else
|
|
||||||
INTERP_LB_FLAGS =
|
Cu = nvcc
|
||||||
endif
|
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||||
|
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||||
## Kernel implementation switch
|
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||||
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
|
||||||
## 0 : fall back to original Fortran kernels
|
|
||||||
USE_CXX_KERNELS ?= 1
|
|
||||||
f90 = ifx
|
|
||||||
f77 = ifx
|
|
||||||
CXX = icpx
|
|
||||||
CC = icx
|
|
||||||
CLINKER = mpiicpx
|
|
||||||
|
|
||||||
Cu = nvcc
|
|
||||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
|
||||||
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
|
||||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
|
||||||
|
|||||||
@@ -1,146 +0,0 @@
|
|||||||
#ifndef SHARE_FUNC_H
|
|
||||||
#define SHARE_FUNC_H
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
/* 主网格:0-based -> 1D */
|
|
||||||
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
|
|
||||||
const int ex1 = ex[0], ex2 = ex[1];
|
|
||||||
return (size_t)i0 + (size_t)j0 * (size_t)ex1 + (size_t)k0 * (size_t)ex1 * (size_t)ex2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* fh 对应 Fortran: fh(-1:ex1, -1:ex2, -1:ex3)
|
|
||||||
* ord=2 => shift=1
|
|
||||||
* iF/jF/kF 为 Fortran 索引(可为 -1,0,1..ex)
|
|
||||||
*/
|
|
||||||
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
|
|
||||||
const int shift = 1;
|
|
||||||
const int nx = ex[0] + 2; // ex1 + ord
|
|
||||||
const int ny = ex[1] + 2;
|
|
||||||
|
|
||||||
const int ii = iF + shift; // 0..ex1+1
|
|
||||||
const int jj = jF + shift; // 0..ex2+1
|
|
||||||
const int kk = kF + shift; // 0..ex3+1
|
|
||||||
|
|
||||||
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* fh 对应 Fortran: fh(-2:ex1, -2:ex2, -2:ex3)
|
|
||||||
* ord=3 => shift=2
|
|
||||||
* iF/jF/kF 是 Fortran 索引(可为负)
|
|
||||||
*/
|
|
||||||
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
|
|
||||||
const int shift = 2; // ord=3 -> -2..ex
|
|
||||||
const int nx = ex[0] + 3; // ex1 + ord
|
|
||||||
const int ny = ex[1] + 3;
|
|
||||||
|
|
||||||
const int ii = iF + shift; // 0..ex1+2
|
|
||||||
const int jj = jF + shift; // 0..ex2+2
|
|
||||||
const int kk = kF + shift; // 0..ex3+2
|
|
||||||
|
|
||||||
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
|
|
||||||
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
|
|
||||||
*
|
|
||||||
* C 里我们把:
|
|
||||||
* func 视为 0-based: i0=0..extc1-1, j0=0..extc2-1, k0=0..extc3-1
|
|
||||||
* funcc 用“平移下标”存为一维数组:
|
|
||||||
* iF in [-ord+1..extc1] -> ii = iF + (ord-1) in [0..extc1+ord-1]
|
|
||||||
* 总长度 nx = extc1 + ord
|
|
||||||
* 同理 ny = extc2 + ord, nz = extc3 + ord
|
|
||||||
*/
|
|
||||||
|
|
||||||
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
|
|
||||||
const int nx = extc[0], ny = extc[1];
|
|
||||||
return (size_t)i0 + (size_t)j0 * (size_t)nx + (size_t)k0 * (size_t)nx * (size_t)ny;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
|
|
||||||
const int shift = ord - 1; // iF = -shift .. extc1
|
|
||||||
const int nx = extc[0] + ord; // [-shift..extc1] 共 extc1+ord 个
|
|
||||||
const int ny = extc[1] + ord;
|
|
||||||
|
|
||||||
const int ii = iF + shift; // 0..extc1+shift
|
|
||||||
const int jj = jF + shift; // 0..extc2+shift
|
|
||||||
const int kk = kF + shift; // 0..extc3+shift
|
|
||||||
|
|
||||||
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 等价于 Fortran:
|
|
||||||
* funcc(1:extc1,1:extc2,1:extc3)=func
|
|
||||||
* do i=0,ord-1
|
|
||||||
* funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
|
|
||||||
* enddo
|
|
||||||
* do i=0,ord-1
|
|
||||||
* funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
|
|
||||||
* enddo
|
|
||||||
* do i=0,ord-1
|
|
||||||
* funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
|
|
||||||
* enddo
|
|
||||||
*/
|
|
||||||
static inline void symmetry_bd(int ord,
|
|
||||||
const int extc[3],
|
|
||||||
const double *func,
|
|
||||||
double *funcc,
|
|
||||||
const double SoA[3])
|
|
||||||
{
|
|
||||||
const int extc1 = extc[0], extc2 = extc[1], extc3 = extc[2];
|
|
||||||
|
|
||||||
// 1) funcc(1:extc1,1:extc2,1:extc3) = func
|
|
||||||
// Fortran 的 (iF=1..extc1) 对应 C 的 func(i0=0..extc1-1)
|
|
||||||
for (int k0 = 0; k0 < extc3; ++k0) {
|
|
||||||
for (int j0 = 0; j0 < extc2; ++j0) {
|
|
||||||
for (int i0 = 0; i0 < extc1; ++i0) {
|
|
||||||
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
|
|
||||||
funcc[idx_funcc_F(iF, jF, kF, ord, extc)] = func[idx_func0(i0, j0, k0, extc)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
|
|
||||||
for (int ii = 0; ii <= ord - 1; ++ii) {
|
|
||||||
const int iF_dst = -ii; // 0, -1, -2, ...
|
|
||||||
const int iF_src = ii + 1; // 1, 2, 3, ...
|
|
||||||
for (int kF = 1; kF <= extc3; ++kF) {
|
|
||||||
for (int jF = 1; jF <= extc2; ++jF) {
|
|
||||||
funcc[idx_funcc_F(iF_dst, jF, kF, ord, extc)] =
|
|
||||||
funcc[idx_funcc_F(iF_src, jF, kF, ord, extc)] * SoA[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
|
|
||||||
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
|
|
||||||
for (int jj = 0; jj <= ord - 1; ++jj) {
|
|
||||||
const int jF_dst = -jj;
|
|
||||||
const int jF_src = jj + 1;
|
|
||||||
for (int kF = 1; kF <= extc3; ++kF) {
|
|
||||||
for (int iF = -ord + 1; iF <= extc1; ++iF) {
|
|
||||||
funcc[idx_funcc_F(iF, jF_dst, kF, ord, extc)] =
|
|
||||||
funcc[idx_funcc_F(iF, jF_src, kF, ord, extc)] * SoA[1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
|
|
||||||
for (int kk = 0; kk <= ord - 1; ++kk) {
|
|
||||||
const int kF_dst = -kk;
|
|
||||||
const int kF_src = kk + 1;
|
|
||||||
for (int jF = -ord + 1; jF <= extc2; ++jF) {
|
|
||||||
for (int iF = -ord + 1; iF <= extc1; ++iF) {
|
|
||||||
funcc[idx_funcc_F(iF, jF, kF_dst, ord, extc)] =
|
|
||||||
funcc[idx_funcc_F(iF, jF, kF_src, ord, extc)] * SoA[2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@@ -2653,6 +2653,7 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
|
|||||||
|
|
||||||
// we have assumed there is only one box on this level,
|
// we have assumed there is only one box on this level,
|
||||||
// so we do not need loop boxes
|
// so we do not need loop boxes
|
||||||
|
|
||||||
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here);
|
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here);
|
||||||
|
|
||||||
double Mass_out = 0;
|
double Mass_out = 0;
|
||||||
|
|||||||
1984
AMSS_NCKU_source/xh_bssn_rhs.C
Normal file
1984
AMSS_NCKU_source/xh_bssn_rhs.C
Normal file
File diff suppressed because it is too large
Load Diff
30
AMSS_NCKU_source/xh_bssn_rhs_compute.h
Normal file
30
AMSS_NCKU_source/xh_bssn_rhs_compute.h
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
#include "xh_tool.h"
|
||||||
|
|
||||||
|
|
||||||
|
extern "C"
{
    /*
     * C-linkage entry point for the BSSN right-hand-side kernel.
     *
     * Only the declaration is visible here; the group labels below are read
     * off the parameter names and should be confirmed against the definition
     * (NOTE(review): presumably mirrors the Fortran compute_rhs_bssn routine).
     * Scalars are passed by reference, arrays as raw pointers.
     */
    int f_compute_rhs_bssn_xh(
        /* grid extents and (presumably) current time */
        int *ex, double &T,
        /* coordinate arrays */
        double *X, double *Y, double *Z,
        /* evolved fields */
        double *chi, double *trK,
        double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
        double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
        double *Gamx, double *Gamy, double *Gamz,
        double *Lap, double *betax, double *betay, double *betaz,
        double *dtSfx, double *dtSfy, double *dtSfz,
        /* right-hand sides, one per evolved field */
        double *chi_rhs, double *trK_rhs,
        double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
        double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
        double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
        double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
        double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
        /* source terms (presumably matter density / momentum / stress) */
        double *rho, double *Sx, double *Sy, double *Sz,
        double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
        /* connection-like work arrays (names suggest Christoffel symbols) */
        double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
        double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
        double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
        /* curvature-like work arrays (names suggest the Ricci tensor) */
        double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
        /* residual outputs (names suggest constraint violations) */
        double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
        double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
        /* control scalars */
        int &Symmetry, int &Lev, double &eps, int &co);
}
|
||||||
311
AMSS_NCKU_source/xh_fdderivs.C
Normal file
311
AMSS_NCKU_source/xh_fdderivs.C
Normal file
@@ -0,0 +1,311 @@
|
|||||||
|
#include "xh_tool.h"
|
||||||
|
void fdderivs(const int ex[3],
|
||||||
|
const double *f,
|
||||||
|
double *fxx, double *fxy, double *fxz,
|
||||||
|
double *fyy, double *fyz, double *fzz,
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
double SYM1, double SYM2, double SYM3,
|
||||||
|
int Symmetry, int onoff)
|
||||||
|
{
|
||||||
|
(void)onoff;
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
|
||||||
|
const double F1o4 = 2.5e-1; // 1/4
|
||||||
|
const double F8 = 8.0;
|
||||||
|
const double F16 = 16.0;
|
||||||
|
const double F30 = 30.0;
|
||||||
|
const double F1o12 = ONE / 12.0;
|
||||||
|
const double F1o144 = ONE / 144.0;
|
||||||
|
|
||||||
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
|
const double dX = X[1] - X[0];
|
||||||
|
const double dY = Y[1] - Y[0];
|
||||||
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
|
const int imaxF = ex1;
|
||||||
|
const int jmaxF = ex2;
|
||||||
|
const int kmaxF = ex3;
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
|
|
||||||
|
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
|
||||||
|
const size_t nx = (size_t)ex1 + 2;
|
||||||
|
const size_t ny = (size_t)ex2 + 2;
|
||||||
|
const size_t nz = (size_t)ex3 + 2;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
/* 系数:按 Fortran 原式 */
|
||||||
|
const double Sdxdx = ONE / (dX * dX);
|
||||||
|
const double Sdydy = ONE / (dY * dY);
|
||||||
|
const double Sdzdz = ONE / (dZ * dZ);
|
||||||
|
|
||||||
|
const double Fdxdx = F1o12 / (dX * dX);
|
||||||
|
const double Fdydy = F1o12 / (dY * dY);
|
||||||
|
const double Fdzdz = F1o12 / (dZ * dZ);
|
||||||
|
|
||||||
|
const double Sdxdy = F1o4 / (dX * dY);
|
||||||
|
const double Sdxdz = F1o4 / (dX * dZ);
|
||||||
|
const double Sdydz = F1o4 / (dY * dZ);
|
||||||
|
|
||||||
|
const double Fdxdy = F1o144 / (dX * dY);
|
||||||
|
const double Fdxdz = F1o144 / (dX * dZ);
|
||||||
|
const double Fdydz = F1o144 / (dY * dZ);
|
||||||
|
|
||||||
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
// symmetry_bd(2, ex, f, fh, SoA);
|
||||||
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 < ex[2]; ++k0) {
|
||||||
|
for (int j0 = 0; j0 < ex[1]; ++j0) {
|
||||||
|
for (int i0 = 0; i0 < ex[0]; ++i0) {
|
||||||
|
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
|
||||||
|
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = f[idx_func0(i0, j0, k0, ex)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
|
||||||
|
for (int ii = 0; ii <= 2 - 1; ++ii) {
|
||||||
|
const int iF_dst = -ii; // 0, -1, -2, ...
|
||||||
|
const int iF_src = ii + 1; // 1, 2, 3, ...
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int jF = 1; jF <= ex[1]; ++jF) {
|
||||||
|
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
|
||||||
|
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
|
||||||
|
for (int jj = 0; jj <= 2 - 1; ++jj) {
|
||||||
|
const int jF_dst = -jj;
|
||||||
|
const int jF_src = jj + 1;
|
||||||
|
for (int kF = 1; kF <= ex[2]; ++kF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
|
||||||
|
for (int kk = 0; kk <= 2 - 1; ++kk) {
|
||||||
|
const int kF_dst = -kk;
|
||||||
|
const int kF_src = kk + 1;
|
||||||
|
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
|
||||||
|
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
|
||||||
|
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
||||||
|
// const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
// for (size_t p = 0; p < all; ++p) {
|
||||||
|
// fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
|
||||||
|
// fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
|
||||||
|
// }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fortran:
|
||||||
|
* do k=1,ex3-1
|
||||||
|
* do j=1,ex2-1
|
||||||
|
* do i=1,ex1-1
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
/* 高阶分支:i±2,j±2,k±2 都在范围内 */
|
||||||
|
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
||||||
|
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
||||||
|
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
||||||
|
{
|
||||||
|
fxx[p] = Fdxdx * (
|
||||||
|
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyy[p] = Fdydy * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fzz[p] = Fdzdz * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
/* fxy 高阶:完全照搬 Fortran 的括号结构 */
|
||||||
|
{
|
||||||
|
const double t_jm2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jm1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
|
||||||
|
|
||||||
|
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fxz 高阶 */
|
||||||
|
{
|
||||||
|
const double t_km2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
|
||||||
|
|
||||||
|
const double t_km1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
|
||||||
|
|
||||||
|
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fyz 高阶 */
|
||||||
|
{
|
||||||
|
const double t_km2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
|
||||||
|
|
||||||
|
const double t_km1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
|
||||||
|
|
||||||
|
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* 二阶分支:i±1,j±1,k±1 在范围内 */
|
||||||
|
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
||||||
|
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
||||||
|
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
||||||
|
{
|
||||||
|
fxx[p] = Sdxdx * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyy[p] = Sdydy * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fzz[p] = Sdzdz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fxy[p] = Sdxdy * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fxz[p] = Sdxdz * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyz[p] = Sdydz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
}else{
|
||||||
|
fxx[p] = 0.0;
|
||||||
|
fyy[p] = 0.0;
|
||||||
|
fzz[p] = 0.0;
|
||||||
|
fxy[p] = 0.0;
|
||||||
|
fxz[p] = 0.0;
|
||||||
|
fyz[p] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// free(fh);
|
||||||
|
}
|
||||||
145
AMSS_NCKU_source/xh_fderivs.C
Normal file
145
AMSS_NCKU_source/xh_fderivs.C
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
#include "xh_tool.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* C 版 fderivs
|
||||||
|
*
|
||||||
|
* Fortran:
|
||||||
|
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
|
||||||
|
*
|
||||||
|
* 约定:
|
||||||
|
* f, fx, fy, fz: ex1*ex2*ex3,按 idx_ex 布局
|
||||||
|
* X: ex1, Y: ex2, Z: ex3
|
||||||
|
*/
|
||||||
|
void fderivs(const int ex[3],
|
||||||
|
const double *f,
|
||||||
|
double *fx, double *fy, double *fz,
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
double SYM1, double SYM2, double SYM3,
|
||||||
|
int Symmetry, int onoff)
|
||||||
|
{
|
||||||
|
(void)onoff; // Fortran 里没用到
|
||||||
|
|
||||||
|
const double ZEO = 0.0, ONE = 1.0;
|
||||||
|
const double TWO = 2.0, EIT = 8.0;
|
||||||
|
const double F12 = 12.0;
|
||||||
|
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
|
||||||
|
|
||||||
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
|
// dX = X(2)-X(1) -> C: X[1]-X[0]
|
||||||
|
const double dX = X[1] - X[0];
|
||||||
|
const double dY = Y[1] - Y[0];
|
||||||
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
|
||||||
|
|
||||||
|
// SoA(1:3) = SYM1,SYM2,SYM3
|
||||||
|
const double SoA[3] = { SYM1, SYM2, SYM3 };
|
||||||
|
|
||||||
|
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
|
||||||
|
const size_t nx = (size_t)ex1 + 2;
|
||||||
|
const size_t ny = (size_t)ex2 + 2;
|
||||||
|
const size_t nz = (size_t)ex3 + 2;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
// double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
// call symmetry_bd(2,ex,f,fh,SoA)
|
||||||
|
symmetry_bd(2, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
|
||||||
|
// fx = fy = fz = 0
|
||||||
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
for (size_t p = 0; p < all; ++p) {
|
||||||
|
fx[p] = ZEO;
|
||||||
|
fy[p] = ZEO;
|
||||||
|
fz[p] = ZEO;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fortran loops:
|
||||||
|
* do k=1,ex3-1
|
||||||
|
* do j=1,ex2-1
|
||||||
|
* do i=1,ex1-1
|
||||||
|
*
|
||||||
|
* C: k0=0..ex3-2, j0=0..ex2-2, i0=0..ex1-2
|
||||||
|
*/
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
|
||||||
|
if ((iF + 2) <= ex1 && (iF - 2) >= iminF &&
|
||||||
|
(jF + 2) <= ex2 && (jF - 2) >= jminF &&
|
||||||
|
(kF + 2) <= ex3 && (kF - 2) >= kminF)
|
||||||
|
{
|
||||||
|
fx[p] = d12dx * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fy[p] = d12dy * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fz[p] = d12dz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||||
|
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// elseif(i+1 <= imax .and. i-1 >= imin ...)
|
||||||
|
else if ((iF + 1) <= ex1 && (iF - 1) >= iminF &&
|
||||||
|
(jF + 1) <= ex2 && (jF - 1) >= jminF &&
|
||||||
|
(kF + 1) <= ex3 && (kF - 1) >= kminF)
|
||||||
|
{
|
||||||
|
fx[p] = d2dx * (
|
||||||
|
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fy[p] = d2dy * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fz[p] = d2dz * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// free(fh);
|
||||||
|
}
|
||||||
143
AMSS_NCKU_source/xh_global_interp.C
Normal file
143
AMSS_NCKU_source/xh_global_interp.C
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
#include "xh_global_interp.h"
|
||||||
|
|
||||||
|
/* 你已有的 polin3(由前面 Fortran->C 翻译得到) */
|
||||||
|
// void polin3(const double *x1a, const double *x2a, const double *x3a,
|
||||||
|
// const double *ya, double x1, double x2, double x3,
|
||||||
|
// double *y, double *dy, int ordn);
|
||||||
|
|
||||||
|
/*
|
||||||
|
你需要提供 decide3d 的实现(这里仅声明)。
|
||||||
|
Fortran: decide3d(ex,f,f,cxB,cxT,SoA,ya,ORDN,Symmetry)
|
||||||
|
- ex: [3]
|
||||||
|
- f: 三维场(列主序)
|
||||||
|
- cxB/cxT: 3 维窗口起止(Fortran 1-based,且可能 <=0)
|
||||||
|
- SoA: [3]
|
||||||
|
- ya: 输出 ORDN^3 的采样块(列主序)
|
||||||
|
- return: 0 表示正常;非 0 表示错误(对应 Fortran logical = .true.)
|
||||||
|
*/
|
||||||
|
// int xh_decide3d(const int ex[3],
|
||||||
|
// const double *f_in,
|
||||||
|
// const double *f_in2, /* Fortran 里传了 f,f;按原样保留 */
|
||||||
|
// const int cxB[3],
|
||||||
|
// const int cxT[3],
|
||||||
|
// const double SoA[3],
|
||||||
|
// double *ya,
|
||||||
|
// int ordn,
|
||||||
|
// int symmetry);
|
||||||
|
|
||||||
|
/*
 * Read a coordinate array with a Fortran-style (1-based) index.
 * Fortran X(1) is C X[0]; the caller must pass an index whose C equivalent
 * (idxF - 1) lies inside the array.
 */
static inline double X_at_FortranIndex(const double *X, int idxF) {
    const int c_index = idxF - 1;
    return X[c_index];
}
|
||||||
|
|
||||||
|
/*
 * Stand-in for Fortran's idint: convert a double to int by discarding the
 * fractional part (truncation toward zero, which equals floor only for
 * non-negative arguments).
 */
static inline int idint_like(double a) {
    const int truncated = static_cast<int>(a);
    return truncated;
}
|
||||||
|
|
||||||
|
/* global_interp 的 C 版 */
|
||||||
|
void xh_global_interp(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, /* f(ex1,ex2,ex3) column-major */
|
||||||
|
double &f_int,
|
||||||
|
double x1, double y1, double z1,
|
||||||
|
int ORDN,
|
||||||
|
const double SoA[3],
|
||||||
|
int symmetry)
|
||||||
|
{
|
||||||
|
// double time1, time2;
|
||||||
|
// time1 = omp_get_wtime();
|
||||||
|
enum { NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2 };
|
||||||
|
|
||||||
|
int j, m;
|
||||||
|
int imin, jmin, kmin;
|
||||||
|
int cxB[3], cxT[3], cxI[3], cmin[3], cmax[3];
|
||||||
|
double cx[3];
|
||||||
|
double dX, dY, dZ, ddy;
|
||||||
|
|
||||||
|
/* Fortran: imin=lbound(f,1) ... 通常是 1;这里按 1 处理 */
|
||||||
|
imin = 1; jmin = 1; kmin = 1;
|
||||||
|
|
||||||
|
dX = X_at_FortranIndex(X, imin + 1) - X_at_FortranIndex(X, imin);
|
||||||
|
dY = X_at_FortranIndex(Y, jmin + 1) - X_at_FortranIndex(Y, jmin);
|
||||||
|
dZ = X_at_FortranIndex(Z, kmin + 1) - X_at_FortranIndex(Z, kmin);
|
||||||
|
|
||||||
|
/* x1a(j) = (j-1)*1.0 (j=1..ORDN) */
|
||||||
|
double *x1a = (double*)malloc((size_t)ORDN * sizeof(double));
|
||||||
|
double *ya = (double*)malloc((size_t)ORDN * (size_t)ORDN * (size_t)ORDN * sizeof(double));
|
||||||
|
if (!x1a || !ya) {
|
||||||
|
fprintf(stderr, "global_interp: malloc failed\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
for (j = 0; j < ORDN; j++) x1a[j] = (double)j;
|
||||||
|
|
||||||
|
/* cxI(m) = idint((p - P(1))/dP + 0.4) + 1 (Fortran 1-based) */
|
||||||
|
cxI[0] = idint_like((x1 - X_at_FortranIndex(X, 1)) / dX + 0.4) + 1;
|
||||||
|
cxI[1] = idint_like((y1 - X_at_FortranIndex(Y, 1)) / dY + 0.4) + 1;
|
||||||
|
cxI[2] = idint_like((z1 - X_at_FortranIndex(Z, 1)) / dZ + 0.4) + 1;
|
||||||
|
|
||||||
|
/* cxB = cxI - ORDN/2 + 1 ; cxT = cxB + ORDN - 1 */
|
||||||
|
int half = ORDN / 2; /* Fortran 整数除法 */
|
||||||
|
for (m = 0; m < 3; m++) {
|
||||||
|
cxB[m] = cxI[m] - half + 1;
|
||||||
|
cxT[m] = cxB[m] + ORDN - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cmin=1; cmax=ex */
|
||||||
|
cmin[0] = cmin[1] = cmin[2] = 1;
|
||||||
|
cmax[0] = ex[0];
|
||||||
|
cmax[1] = ex[1];
|
||||||
|
cmax[2] = ex[2];
|
||||||
|
|
||||||
|
/* 对称边界时允许 cxB 为负/0(与 Fortran 一致) */
|
||||||
|
if (symmetry == OCTANT && fabs(X_at_FortranIndex(X, 1)) < dX) cmin[0] = -half + 2;
|
||||||
|
if (symmetry == OCTANT && fabs(X_at_FortranIndex(Y, 1)) < dY) cmin[1] = -half + 2;
|
||||||
|
if (symmetry != NO_SYMM && fabs(X_at_FortranIndex(Z, 1)) < dZ) cmin[2] = -half + 2;
|
||||||
|
|
||||||
|
/* 夹紧窗口 [cxB,cxT] 到 [cmin,cmax] */
|
||||||
|
for (m = 0; m < 3; m++) {
|
||||||
|
if (cxB[m] < cmin[m]) {
|
||||||
|
cxB[m] = cmin[m];
|
||||||
|
cxT[m] = cxB[m] + ORDN - 1;
|
||||||
|
}
|
||||||
|
if (cxT[m] > cmax[m]) {
|
||||||
|
cxT[m] = cmax[m];
|
||||||
|
cxB[m] = cxT[m] + 1 - ORDN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
cx(m) 的计算:如果 cxB>0:
|
||||||
|
cx = (p - P(cxB))/dP
|
||||||
|
else:
|
||||||
|
cx = (p + P(2 - cxB))/dP
|
||||||
|
注意这里的 cxB 是 Fortran 1-based 语义下的整数,可能 <=0。
|
||||||
|
*/
|
||||||
|
if (cxB[0] > 0) cx[0] = (x1 - X_at_FortranIndex(X, cxB[0])) / dX;
|
||||||
|
else cx[0] = (x1 + X_at_FortranIndex(X, 2 - cxB[0])) / dX;
|
||||||
|
|
||||||
|
if (cxB[1] > 0) cx[1] = (y1 - X_at_FortranIndex(Y, cxB[1])) / dY;
|
||||||
|
else cx[1] = (y1 + X_at_FortranIndex(Y, 2 - cxB[1])) / dY;
|
||||||
|
|
||||||
|
if (cxB[2] > 0) cx[2] = (z1 - X_at_FortranIndex(Z, cxB[2])) / dZ;
|
||||||
|
else cx[2] = (z1 + X_at_FortranIndex(Z, 2 - cxB[2])) / dZ;
|
||||||
|
|
||||||
|
/* decide3d: 填充 ya(1:ORDN,1:ORDN,1:ORDN) */
|
||||||
|
if (xh_decide3d(ex, f, f, cxB, cxT, SoA, ya, ORDN, symmetry)) {
|
||||||
|
printf("global_interp position: %g %g %g\n", x1, y1, z1);
|
||||||
|
printf("data range: %g %g %g %g %g %g\n",
|
||||||
|
X_at_FortranIndex(X, 1), X_at_FortranIndex(X, ex[0]),
|
||||||
|
X_at_FortranIndex(Y, 1), X_at_FortranIndex(Y, ex[1]),
|
||||||
|
X_at_FortranIndex(Z, 1), X_at_FortranIndex(Z, ex[2]));
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* polin3(x1a,x1a,x1a,ya,cx(1),cx(2),cx(3),f_int,ddy,ORDN) */
|
||||||
|
xh_polin3(x1a, x1a, x1a, ya, cx[0], cx[1], cx[2], f_int, &ddy, ORDN);
|
||||||
|
|
||||||
|
free(x1a);
|
||||||
|
free(ya);
|
||||||
|
// time2 = omp_get_wtime();
|
||||||
|
// printf("Time for global_interp: %lf seconds\n", time2 - time1);
|
||||||
|
}
|
||||||
12
AMSS_NCKU_source/xh_global_interp.h
Normal file
12
AMSS_NCKU_source/xh_global_interp.h
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#include "xh_po.h"
|
||||||
|
|
||||||
|
extern "C" {
    /*
     * Interpolate the field f at (x1, y1, z1) and store the value in f_int.
     *   ex        grid extents {ex1, ex2, ex3}
     *   X, Y, Z   coordinate arrays
     *   ORDN      number of interpolation points per axis
     *   SoA       per-axis symmetry factors
     *   symmetry  symmetry mode selector
     */
    void xh_global_interp(const int ex[3],
                          const double *X, const double *Y, const double *Z,
                          const double *f, /* f(ex1,ex2,ex3) column-major */
                          double &f_int,
                          double x1, double y1, double z1,
                          int ORDN,
                          const double SoA[3],
                          int symmetry);
}
|
||||||
116
AMSS_NCKU_source/xh_kodiss.C
Normal file
116
AMSS_NCKU_source/xh_kodiss.C
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
#include "xh_tool.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* C 版 kodis
|
||||||
|
*
|
||||||
|
* Fortran signature:
|
||||||
|
* subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
|
||||||
|
*
|
||||||
|
* 约定:
|
||||||
|
* X: ex1, Y: ex2, Z: ex3
|
||||||
|
* f, f_rhs: ex1*ex2*ex3 按 idx_ex 布局
|
||||||
|
* SoA[3]
|
||||||
|
* eps: double
|
||||||
|
*/
|
||||||
|
void kodis(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, double *f_rhs,
|
||||||
|
const double SoA[3],
|
||||||
|
int Symmetry, double eps)
|
||||||
|
{
|
||||||
|
const double ONE = 1.0, SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
||||||
|
const double cof = 64.0; // 2^6
|
||||||
|
const int NO_SYMM = 0, OCTANT = 2;
|
||||||
|
|
||||||
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
|
// Fortran: dX = X(2)-X(1) -> C: X[1]-X[0]
|
||||||
|
const double dX = X[1] - X[0];
|
||||||
|
const double dY = Y[1] - Y[0];
|
||||||
|
const double dZ = Z[1] - Z[0];
|
||||||
|
(void)ONE; // ONE 在原 Fortran 里只是参数,这里不一定用得上
|
||||||
|
|
||||||
|
// Fortran: imax=ex(1) 等是 1-based 上界
|
||||||
|
const int imaxF = ex1;
|
||||||
|
const int jmaxF = ex2;
|
||||||
|
const int kmaxF = ex3;
|
||||||
|
|
||||||
|
// Fortran: imin=jmin=kmin=1,某些对称情况变 -2
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
|
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
|
||||||
|
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
|
||||||
|
|
||||||
|
// 分配 fh:大小 (ex1+3)*(ex2+3)*(ex3+3),对应 ord=3
|
||||||
|
const size_t nx = (size_t)ex1 + 3;
|
||||||
|
const size_t ny = (size_t)ex2 + 3;
|
||||||
|
const size_t nz = (size_t)ex3 + 3;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
symmetry_bd(3, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fortran loops:
|
||||||
|
* do k=1,ex3
|
||||||
|
* do j=1,ex2
|
||||||
|
* do i=1,ex1
|
||||||
|
*
|
||||||
|
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
|
||||||
|
* 并定义 Fortran index: iF=i0+1, ...
|
||||||
|
*/
|
||||||
|
for (int k0 = 0; k0 < ex3; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 < ex2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 < ex1; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
|
||||||
|
// Fortran if 条件:
|
||||||
|
// i-3 >= imin .and. i+3 <= imax 等(都是 Fortran 索引)
|
||||||
|
if ((iF - 3) >= iminF && (iF + 3) <= imaxF &&
|
||||||
|
(jF - 3) >= jminF && (jF + 3) <= jmaxF &&
|
||||||
|
(kF - 3) >= kminF && (kF + 3) <= kmaxF)
|
||||||
|
{
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
|
||||||
|
const double Dx_term =
|
||||||
|
( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF , jF, kF, ex)] ) / dX;
|
||||||
|
|
||||||
|
const double Dy_term =
|
||||||
|
( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF , kF, ex)] ) / dY;
|
||||||
|
|
||||||
|
const double Dz_term =
|
||||||
|
( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF , ex)] ) / dZ;
|
||||||
|
|
||||||
|
// Fortran:
|
||||||
|
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
|
||||||
|
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// free(fh);
|
||||||
|
}
|
||||||
262
AMSS_NCKU_source/xh_lopsided.C
Normal file
262
AMSS_NCKU_source/xh_lopsided.C
Normal file
@@ -0,0 +1,262 @@
|
|||||||
|
#include "xh_tool.h"
|
||||||
|
/*
|
||||||
|
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
|
||||||
|
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
*
|
||||||
|
* 约定:
|
||||||
|
* nghost = 3
|
||||||
|
* ex[3] = {ex1,ex2,ex3}
|
||||||
|
* f = 原始网格 (ex1*ex2*ex3)
|
||||||
|
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
|
||||||
|
* SoA[3] = 输入参数
|
||||||
|
*/
|
||||||
|
void lopsided(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, double *f_rhs,
|
||||||
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
|
int Symmetry, const double SoA[3])
|
||||||
|
{
|
||||||
|
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
|
||||||
|
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
|
||||||
|
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
|
||||||
|
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
|
||||||
|
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
|
||||||
|
|
||||||
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
|
// 对应 Fortran: dX = X(2)-X(1) (Fortran 1-based)
|
||||||
|
// C: X[1]-X[0]
|
||||||
|
const double dX = X[1] - X[0];
|
||||||
|
const double dY = Y[1] - Y[0];
|
||||||
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
|
||||||
|
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
|
||||||
|
const double d2dx = ONE / TWO / dX;
|
||||||
|
const double d2dy = ONE / TWO / dY;
|
||||||
|
const double d2dz = ONE / TWO / dZ;
|
||||||
|
(void)d2dx; (void)d2dy; (void)d2dz;
|
||||||
|
|
||||||
|
// Fortran:
|
||||||
|
// imax = ex(1); jmax = ex(2); kmax = ex(3)
|
||||||
|
const int imaxF = ex1;
|
||||||
|
const int jmaxF = ex2;
|
||||||
|
const int kmaxF = ex3;
|
||||||
|
|
||||||
|
// Fortran:
|
||||||
|
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
||||||
|
|
||||||
|
// 分配 fh:大小 (ex1+3)*(ex2+3)*(ex3+3)
|
||||||
|
const size_t nx = (size_t)ex1 + 3;
|
||||||
|
const size_t ny = (size_t)ex2 + 3;
|
||||||
|
const size_t nz = (size_t)ex3 + 3;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
static thread_local double *fh = NULL;
|
||||||
|
static thread_local size_t cap = 0;
|
||||||
|
|
||||||
|
if (fh_size > cap) {
|
||||||
|
free(fh);
|
||||||
|
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
|
||||||
|
cap = fh_size;
|
||||||
|
}
|
||||||
|
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
|
||||||
|
|
||||||
|
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
symmetry_bd(3, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fortran 主循环:
|
||||||
|
* do k=1,ex(3)-1
|
||||||
|
* do j=1,ex(2)-1
|
||||||
|
* do i=1,ex(1)-1
|
||||||
|
*
|
||||||
|
* 转成 C 0-based:
|
||||||
|
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
|
||||||
|
*
|
||||||
|
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
|
||||||
|
* iF=i0+1, jF=j0+1, kF=k0+1
|
||||||
|
*/
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
// ---------------- x direction ----------------
|
||||||
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
|
// Fortran: if(i+3 <= imax)
|
||||||
|
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
|
||||||
|
if (i0 <= ex1 - 4) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
// elseif(i+2 <= imax) <=> i0 <= ex1-3
|
||||||
|
else if (i0 <= ex1 - 3) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
// elseif(i+1 <= imax) <=> i0 <= ex1-2(循环里总成立)
|
||||||
|
else if (i0 <= ex1 - 2) {
|
||||||
|
f_rhs[p] -= sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
} else if (sfx < ZEO) {
|
||||||
|
// Fortran: if(i-3 >= imin)
|
||||||
|
// (iF-3) >= iminF <=> (i0-2) >= iminF
|
||||||
|
if ((i0 - 2) >= iminF) {
|
||||||
|
f_rhs[p] -= sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
|
||||||
|
else if ((i0 - 1) >= iminF) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
// elseif(i-1 >= imin) <=> i0 >= iminF
|
||||||
|
else if (i0 >= iminF) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------- y direction ----------------
|
||||||
|
const double sfy = Sfy[p];
|
||||||
|
if (sfy > ZEO) {
|
||||||
|
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
|
||||||
|
if (j0 <= ex2 - 4) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||||
|
} else if (j0 <= ex2 - 3) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||||
|
} else if (j0 <= ex2 - 2) {
|
||||||
|
f_rhs[p] -= sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
||||||
|
}
|
||||||
|
} else if (sfy < ZEO) {
|
||||||
|
if ((j0 - 2) >= jminF) {
|
||||||
|
f_rhs[p] -= sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
||||||
|
} else if ((j0 - 1) >= jminF) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||||
|
} else if (j0 >= jminF) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------- z direction ----------------
|
||||||
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
|
if (k0 <= ex3 - 4) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||||
|
} else if (k0 <= ex3 - 3) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||||
|
} else if (k0 <= ex3 - 2) {
|
||||||
|
f_rhs[p] -= sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
||||||
|
}
|
||||||
|
} else if (sfz < ZEO) {
|
||||||
|
if ((k0 - 2) >= kminF) {
|
||||||
|
f_rhs[p] -= sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
||||||
|
} else if ((k0 - 1) >= kminF) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||||
|
} else if (k0 >= kminF) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// free(fh);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
19
AMSS_NCKU_source/xh_po.h
Normal file
19
AMSS_NCKU_source/xh_po.h
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
#ifndef XH_PO_H
#define XH_PO_H

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>

/*
 * Gather the ordn^3 interpolation stencil ya from the grid function f,
 * mirroring indices <= 0 with the parity factors SoA.
 * Returns 0 on success and 1 when the requested index box does not fit
 * into the grid.  fpi is not used (the Fortran original does not use it
 * either).
 */
int xh_decide3d(const int ex[3],
                const double *f,
                const double *fpi,
                const int cxB[3],
                const int cxT[3],
                const double SoA[3],
                double *ya,
                int ordn,
                int Symmetry);

/*
 * 1-D polynomial interpolation through the ordn points (xa, ya),
 * evaluated at x.  The value is stored in *y, the last correction in *dy.
 */
void xh_polint(const double *xa, const double *ya, double x,
               double *y, double *dy, int ordn);

/*
 * 3-D polynomial interpolation on an ordn x ordn x ordn stencil ya
 * (first index fastest), built from successive xh_polint calls.
 */
void xh_polin3(const double *x1a, const double *x2a, const double *x3a,
               const double *ya, double x1, double x2, double x3,
               double &y, double *dy, int ordn);

#endif /* XH_PO_H */
|
||||||
258
AMSS_NCKU_source/xh_polint3.C
Normal file
258
AMSS_NCKU_source/xh_polint3.C
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
#include "xh_po.h"
|
||||||
|
/*
  ex[0..2]   == Fortran ex(1:3)
  cxB/cxT    == Fortran cxB(1:3), cxT(1:3) (entries may be <= 0)
  SoA[0..2]  == Fortran SoA(1:3)
  f, fpi     == Fortran f(ex1,ex2,ex3), column-major (1-based in the formulas)
  ya         == contiguous buffer of ORDN^3 values, corresponding to Fortran
                ya(cxB1:cxT1, cxB2:cxT2, cxB3:cxT3).
                Note: the Fortran i/j/k coordinates are written into it
                through an offset mapping.
*/
|
||||||
|
|
||||||
|
/* Larger of two ints. */
static inline int imax(int a, int b) { return (b < a) ? a : b; }

/* Smaller of two ints. */
static inline int imin(int a, int b) { return (b > a) ? a : b; }

/*
 * Fill the interpolation stencil ya from the grid function f.
 *
 *   ex        grid extents; f holds ex[0]*ex[1]*ex[2] values, first index
 *             fastest, addressed below with Fortran 1-based indices
 *   fpi       unused (the Fortran original does not use it either)
 *   cxB, cxT  inclusive lower/upper Fortran indices of the stencil box;
 *             entries may be <= 0
 *   SoA       parity factors applied when an index is mirrored
 *   ya        output, stored first index fastest with leading dimension
 *             ordn on every axis; ya(i,j,k) lives at
 *             (i-cxB[0]) + ordn*((j-cxB[1]) + ordn*(k-cxB[2]))
 *   Symmetry  unused here
 *
 * For an index i >= 1 the value f(i) is copied; for i <= 0 the mirrored
 * value f(2-i) times SoA of that axis is used.  The factors are applied in
 * the order x, y, z.
 *
 * Returns 0 on success.  If the box (or its mirror image) leaves the grid,
 * a diagnostic is printed to stdout, ya is left untouched and 1 is returned.
 */
int xh_decide3d(const int ex[3],
                const double *f,
                const double *fpi,
                const int cxB[3],
                const int cxT[3],
                const double SoA[3],
                double *ya,
                int ordn,
                int Symmetry)
{
  (void)fpi;
  (void)Symmetry;

  const int ex1 = ex[0], ex2 = ex[1];
  const int cxB1 = cxB[0], cxB2 = cxB[1], cxB3 = cxB[2];

  /* [fmin1,fmax1]: directly copied range, [fmin2,fmax2]: mirrored range */
  int fmin1[3], fmax1[3], fmin2[3], fmax2[3];
  int out_of_grid = 0;

  /* The Fortran "NaN check" on the integer bounds is not reproduced. */
  for (int axis = 0; axis < 3; ++axis) {
    fmin1[axis] = imax(1, cxB[axis]);
    fmax1[axis] = cxT[axis];
    fmin2[axis] = cxB[axis];
    fmax2[axis] = imin(0, cxT[axis]);

    const int direct_bad = fmin1[axis] < 1 || fmax1[axis] > ex[axis];
    const int mirror_bad = 2 - fmax2[axis] < 1 || 2 - fmin2[axis] > ex[axis];

    if (fmin1[axis] <= fmax1[axis] && direct_bad) out_of_grid = 1;
    if (fmin2[axis] <= fmax2[axis] && mirror_bad) out_of_grid = 1;
  }

  if (out_of_grid) {
    printf("error in decide3d\n");
    printf("cxB: %d %d %d  cxT: %d %d %d  ex: %d %d %d\n",
           cxB[0], cxB[1], cxB[2], cxT[0], cxT[1], cxT[2], ex[0], ex[1], ex[2]);
    printf("fmin1: %d %d %d  fmax1: %d %d %d\n",
           fmin1[0], fmin1[1], fmin1[2], fmax1[0], fmax1[1], fmax1[2]);
    printf("fmin2: %d %d %d  fmax2: %d %d %d\n",
           fmin2[0], fmin2[1], fmin2[2], fmax2[0], fmax2[1], fmax2[2]);
    return 1;
  }

  /*
   * The direct and mirrored ranges together cover cxB..cxT exactly once on
   * every axis, so a single sweep over the box visits the same cells as the
   * eight Fortran loop nests.
   */
  for (int k = cxB[2]; k <= cxT[2]; ++k) {
    const int ks = (k >= 1) ? k : 2 - k;
    for (int j = cxB[1]; j <= cxT[1]; ++j) {
      const int js = (j >= 1) ? j : 2 - j;
      for (int i = cxB[0]; i <= cxT[0]; ++i) {
        const int is = (i >= 1) ? i : 2 - i;

        double value = f[(is - 1) + ex1 * ((js - 1) + ex2 * (ks - 1))];
        if (i < 1) value = value * SoA[0];
        if (j < 1) value = value * SoA[1];
        if (k < 1) value = value * SoA[2];

        ya[(i - cxB1) + ordn * ((j - cxB2) + ordn * (k - cxB3))] = value;
      }
    }
  }

  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Polynomial interpolation (Neville's algorithm), port of the Fortran
 * subroutine `polint`.
 *
 *   xa, ya  the ordn sample abscissas and values
 *   x       evaluation point
 *   y       receives the interpolated value
 *   dy      receives the last correction added to *y (0 when ordn == 1)
 *   ordn    number of sample points, must be >= 1
 *
 * Terminates the program with exit(1) when ordn is not positive, when the
 * work space cannot be allocated, or when two abscissas coincide.
 */
void xh_polint(const double *xa, const double *ya, double x,
               double *y, double *dy, int ordn)
{
  if (ordn <= 0) {
    fprintf(stderr, "polint: invalid number of points %d\n", ordn);
    exit(1);
  }

  /* One allocation holds the three work arrays c, d and ho. */
  double *work = (double *)malloc(3 * (size_t)ordn * sizeof(double));
  if (!work) {
    fprintf(stderr, "polint: malloc failed\n");
    exit(1);
  }
  double *c = work;
  double *d = work + ordn;
  double *ho = work + 2 * (size_t)ordn;

  /* Initialise the tableau and locate the sample closest to x. */
  int ns = 0;
  double dif = fabs(x - xa[0]);
  for (int i = 0; i < ordn; i++) {
    const double dift = fabs(x - xa[i]);
    if (dift < dif) {
      ns = i;
      dif = dift;
    }
    c[i] = ya[i];
    d[i] = ya[i];
    ho[i] = xa[i] - x;
  }

  *y = ya[ns];
  *dy = 0.0;
  ns -= 1;   /* 0-based: ns is now (Fortran ns after its decrement) - 1 */

  for (int m = 1; m <= ordn - 1; m++) {
    const int n_m = ordn - m;   /* number of valid tableau entries this level */

    for (int i = 0; i < n_m; i++) {
      const double hp = ho[i];
      const double h = ho[i + m];
      double den_val = hp - h;

      if (den_val == 0.0) {
        fprintf(stderr, "failure in polint for point %g\n", x);
        fprintf(stderr, "with input points xa: ");
        for (int t = 0; t < ordn; t++) fprintf(stderr, "%g ", xa[t]);
        fprintf(stderr, "\n");
        exit(1);
      }

      den_val = (c[i + 1] - d[i]) / den_val;
      d[i] = h * den_val;
      c[i] = hp * den_val;
    }

    /*
     * Fortran: if (2*ns < n-m) dy = c(ns+1) else dy = d(ns); ns = ns-1
     * with a 1-based ns.  Our ns is one smaller, hence the test must use
     * (ns + 1); the array subscripts c[ns + 1] / d[ns] are already the
     * 0-based equivalents.  Testing 2*ns instead would pick c[ns + 1]
     * beyond the n_m entries updated at this level.
     */
    if (2 * (ns + 1) < n_m) {
      *dy = c[ns + 1];
    } else {
      *dy = d[ns];
      ns -= 1;
    }
    *y += *dy;
  }

  free(work);
}
|
||||||
|
|
||||||
|
void xh_polin3(const double *x1a, const double *x2a, const double *x3a,
|
||||||
|
const double *ya, double x1, double x2, double x3,
|
||||||
|
double &y, double *dy, int ordn)
|
||||||
|
{
|
||||||
|
// ya is ordn x ordn x ordn in Fortran layout (column-major)
|
||||||
|
#define YA3(i,j,k) ya[(i) + ordn*((j) + ordn*(k))] // i,j,k: 0..ordn-1
|
||||||
|
|
||||||
|
int j, k;
|
||||||
|
double dy_temp;
|
||||||
|
|
||||||
|
// yatmp(j,k) in Fortran code is ordn x ordn, treat column-major:
|
||||||
|
// yatmp(j,k) -> yatmp[j + ordn*k]
|
||||||
|
double *yatmp = (double*)malloc((size_t)ordn * (size_t)ordn * sizeof(double));
|
||||||
|
double *ymtmp = (double*)malloc((size_t)ordn * sizeof(double));
|
||||||
|
if (!yatmp || !ymtmp) {
|
||||||
|
fprintf(stderr, "polin3: malloc failed\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
#define YAT(j,k) yatmp[(j) + ordn*(k)]
|
||||||
|
|
||||||
|
for (k = 0; k < ordn; k++) {
|
||||||
|
for (j = 0; j < ordn; j++) {
|
||||||
|
// call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp)
|
||||||
|
// ya(:,j,k) contiguous: base is &YA3(0,j,k)
|
||||||
|
xh_polint(x1a, &YA3(0, j, k), x1, &YAT(j, k), &dy_temp, ordn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (k = 0; k < ordn; k++) {
|
||||||
|
// call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp)
|
||||||
|
xh_polint(x2a, &YAT(0, k), x2, &ymtmp[k], &dy_temp, ordn);
|
||||||
|
}
|
||||||
|
|
||||||
|
xh_polint(x3a, ymtmp, x3, &y, dy, ordn);
|
||||||
|
|
||||||
|
#undef YAT
|
||||||
|
free(yatmp);
|
||||||
|
free(ymtmp);
|
||||||
|
#undef YA3
|
||||||
|
}
|
||||||
338
AMSS_NCKU_source/xh_share_func.h
Normal file
338
AMSS_NCKU_source/xh_share_func.h
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
#ifndef SHARE_FUNC_H
|
||||||
|
#define SHARE_FUNC_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <omp.h>
|
||||||
|
/*
 * Linear index of the 0-based point (i0, j0, k0) on the main grid of
 * extents ex; the first index runs fastest.
 */
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
  const size_t row = (size_t)ex[0];
  const size_t plane = row * (size_t)ex[1];
  return (size_t)i0 + (size_t)j0 * row + (size_t)k0 * plane;
}
|
||||||
|
|
||||||
|
/*
 * Linear index into the ghost-extended array that corresponds to the
 * Fortran array fh(-1:ex1, -1:ex2, -1:ex3) (ord = 2, so indices are
 * shifted by 1).  iF/jF/kF are Fortran indices and may be -1, 0, 1..ex.
 */
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
  const int row = ex[0] + 2;     /* ex1 + ord */
  const int col = ex[1] + 2;     /* ex2 + ord */

  const int a = iF + 1;          /* 0..ex1+1 */
  const int b = jF + 1;          /* 0..ex2+1 */
  const int c = kF + 1;          /* 0..ex3+1 */

  const size_t plane = (size_t)row * (size_t)col;
  return (size_t)a + (size_t)b * (size_t)row + (size_t)c * plane;
}
|
||||||
|
|
||||||
|
/*
 * Linear index into the ghost-extended array that corresponds to the
 * Fortran array fh(-2:ex1, -2:ex2, -2:ex3) (ord = 3, so indices are
 * shifted by 2).  iF/jF/kF are Fortran indices and may be negative.
 */
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
  const int row = ex[0] + 3;     /* ex1 + ord */
  const int col = ex[1] + 3;     /* ex2 + ord */

  const int a = iF + 2;          /* 0..ex1+2 */
  const int b = jF + 2;          /* 0..ex2+2 */
  const int c = kF + 2;          /* 0..ex3+2 */

  const size_t plane = (size_t)row * (size_t)col;
  return (size_t)a + (size_t)b * (size_t)row + (size_t)c * plane;
}
|
||||||
|
|
||||||
|
/*
 * Layout conventions shared by idx_func0 / idx_funcc_F / symmetry_bd:
 *
 *   func :  Fortran (1..extc1, 1..extc2, 1..extc3), 1-based
 *   funcc:  Fortran (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3)
 *
 * In C
 *   func  is addressed 0-based: i0=0..extc1-1, j0=0..extc2-1, k0=0..extc3-1
 *   funcc is a flat array addressed with shifted indices:
 *         iF in [-ord+1..extc1]  ->  ii = iF + (ord-1) in [0..extc1+ord-1]
 *         so the row length is nx = extc1 + ord,
 *         and likewise ny = extc2 + ord, nz = extc3 + ord.
 */

/* Linear index of the 0-based point (i0, j0, k0) inside func. */
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
  const size_t row = (size_t)extc[0];
  const size_t plane = row * (size_t)extc[1];
  return (size_t)i0 + (size_t)j0 * row + (size_t)k0 * plane;
}
|
||||||
|
|
||||||
|
/*
 * Linear index of the Fortran point (iF, jF, kF) inside the ghost-extended
 * array funcc(-ord+1..extc1, -ord+1..extc2, -ord+1..extc3).
 */
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
  const int ghost = ord - 1;           /* iF = -ghost .. extc1 */
  const int row = extc[0] + ord;       /* extc1 + ord entries per row */
  const int col = extc[1] + ord;

  const int a = iF + ghost;            /* 0..extc1+ghost */
  const int b = jF + ghost;            /* 0..extc2+ghost */
  const int c = kF + ghost;            /* 0..extc3+ghost */

  const size_t plane = (size_t)row * (size_t)col;
  return (size_t)a + (size_t)b * (size_t)row + (size_t)c * plane;
}
|
||||||
|
|
||||||
|
/*
 * Copy func into the interior of the ghost-extended array funcc and fill
 * the ord ghost layers on the low side of every axis by reflection.
 *
 * Equivalent Fortran:
 *   funcc(1:extc1,1:extc2,1:extc3) = func
 *   do i=0,ord-1
 *     funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
 *   enddo
 *
 *   ord    number of ghost layers; funcc spans -ord+1..extc on each axis
 *   extc   extents of func
 *   func   input, extc1*extc2*extc3 values, first index fastest
 *   funcc  output, (extc1+ord)*(extc2+ord)*(extc3+ord) values
 *   SoA    parity factor of each axis
 */
static inline void symmetry_bd(int ord,
                               const int extc[3],
                               const double *func,
                               double *funcc,
                               const double SoA[3])
{
  const int n1 = extc[0], n2 = extc[1], n3 = extc[2];
  const int ghost = ord - 1;                    /* Fortran index 0 sits at offset ghost */

  const size_t wide_row = (size_t)(n1 + ord);   /* row length of funcc */
  const size_t wide_col = (size_t)(n2 + ord);
  const size_t wide_plane = wide_row * wide_col;

  const size_t src_row = (size_t)n1;            /* row length of func */
  const size_t src_plane = src_row * (size_t)n2;

  /* 1) interior: funcc(1:extc1,1:extc2,1:extc3) = func */
  for (int k0 = 0; k0 < n3; ++k0) {
    for (int j0 = 0; j0 < n2; ++j0) {
      const double *from = func + (size_t)j0 * src_row + (size_t)k0 * src_plane;
      double *to = funcc + (size_t)(1 + ghost)
                         + (size_t)(j0 + 1 + ghost) * wide_row
                         + (size_t)(k0 + 1 + ghost) * wide_plane;
      for (int i0 = 0; i0 < n1; ++i0) {
        to[i0] = from[i0];
      }
    }
  }

  /* 2) x ghosts: funcc(-m, 1:extc2, 1:extc3) = funcc(m+1, ...) * SoA(1) */
  for (int m = 0; m < ord; ++m) {
    const size_t dst = (size_t)(ghost - m);
    const size_t src = (size_t)(ghost + m + 1);
    for (int kF = 1; kF <= n3; ++kF) {
      for (int jF = 1; jF <= n2; ++jF) {
        double *row = funcc + (size_t)(jF + ghost) * wide_row
                            + (size_t)(kF + ghost) * wide_plane;
        row[dst] = row[src] * SoA[0];
      }
    }
  }

  /*
   * 3) y ghosts: funcc(:, -m, 1:extc3) = funcc(:, m+1, 1:extc3) * SoA(2)
   *    The ":" covers the whole extended x range, ghosts included.
   */
  for (int m = 0; m < ord; ++m) {
    for (int kF = 1; kF <= n3; ++kF) {
      double *slab = funcc + (size_t)(kF + ghost) * wide_plane;
      double *dst = slab + (size_t)(ghost - m) * wide_row;
      const double *src = slab + (size_t)(ghost + m + 1) * wide_row;
      for (int a = 0; a < n1 + ord; ++a) {
        dst[a] = src[a] * SoA[1];
      }
    }
  }

  /* 4) z ghosts: funcc(:, :, -m) = funcc(:, :, m+1) * SoA(3), whole planes */
  for (int m = 0; m < ord; ++m) {
    double *dst = funcc + (size_t)(ghost - m) * wide_plane;
    const double *src = funcc + (size_t)(ghost + m + 1) * wide_plane;
    for (int b = 0; b < n2 + ord; ++b) {
      for (int a = 0; a < n1 + ord; ++a) {
        const size_t q = (size_t)a + (size_t)b * wide_row;
        dst[q] = src[q] * SoA[2];
      }
    }
  }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* 你已有的函数:idx_ex / idx_fh_F_ord2 以及 fh 的布局 */
|
||||||
|
static inline void fdderivs_xh(
|
||||||
|
int i0, int j0, int k0,
|
||||||
|
const int ex[3],
|
||||||
|
const double *fh,
|
||||||
|
int iminF, int jminF, int kminF,
|
||||||
|
int imaxF, int jmaxF, int kmaxF,
|
||||||
|
double Fdxdx, double Fdydy, double Fdzdz,
|
||||||
|
double Fdxdy, double Fdxdz, double Fdydz,
|
||||||
|
double Sdxdx, double Sdydy, double Sdzdz,
|
||||||
|
double Sdxdy, double Sdxdz, double Sdydz,
|
||||||
|
double *fxx, double *fxy, double *fxz,
|
||||||
|
double *fyy, double *fyz, double *fzz
|
||||||
|
){
|
||||||
|
const double F8 = 8.0;
|
||||||
|
const double F16 = 16.0;
|
||||||
|
const double F30 = 30.0;
|
||||||
|
const double TWO = 2.0;
|
||||||
|
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
/* 高阶分支:i±2,j±2,k±2 都在范围内 */
|
||||||
|
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
||||||
|
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
||||||
|
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
||||||
|
{
|
||||||
|
fxx[p] = Fdxdx * (
|
||||||
|
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyy[p] = Fdydy * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fzz[p] = Fdzdz * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||||
|
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
|
||||||
|
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
/* fxy 高阶 */
|
||||||
|
{
|
||||||
|
const double t_jm2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jm1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
|
||||||
|
|
||||||
|
const double t_jp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
|
||||||
|
|
||||||
|
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fxz 高阶 */
|
||||||
|
{
|
||||||
|
const double t_km2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
|
||||||
|
|
||||||
|
const double t_km1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
|
||||||
|
|
||||||
|
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fyz 高阶 */
|
||||||
|
{
|
||||||
|
const double t_km2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
|
||||||
|
|
||||||
|
const double t_km1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp1 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
|
||||||
|
|
||||||
|
const double t_kp2 =
|
||||||
|
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
|
||||||
|
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
|
||||||
|
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
|
||||||
|
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
|
||||||
|
|
||||||
|
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* 二阶分支:i±1,j±1,k±1 在范围内 */
|
||||||
|
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
||||||
|
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
||||||
|
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
||||||
|
{
|
||||||
|
fxx[p] = Sdxdx * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyy[p] = Sdydy * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fzz[p] = Sdzdz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
|
||||||
|
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fxy[p] = Sdxdy * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fxz[p] = Sdxdz * (
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fyz[p] = Sdydz * (
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
|
||||||
|
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fxx[p] = 0.0; fyy[p] = 0.0; fzz[p] = 0.0;
|
||||||
|
fxy[p] = 0.0; fxz[p] = 0.0; fyz[p] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
27
AMSS_NCKU_source/xh_tool.h
Normal file
27
AMSS_NCKU_source/xh_tool.h
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
#include "xh_share_func.h"
|
||||||
|
void fdderivs(const int ex[3],
|
||||||
|
const double *f,
|
||||||
|
double *fxx, double *fxy, double *fxz,
|
||||||
|
double *fyy, double *fyz, double *fzz,
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
double SYM1, double SYM2, double SYM3,
|
||||||
|
int Symmetry, int onoff);
|
||||||
|
|
||||||
|
void fderivs(const int ex[3],
|
||||||
|
const double *f,
|
||||||
|
double *fx, double *fy, double *fz,
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
double SYM1, double SYM2, double SYM3,
|
||||||
|
int Symmetry, int onoff);
|
||||||
|
|
||||||
|
void kodis(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, double *f_rhs,
|
||||||
|
const double SoA[3],
|
||||||
|
int Symmetry, double eps);
|
||||||
|
|
||||||
|
void lopsided(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, double *f_rhs,
|
||||||
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
|
int Symmetry, const double SoA[3]);
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,195 +1,195 @@
|
|||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
##
|
##
|
||||||
## Generate input file for the AMSS-NCKU TwoPuncture routine
|
## Generate input file for the AMSS-NCKU TwoPuncture routine
|
||||||
## Author: Xiaoqu
|
## Author: Xiaoqu
|
||||||
## 2024/11/27
|
## 2024/11/27
|
||||||
## Modified: 2025/01/21
|
## Modified: 2025/01/21
|
||||||
##
|
##
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import os
|
import os
|
||||||
import AMSS_NCKU_Input as input_data ## import program input file
|
import AMSS_NCKU_Input as input_data ## import program input file
|
||||||
import math
|
import math
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
## Import binary black hole coordinates
|
## Import binary black hole coordinates
|
||||||
|
|
||||||
## If puncture data are set to "Automatically-BBH", compute initial orbital
|
## If puncture data are set to "Automatically-BBH", compute initial orbital
|
||||||
## positions and momenta according to the settings and rescale the total
|
## positions and momenta according to the settings and rescale the total
|
||||||
## binary mass to M = 1 for TwoPuncture input.
|
## binary mass to M = 1 for TwoPuncture input.
|
||||||
|
|
||||||
if (input_data.puncture_data_set == "Automatically-BBH" ):
|
if (input_data.puncture_data_set == "Automatically-BBH" ):
|
||||||
|
|
||||||
mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]
|
mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]
|
||||||
|
|
||||||
if ( mass_ratio_Q < 1.0 ):
|
if ( mass_ratio_Q < 1.0 ):
|
||||||
print( " mass_ratio setting is wrong, please reset!!!" )
|
print( " mass_ratio setting is wrong, please reset!!!" )
|
||||||
print( " set the first black hole to be the larger mass!!!" )
|
print( " set the first black hole to be the larger mass!!!" )
|
||||||
|
|
||||||
BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
|
BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
|
||||||
BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )
|
BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )
|
||||||
|
|
||||||
## Load binary separation and eccentricity
|
## Load binary separation and eccentricity
|
||||||
distance = input_data.Distance
|
distance = input_data.Distance
|
||||||
e0 = input_data.e0
|
e0 = input_data.e0
|
||||||
|
|
||||||
## Set binary component coordinates
|
## Set binary component coordinates
|
||||||
## Note: place the larger-mass black hole at positive y and the
|
## Note: place the larger-mass black hole at positive y and the
|
||||||
## smaller-mass black hole at negative y to follow Brugmann's convention
|
## smaller-mass black hole at negative y to follow Brugmann's convention
|
||||||
## Coordinate convention for TwoPuncture input (Brugmann):
|
## Coordinate convention for TwoPuncture input (Brugmann):
|
||||||
## -----0-----> y
|
## -----0-----> y
|
||||||
## - +
|
## - +
|
||||||
|
|
||||||
|
|
||||||
BBH_X1 = 0.0
|
BBH_X1 = 0.0
|
||||||
BBH_Y1 = distance * 1.0 / ( 1 + mass_ratio_Q )
|
BBH_Y1 = distance * 1.0 / ( 1 + mass_ratio_Q )
|
||||||
BBH_Z1 = 0.0
|
BBH_Z1 = 0.0
|
||||||
|
|
||||||
BBH_X2 = 0.0
|
BBH_X2 = 0.0
|
||||||
BBH_Y2 = - distance * mass_ratio_Q / ( 1 + mass_ratio_Q )
|
BBH_Y2 = - distance * mass_ratio_Q / ( 1 + mass_ratio_Q )
|
||||||
BBH_Z2 = 0.0
|
BBH_Z2 = 0.0
|
||||||
|
|
||||||
position_BH = numpy.zeros( (2,3) )
|
position_BH = numpy.zeros( (2,3) )
|
||||||
position_BH[0] = [BBH_X1, BBH_Y1, BBH_Z1]
|
position_BH[0] = [BBH_X1, BBH_Y1, BBH_Z1]
|
||||||
position_BH[1] = [BBH_X2, BBH_Y2, BBH_Z2]
|
position_BH[1] = [BBH_X2, BBH_Y2, BBH_Z2]
|
||||||
|
|
||||||
## Optionally load momentum from parameter file
|
## Optionally load momentum from parameter file
|
||||||
## momentum_BH = input_data.momentum_BH
|
## momentum_BH = input_data.momentum_BH
|
||||||
|
|
||||||
## Compute orbital momenta using the BBH_orbit_parameter module
|
## Compute orbital momenta using the BBH_orbit_parameter module
|
||||||
import BBH_orbit_parameter
|
import BBH_orbit_parameter
|
||||||
|
|
||||||
## Use the dimensionless spins defined in BBH_orbit_parameter
|
## Use the dimensionless spins defined in BBH_orbit_parameter
|
||||||
BBH_S1 = BBH_orbit_parameter.S1
|
BBH_S1 = BBH_orbit_parameter.S1
|
||||||
BBH_S2 = BBH_orbit_parameter.S2
|
BBH_S2 = BBH_orbit_parameter.S2
|
||||||
|
|
||||||
momentum_BH = numpy.zeros( (2,3) )
|
momentum_BH = numpy.zeros( (2,3) )
|
||||||
|
|
||||||
## Compute initial orbital momenta from post-Newtonian-based routine
|
## Compute initial orbital momenta from post-Newtonian-based routine
|
||||||
momentum_BH[0], momentum_BH[1] = BBH_orbit_parameter.generate_BBH_orbit_parameters( BBH_M1, BBH_M2, BBH_S1, BBH_S2, distance, e0 )
|
momentum_BH[0], momentum_BH[1] = BBH_orbit_parameter.generate_BBH_orbit_parameters( BBH_M1, BBH_M2, BBH_S1, BBH_S2, distance, e0 )
|
||||||
|
|
||||||
## Set spin angular momentum input for TwoPuncture
|
## Set spin angular momentum input for TwoPuncture
|
||||||
## Note: these are dimensional angular momenta (not dimensionless); multiply
|
## Note: these are dimensional angular momenta (not dimensionless); multiply
|
||||||
## by the square of the mass scale. Here masses are scaled so total M=1.
|
## by the square of the mass scale. Here masses are scaled so total M=1.
|
||||||
## angular_momentum_BH = input_data.angular_momentum_BH
|
## angular_momentum_BH = input_data.angular_momentum_BH
|
||||||
|
|
||||||
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )
|
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )
|
||||||
|
|
||||||
for i in range(input_data.puncture_number):
|
for i in range(input_data.puncture_number):
|
||||||
|
|
||||||
if ( input_data.Symmetry == "equatorial-symmetry" ):
|
if ( input_data.Symmetry == "equatorial-symmetry" ):
|
||||||
if i==0:
|
if i==0:
|
||||||
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * input_data.parameter_BH[i,2] ]
|
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * input_data.parameter_BH[i,2] ]
|
||||||
elif i==1:
|
elif i==1:
|
||||||
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * input_data.parameter_BH[i,2] ]
|
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * input_data.parameter_BH[i,2] ]
|
||||||
else:
|
else:
|
||||||
angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]
|
angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]
|
||||||
|
|
||||||
elif ( input_data.Symmetry == "no-symmetry" ):
|
elif ( input_data.Symmetry == "no-symmetry" ):
|
||||||
|
|
||||||
if i==0:
|
if i==0:
|
||||||
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
|
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
|
||||||
elif i==1:
|
elif i==1:
|
||||||
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
|
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
|
||||||
else:
|
else:
|
||||||
angular_momentum_BH[i] = (input_data.parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
|
angular_momentum_BH[i] = (input_data.parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
|
||||||
|
|
||||||
#######################################################
|
#######################################################
|
||||||
|
|
||||||
## If puncture data are set to "Manually", read initial positions and momenta
|
## If puncture data are set to "Manually", read initial positions and momenta
|
||||||
## directly from the parameter file. Rescale the total binary mass to M=1
|
## directly from the parameter file. Rescale the total binary mass to M=1
|
||||||
## for TwoPuncture input.
|
## for TwoPuncture input.
|
||||||
|
|
||||||
elif (input_data.puncture_data_set == "Manually" ):
|
elif (input_data.puncture_data_set == "Manually" ):
|
||||||
|
|
||||||
mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]
|
mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]
|
||||||
|
|
||||||
if ( mass_ratio_Q < 1.0 ):
|
if ( mass_ratio_Q < 1.0 ):
|
||||||
print( " mass_ratio setting is wrong, please reset!!!" )
|
print( " mass_ratio setting is wrong, please reset!!!" )
|
||||||
print( " set the first black hole to be the larger mass!!!" )
|
print( " set the first black hole to be the larger mass!!!" )
|
||||||
|
|
||||||
BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
|
BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
|
||||||
BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )
|
BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )
|
||||||
|
|
||||||
parameter_BH = input_data.parameter_BH
|
parameter_BH = input_data.parameter_BH
|
||||||
position_BH = input_data.position_BH
|
position_BH = input_data.position_BH
|
||||||
momentum_BH = input_data.momentum_BH
|
momentum_BH = input_data.momentum_BH
|
||||||
|
|
||||||
## Compute binary separation and load eccentricity
|
## Compute binary separation and load eccentricity
|
||||||
distance = math.sqrt( (position_BH[0,0]-position_BH[1,0])**2 + (position_BH[0,1]-position_BH[1,1])**2 + (position_BH[0,2]-position_BH[1,2])**2 )
|
distance = math.sqrt( (position_BH[0,0]-position_BH[1,0])**2 + (position_BH[0,1]-position_BH[1,1])**2 + (position_BH[0,2]-position_BH[1,2])**2 )
|
||||||
e0 = input_data.e0
|
e0 = input_data.e0
|
||||||
|
|
||||||
## Set spin angular momentum input for TwoPuncture
|
## Set spin angular momentum input for TwoPuncture
|
||||||
## Note: these are dimensional angular momenta (not dimensionless); multiply
|
## Note: these are dimensional angular momenta (not dimensionless); multiply
|
||||||
## by the square of the mass scale. Here masses are scaled so total M=1.
|
## by the square of the mass scale. Here masses are scaled so total M=1.
|
||||||
|
|
||||||
## angular_momentum_BH = input_data.angular_momentum_BH
|
## angular_momentum_BH = input_data.angular_momentum_BH
|
||||||
|
|
||||||
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )
|
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )
|
||||||
|
|
||||||
|
|
||||||
for i in range(input_data.puncture_number):
|
for i in range(input_data.puncture_number):
|
||||||
|
|
||||||
if ( input_data.Symmetry == "equatorial-symmetry" ):
|
if ( input_data.Symmetry == "equatorial-symmetry" ):
|
||||||
if i==0:
|
if i==0:
|
||||||
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * parameter_BH[i,2] ]
|
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * parameter_BH[i,2] ]
|
||||||
elif i==1:
|
elif i==1:
|
||||||
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * parameter_BH[i,2] ]
|
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * parameter_BH[i,2] ]
|
||||||
else:
|
else:
|
||||||
angular_momentum_BH[i] = [ 0.0, 0.0, (parameter_BH[i,0]**2) * parameter_BH[i,2] ]
|
angular_momentum_BH[i] = [ 0.0, 0.0, (parameter_BH[i,0]**2) * parameter_BH[i,2] ]
|
||||||
|
|
||||||
elif ( input_data.Symmetry == "no-symmetry" ):
|
elif ( input_data.Symmetry == "no-symmetry" ):
|
||||||
if i==0:
|
if i==0:
|
||||||
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
|
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
|
||||||
elif i==1:
|
elif i==1:
|
||||||
angular_momentum_BH[i] = (BBH_M2**2) * input_data.dimensionless_spin_BH[i]
|
angular_momentum_BH[i] = (BBH_M2**2) * input_data.dimensionless_spin_BH[i]
|
||||||
else:
|
else:
|
||||||
angular_momentum_BH[i] = (parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
|
angular_momentum_BH[i] = (parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
## Write the above binary data into the AMSS-NCKU TwoPuncture input file
|
## Write the above binary data into the AMSS-NCKU TwoPuncture input file
|
||||||
|
|
||||||
def generate_AMSSNCKU_TwoPuncture_input():
|
def generate_AMSSNCKU_TwoPuncture_input():
|
||||||
|
|
||||||
file1 = open( os.path.join(input_data.File_directory, "AMSS-NCKU-TwoPuncture.input"), "w")
|
file1 = open( os.path.join(input_data.File_directory, "AMSS-NCKU-TwoPuncture.input"), "w")
|
||||||
|
|
||||||
print( "# -----0-----> y", file=file1 )
|
print( "# -----0-----> y", file=file1 )
|
||||||
print( "# - + use Brugmann's convention", file=file1 )
|
print( "# - + use Brugmann's convention", file=file1 )
|
||||||
print( "ABE::mp = -1.0", file=file1 ) ## use negative values so the code solves for bare masses automatically
|
print( "ABE::mp = -1.0", file=file1 ) ## use negative values so the code solves for bare masses automatically
|
||||||
print( "ABE::mm = -1.0", file=file1 )
|
print( "ABE::mm = -1.0", file=file1 )
|
||||||
print( "# b = D/2", file=file1 )
|
print( "# b = D/2", file=file1 )
|
||||||
print( "ABE::b = ", ( distance / 2.0 ), file=file1 )
|
print( "ABE::b = ", ( distance / 2.0 ), file=file1 )
|
||||||
print( "ABE::P_plusx = ", momentum_BH[0,0], file=file1 )
|
print( "ABE::P_plusx = ", momentum_BH[0,0], file=file1 )
|
||||||
print( "ABE::P_plusy = ", momentum_BH[0,1], file=file1 )
|
print( "ABE::P_plusy = ", momentum_BH[0,1], file=file1 )
|
||||||
print( "ABE::P_plusz = ", momentum_BH[0,2], file=file1 )
|
print( "ABE::P_plusz = ", momentum_BH[0,2], file=file1 )
|
||||||
print( "ABE::P_minusx = ", momentum_BH[1,0], file=file1 )
|
print( "ABE::P_minusx = ", momentum_BH[1,0], file=file1 )
|
||||||
print( "ABE::P_minusy = ", momentum_BH[1,1], file=file1 )
|
print( "ABE::P_minusy = ", momentum_BH[1,1], file=file1 )
|
||||||
print( "ABE::P_minusz = ", momentum_BH[1,2], file=file1 )
|
print( "ABE::P_minusz = ", momentum_BH[1,2], file=file1 )
|
||||||
print( "ABE::S_plusx = ", angular_momentum_BH[0,0], file=file1 )
|
print( "ABE::S_plusx = ", angular_momentum_BH[0,0], file=file1 )
|
||||||
print( "ABE::S_plusy = ", angular_momentum_BH[0,1], file=file1 )
|
print( "ABE::S_plusy = ", angular_momentum_BH[0,1], file=file1 )
|
||||||
print( "ABE::S_plusz = ", angular_momentum_BH[0,2], file=file1 )
|
print( "ABE::S_plusz = ", angular_momentum_BH[0,2], file=file1 )
|
||||||
print( "ABE::S_minusx = ", angular_momentum_BH[1,0], file=file1 )
|
print( "ABE::S_minusx = ", angular_momentum_BH[1,0], file=file1 )
|
||||||
print( "ABE::S_minusy = ", angular_momentum_BH[1,1], file=file1 )
|
print( "ABE::S_minusy = ", angular_momentum_BH[1,1], file=file1 )
|
||||||
print( "ABE::S_minusz = ", angular_momentum_BH[1,2], file=file1 )
|
print( "ABE::S_minusz = ", angular_momentum_BH[1,2], file=file1 )
|
||||||
print( "ABE::Mp = ", BBH_M1, file=file1 )
|
print( "ABE::Mp = ", BBH_M1, file=file1 )
|
||||||
print( "ABE::Mm = ", BBH_M2, file=file1 )
|
print( "ABE::Mm = ", BBH_M2, file=file1 )
|
||||||
print( "ABE::admtol = 1.e-8", file=file1 )
|
print( "ABE::admtol = 1.e-8", file=file1 )
|
||||||
print( "ABE::Newtontol = 5.e-12", file=file1 )
|
print( "ABE::Newtontol = 5.e-12", file=file1 )
|
||||||
print( "ABE::nA = 50", file=file1 )
|
print( "ABE::nA = 50", file=file1 )
|
||||||
print( "ABE::nB = 50", file=file1 )
|
print( "ABE::nB = 50", file=file1 )
|
||||||
print( "ABE::nphi = 26", file=file1 )
|
print( "ABE::nphi = 26", file=file1 )
|
||||||
print( "ABE::Newtonmaxit = 50", file=file1 )
|
print( "ABE::Newtonmaxit = 50", file=file1 )
|
||||||
|
|
||||||
file1.close()
|
file1.close()
|
||||||
|
|
||||||
return file1
|
return file1
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,72 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Convert interp_lb_profile.bin to a C header for compile-time embedding."""
|
|
||||||
import struct, sys
|
|
||||||
|
|
||||||
if len(sys.argv) < 3:
|
|
||||||
print(f"Usage: {sys.argv[0]} <profile.bin> <output.h>")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
with open(sys.argv[1], 'rb') as f:
|
|
||||||
magic, version, nprocs, num_heavy = struct.unpack('IIii', f.read(16))
|
|
||||||
threshold = struct.unpack('d', f.read(8))[0]
|
|
||||||
times = list(struct.unpack(f'{nprocs}d', f.read(nprocs * 8)))
|
|
||||||
heavy = list(struct.unpack(f'{num_heavy}i', f.read(num_heavy * 4)))
|
|
||||||
|
|
||||||
# For each heavy rank, compute split: left half -> lighter neighbor, right half -> heavy rank
|
|
||||||
# (or vice versa depending on which neighbor is lighter)
|
|
||||||
splits = []
|
|
||||||
for hr in heavy:
|
|
||||||
prev_t = times[hr - 1] if hr > 0 else 1e30
|
|
||||||
next_t = times[hr + 1] if hr < nprocs - 1 else 1e30
|
|
||||||
if prev_t <= next_t:
|
|
||||||
splits.append((hr, hr - 1, hr)) # (block_id, r_left, r_right)
|
|
||||||
else:
|
|
||||||
splits.append((hr, hr, hr + 1))
|
|
||||||
|
|
||||||
# Also remap the displaced neighbor blocks
|
|
||||||
remaps = {}
|
|
||||||
for hr, r_l, r_r in splits:
|
|
||||||
if r_l != hr:
|
|
||||||
# We took r_l's slot, so remap block r_l to its other neighbor
|
|
||||||
displaced = r_l
|
|
||||||
if displaced > 0 and displaced - 1 not in [s[0] for s in splits]:
|
|
||||||
remaps[displaced] = displaced - 1
|
|
||||||
elif displaced < nprocs - 1:
|
|
||||||
remaps[displaced] = displaced + 1
|
|
||||||
else:
|
|
||||||
displaced = r_r
|
|
||||||
if displaced < nprocs - 1 and displaced + 1 not in [s[0] for s in splits]:
|
|
||||||
remaps[displaced] = displaced + 1
|
|
||||||
elif displaced > 0:
|
|
||||||
remaps[displaced] = displaced - 1
|
|
||||||
|
|
||||||
with open(sys.argv[2], 'w') as out:
|
|
||||||
out.write("/* Auto-generated from interp_lb_profile.bin — do not edit */\n")
|
|
||||||
out.write("#ifndef INTERP_LB_PROFILE_DATA_H\n")
|
|
||||||
out.write("#define INTERP_LB_PROFILE_DATA_H\n\n")
|
|
||||||
out.write(f"#define INTERP_LB_NPROCS {nprocs}\n")
|
|
||||||
out.write(f"#define INTERP_LB_NUM_HEAVY {num_heavy}\n\n")
|
|
||||||
out.write(f"static const int interp_lb_heavy_blocks[{num_heavy}] = {{")
|
|
||||||
out.write(", ".join(str(h) for h in heavy))
|
|
||||||
out.write("};\n\n")
|
|
||||||
out.write("/* Split table: {block_id, r_left, r_right} */\n")
|
|
||||||
out.write(f"static const int interp_lb_splits[{num_heavy}][3] = {{\n")
|
|
||||||
for bid, rl, rr in splits:
|
|
||||||
out.write(f" {{{bid}, {rl}, {rr}}},\n")
|
|
||||||
out.write("};\n\n")
|
|
||||||
out.write("/* Rank remap for displaced neighbor blocks */\n")
|
|
||||||
out.write(f"static const int interp_lb_num_remaps = {len(remaps)};\n")
|
|
||||||
out.write(f"static const int interp_lb_remaps[][2] = {{\n")
|
|
||||||
for src, dst in sorted(remaps.items()):
|
|
||||||
out.write(f" {{{src}, {dst}}},\n")
|
|
||||||
if not remaps:
|
|
||||||
out.write(" {-1, -1},\n")
|
|
||||||
out.write("};\n\n")
|
|
||||||
out.write("#endif /* INTERP_LB_PROFILE_DATA_H */\n")
|
|
||||||
|
|
||||||
print(f"Generated {sys.argv[2]}:")
|
|
||||||
print(f" {num_heavy} heavy blocks to split: {heavy}")
|
|
||||||
for bid, rl, rr in splits:
|
|
||||||
print(f" block {bid}: split -> rank {rl} (left), rank {rr} (right)")
|
|
||||||
for src, dst in sorted(remaps.items()):
|
|
||||||
print(f" block {src}: remap -> rank {dst}")
|
|
||||||
1118
generate_macrodef.py
1118
generate_macrodef.py
File diff suppressed because it is too large
Load Diff
@@ -1,220 +1,192 @@
|
|||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
##
|
##
|
||||||
## This file defines the commands used to build and run AMSS-NCKU
|
## This file defines the commands used to build and run AMSS-NCKU
|
||||||
## Author: Xiaoqu
|
## Author: Xiaoqu
|
||||||
## 2025/01/24
|
## 2025/01/24
|
||||||
##
|
##
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
|
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
## CPU core binding configuration using taskset
|
||||||
|
## taskset ensures all child processes inherit the CPU affinity mask
|
||||||
def get_last_n_cores_per_socket(n=32):
|
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
||||||
"""
|
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
||||||
Read CPU topology via lscpu and return a taskset -c string
|
#NUMACTL_CPU_BIND = "taskset -c 0-111"
|
||||||
selecting the last `n` cores of each NUMA node (socket).
|
NUMACTL_CPU_BIND = "taskset -c 0-47"
|
||||||
|
NUMACTL_CPU_BIND2 = "OMP_NUM_THREADS=48 OMP_PROC_BIND=close OMP_PLACES={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47} taskset -c 0-47"
|
||||||
Example: 2 sockets x 56 cores each, n=32 -> node0: 24-55, node1: 80-111
|
#NUMACTL_CPU_BIND2 = "taskset -c 0-1"
|
||||||
-> "taskset -c 24-55,80-111"
|
## Build parallelism configuration
|
||||||
"""
|
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
||||||
result = subprocess.run(["lscpu", "--parse=NODE,CPU"], capture_output=True, text=True)
|
## Set make -j to utilize available cores for faster builds
|
||||||
|
BUILD_JOBS = 32
|
||||||
# Build a dict: node_id -> sorted list of CPU ids
|
|
||||||
node_cpus = {}
|
|
||||||
for line in result.stdout.splitlines():
|
##################################################################
|
||||||
if line.startswith("#") or not line.strip():
|
|
||||||
continue
|
|
||||||
parts = line.split(",")
|
##################################################################
|
||||||
if len(parts) < 2:
|
|
||||||
continue
|
## Compile the AMSS-NCKU main program ABE
|
||||||
node_id, cpu_id = int(parts[0]), int(parts[1])
|
|
||||||
node_cpus.setdefault(node_id, []).append(cpu_id)
|
def makefile_ABE():
|
||||||
|
|
||||||
segments = []
|
print( )
|
||||||
for node_id in sorted(node_cpus):
|
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||||
cpus = sorted(node_cpus[node_id])
|
print( )
|
||||||
selected = cpus[-n:] # last n cores of this socket
|
|
||||||
segments.append(f"{selected[0]}-{selected[-1]}")
|
## Build command with CPU binding to nohz_full cores
|
||||||
|
if (input_data.GPU_Calculation == "no"):
|
||||||
cpu_str = ",".join(segments)
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
|
||||||
total = len(segments) * n
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
print(f" CPU binding: taskset -c {cpu_str} ({total} cores, last {n} per socket)")
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
||||||
return f"taskset -c {cpu_str}"
|
else:
|
||||||
|
print( " CPU/GPU numerical calculation setting is wrong " )
|
||||||
|
print( )
|
||||||
## CPU core binding: dynamically select the last 32 cores of each socket (64 cores total)
|
|
||||||
NUMACTL_CPU_BIND = get_last_n_cores_per_socket(n=32)
|
## Execute the command with subprocess.Popen and stream output
|
||||||
|
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||||
## Build parallelism: match the number of bound cores
|
|
||||||
BUILD_JOBS = 64
|
## Read and print output lines as they arrive
|
||||||
|
for line in makefile_process.stdout:
|
||||||
|
print(line, end='') # stream output in real time
|
||||||
##################################################################
|
|
||||||
|
## Wait for the process to finish
|
||||||
|
makefile_return_code = makefile_process.wait()
|
||||||
|
if makefile_return_code != 0:
|
||||||
##################################################################
|
raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
|
||||||
|
|
||||||
## Compile the AMSS-NCKU main program ABE
|
print( )
|
||||||
|
print( " Compilation of the AMSS-NCKU executable file ABE is finished " )
|
||||||
def makefile_ABE():
|
print( )
|
||||||
|
|
||||||
print( )
|
return
|
||||||
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
|
|
||||||
print( )
|
##################################################################
|
||||||
|
|
||||||
## Build command with CPU binding to nohz_full cores
|
|
||||||
if (input_data.GPU_Calculation == "no"):
|
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=optimize ABE"
|
##################################################################
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
## Compile the AMSS-NCKU TwoPuncture program TwoPunctureABE
|
||||||
else:
|
|
||||||
print( " CPU/GPU numerical calculation setting is wrong " )
|
def makefile_TwoPunctureABE():
|
||||||
print( )
|
|
||||||
|
print( )
|
||||||
## Execute the command with subprocess.Popen and stream output
|
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
|
||||||
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
print( )
|
||||||
|
|
||||||
## Read and print output lines as they arrive
|
## Build command with CPU binding to nohz_full cores
|
||||||
for line in makefile_process.stdout:
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
|
||||||
print(line, end='') # stream output in real time
|
|
||||||
|
## Execute the command with subprocess.Popen and stream output
|
||||||
## Wait for the process to finish
|
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||||
makefile_return_code = makefile_process.wait()
|
|
||||||
if makefile_return_code != 0:
|
## Read and print output lines as they arrive
|
||||||
raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
|
for line in makefile_process.stdout:
|
||||||
|
print(line, end='') # stream output in real time
|
||||||
print( )
|
|
||||||
print( " Compilation of the AMSS-NCKU executable file ABE is finished " )
|
## Wait for the process to finish
|
||||||
print( )
|
makefile_return_code = makefile_process.wait()
|
||||||
|
if makefile_return_code != 0:
|
||||||
return
|
raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
|
||||||
|
|
||||||
##################################################################
|
print( )
|
||||||
|
print( " Compilation of the AMSS-NCKU executable file TwoPunctureABE is finished " )
|
||||||
|
print( )
|
||||||
|
|
||||||
##################################################################
|
return
|
||||||
|
|
||||||
## Compile the AMSS-NCKU TwoPuncture program TwoPunctureABE
|
##################################################################
|
||||||
|
|
||||||
def makefile_TwoPunctureABE():
|
|
||||||
|
|
||||||
print( )
|
##################################################################
|
||||||
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
|
|
||||||
print( )
|
## Run the AMSS-NCKU main program ABE
|
||||||
|
|
||||||
## Build command with CPU binding to nohz_full cores
|
def run_ABE():
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
|
|
||||||
|
print( )
|
||||||
## Execute the command with subprocess.Popen and stream output
|
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
||||||
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
print( )
|
||||||
|
|
||||||
## Read and print output lines as they arrive
|
## Define the command to run; cast other values to strings as needed
|
||||||
for line in makefile_process.stdout:
|
|
||||||
print(line, end='') # stream output in real time
|
if (input_data.GPU_Calculation == "no"):
|
||||||
|
#mpi_command = NUMACTL_CPU_BIND2 + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
## Wait for the process to finish
|
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
makefile_return_code = makefile_process.wait()
|
mpi_command = """ OMP_NUM_THREADS=48 OMP_PROC_BIND=close OMP_PLACES=cores mpirun -np 1 --cpu-bind=sockets ./ABE """
|
||||||
if makefile_return_code != 0:
|
mpi_command_outfile = "ABE_out.log"
|
||||||
raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
|
mpi_command = NUMACTL_CPU_BIND2 + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||||
print( )
|
mpi_command_outfile = "ABEGPU_out.log"
|
||||||
print( " Compilation of the AMSS-NCKU executable file TwoPunctureABE is finished " )
|
|
||||||
print( )
|
## Execute the MPI command and stream output
|
||||||
|
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||||
return
|
|
||||||
|
## Write ABE run output to file while printing to stdout
|
||||||
##################################################################
|
with open(mpi_command_outfile, 'w') as file0:
|
||||||
|
## Read and print output lines; also write each line to file
|
||||||
|
for line in mpi_process.stdout:
|
||||||
|
print(line, end='') # stream output in real time
|
||||||
##################################################################
|
file0.write(line) # write the line to file
|
||||||
|
file0.flush() # flush to ensure each line is written immediately (optional)
|
||||||
## Run the AMSS-NCKU main program ABE
|
file0.close()
|
||||||
|
|
||||||
def run_ABE():
|
## Wait for the process to finish
|
||||||
|
mpi_return_code = mpi_process.wait()
|
||||||
print( )
|
|
||||||
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
|
print( )
|
||||||
print( )
|
print( " The ABE/ABEGPU simulation is finished " )
|
||||||
|
print( )
|
||||||
## Define the command to run; cast other values to strings as needed
|
|
||||||
|
return
|
||||||
if (input_data.GPU_Calculation == "no"):
|
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
##################################################################
|
||||||
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
|
||||||
mpi_command_outfile = "ABE_out.log"
|
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
##################################################################
|
||||||
mpi_command_outfile = "ABEGPU_out.log"
|
|
||||||
|
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
|
||||||
## Execute the MPI command and stream output
|
|
||||||
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
def run_TwoPunctureABE():
|
||||||
|
tp_time1=time.time()
|
||||||
## Write ABE run output to file while printing to stdout
|
print( )
|
||||||
with open(mpi_command_outfile, 'w') as file0:
|
print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
|
||||||
## Read and print output lines; also write each line to file
|
print( )
|
||||||
for line in mpi_process.stdout:
|
|
||||||
print(line, end='') # stream output in real time
|
## Define the command to run
|
||||||
file0.write(line) # write the line to file
|
#TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
|
||||||
file0.flush() # flush to ensure each line is written immediately (optional)
|
TwoPuncture_command = " ./TwoPunctureABE"
|
||||||
file0.close()
|
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
|
||||||
|
|
||||||
## Wait for the process to finish
|
## Execute the command with subprocess.Popen and stream output
|
||||||
mpi_return_code = mpi_process.wait()
|
TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
||||||
|
|
||||||
print( )
|
## Write TwoPunctureABE run output to file while printing to stdout
|
||||||
print( " The ABE/ABEGPU simulation is finished " )
|
with open(TwoPuncture_command_outfile, 'w') as file0:
|
||||||
print( )
|
## Read and print output lines; also write each line to file
|
||||||
|
for line in TwoPuncture_process.stdout:
|
||||||
return
|
print(line, end='') # stream output in real time
|
||||||
|
file0.write(line) # write the line to file
|
||||||
##################################################################
|
file0.flush() # flush to ensure each line is written immediately (optional)
|
||||||
|
file0.close()
|
||||||
|
|
||||||
|
## Wait for the process to finish
|
||||||
##################################################################
|
TwoPuncture_command_return_code = TwoPuncture_process.wait()
|
||||||
|
|
||||||
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
|
print( )
|
||||||
|
print( " The TwoPunctureABE simulation is finished " )
|
||||||
def run_TwoPunctureABE():
|
print( )
|
||||||
tp_time1=time.time()
|
tp_time2=time.time()
|
||||||
print( )
|
et=tp_time2-tp_time1
|
||||||
print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
|
print(f"Used time: {et}")
|
||||||
print( )
|
return
|
||||||
|
|
||||||
## Define the command to run
|
##################################################################
|
||||||
#TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
|
|
||||||
TwoPuncture_command = " ./TwoPunctureABE"
|
|
||||||
TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
|
|
||||||
|
|
||||||
## Execute the command with subprocess.Popen and stream output
|
|
||||||
TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
|
|
||||||
|
|
||||||
## Write TwoPunctureABE run output to file while printing to stdout
|
|
||||||
with open(TwoPuncture_command_outfile, 'w') as file0:
|
|
||||||
## Read and print output lines; also write each line to file
|
|
||||||
for line in TwoPuncture_process.stdout:
|
|
||||||
print(line, end='') # stream output in real time
|
|
||||||
file0.write(line) # write the line to file
|
|
||||||
file0.flush() # flush to ensure each line is written immediately (optional)
|
|
||||||
file0.close()
|
|
||||||
|
|
||||||
## Wait for the process to finish
|
|
||||||
TwoPuncture_command_return_code = TwoPuncture_process.wait()
|
|
||||||
|
|
||||||
print( )
|
|
||||||
print( " The TwoPunctureABE simulation is finished " )
|
|
||||||
print( )
|
|
||||||
tp_time2=time.time()
|
|
||||||
et=tp_time2-tp_time1
|
|
||||||
print(f"Used time: {et}")
|
|
||||||
return
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
|
|
||||||
|
|||||||
1348
numerical_grid.py
1348
numerical_grid.py
File diff suppressed because it is too large
Load Diff
@@ -1,29 +0,0 @@
|
|||||||
import multiprocessing
|
|
||||||
|
|
||||||
def run_plot_task(task):
    """Run one plotting job and hand back whatever it returns.

    Parameters
    ----------
    task : tuple
        A two-element tuple ``(function, args_tuple)``: the callable to
        invoke and the positional arguments to pass to it.

    Returns
    -------
    object
        The return value of ``function(*args_tuple)``.
    """
    ## Split the task into its callable and its positional arguments
    plot_function, plot_arguments = task
    outcome = plot_function(*plot_arguments)
    return outcome
|
|
||||||
|
|
||||||
|
|
||||||
def run_plot_tasks_parallel(plot_tasks):
    """Execute a list of independent plotting tasks in parallel.

    Worker processes are created with the 'fork' start method so that the
    main script is NOT re-imported/re-executed in child processes.

    Parameters
    ----------
    plot_tasks : list of tuples
        Each element is (function, args_tuple).  An empty list is a no-op.

    Returns
    -------
    None
        The return values of the individual tasks are discarded.
    """
    ## Nothing to plot: do not fork a whole worker pool just to tear it down
    if not plot_tasks:
        return

    ## NOTE: the 'fork' start method only exists on POSIX platforms;
    ## get_context raises ValueError where it is unavailable.
    ctx = multiprocessing.get_context('fork')
    with ctx.Pool() as pool:
        ## map blocks until every task has finished
        pool.map(run_plot_task, plot_tasks)
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -11,8 +11,6 @@
|
|||||||
import numpy ## numpy for array operations
|
import numpy ## numpy for array operations
|
||||||
import scipy ## scipy for interpolation and signal processing
|
import scipy ## scipy for interpolation and signal processing
|
||||||
import math
|
import math
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
|
||||||
import matplotlib.pyplot as plt ## matplotlib for plotting
|
import matplotlib.pyplot as plt ## matplotlib for plotting
|
||||||
import os ## os for system/file operations
|
import os ## os for system/file operations
|
||||||
|
|
||||||
|
|||||||
@@ -8,23 +8,16 @@
|
|||||||
##
|
##
|
||||||
#################################################
|
#################################################
|
||||||
|
|
||||||
## Restrict OpenMP to one thread per process so that running
|
|
||||||
## many workers in parallel does not create an O(workers * BLAS_threads)
|
|
||||||
## thread explosion. The variable MUST be set before numpy/scipy
|
|
||||||
## are imported, because the BLAS library reads them only at load time.
|
|
||||||
import os
|
|
||||||
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import scipy
|
import scipy
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from matplotlib.colors import LogNorm
|
from matplotlib.colors import LogNorm
|
||||||
from mpl_toolkits.mplot3d import Axes3D
|
from mpl_toolkits.mplot3d import Axes3D
|
||||||
## import torch
|
## import torch
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
|
|
||||||
@@ -199,19 +192,3 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
|
|||||||
|
|
||||||
####################################################################################
|
####################################################################################
|
||||||
|
|
||||||
|
|
||||||
####################################################################################
|
|
||||||
## Allow this module to be run as a standalone script so that each
|
|
||||||
## binary-data plot can be executed in a fresh subprocess whose BLAS
|
|
||||||
## environment variables (set above) take effect before numpy loads.
|
|
||||||
##
|
|
||||||
## Usage: python3 plot_binary_data.py <filename> <binary_outdir> <figure_outdir>
|
|
||||||
####################################################################################
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    import sys

    ## Everything after the script name: <filename> <binary_outdir> <figure_outdir>
    cli_arguments = sys.argv[1:]

    ## Anything other than exactly three arguments is a usage error
    if len(cli_arguments) != 3:
        print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
        sys.exit(1)

    plot_binary_data(*cli_arguments)
|
|
||||||
|
|
||||||
|
|||||||
@@ -8,8 +8,6 @@
|
|||||||
#################################################
|
#################################################
|
||||||
|
|
||||||
import numpy ## numpy for array operations
|
import numpy ## numpy for array operations
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg') ## use non-interactive backend for multiprocessing safety
|
|
||||||
import matplotlib.pyplot as plt ## matplotlib for plotting
|
import matplotlib.pyplot as plt ## matplotlib for plotting
|
||||||
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
|
from mpl_toolkits.mplot3d import Axes3D ## needed for 3D plots
|
||||||
import glob
|
import glob
|
||||||
@@ -17,9 +15,6 @@ import os ## operating system utilities
|
|||||||
|
|
||||||
import plot_binary_data
|
import plot_binary_data
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import multiprocessing
|
|
||||||
|
|
||||||
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
|
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
|
||||||
|
|
||||||
@@ -55,40 +50,10 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
|
|||||||
file_list.append(x)
|
file_list.append(x)
|
||||||
print(x)
|
print(x)
|
||||||
|
|
||||||
## Plot each file in parallel using subprocesses.
|
## Plot each file in the list
|
||||||
## Each subprocess is a fresh Python process where the BLAS thread-count
|
|
||||||
## environment variables (set at the top of plot_binary_data.py) take
|
|
||||||
## effect before numpy is imported. This avoids the thread explosion
|
|
||||||
## that occurs when multiprocessing.Pool with 'fork' context inherits
|
|
||||||
## already-initialized multi-threaded BLAS from the parent.
|
|
||||||
script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
|
|
||||||
max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
|
|
||||||
|
|
||||||
running = []
|
|
||||||
failed = []
|
|
||||||
for filename in file_list:
|
for filename in file_list:
|
||||||
print(filename)
|
print(filename)
|
||||||
proc = subprocess.Popen(
|
plot_binary_data.plot_binary_data(filename, binary_outdir, figure_outdir)
|
||||||
[sys.executable, script, filename, binary_outdir, figure_outdir],
|
|
||||||
)
|
|
||||||
running.append( (proc, filename) )
|
|
||||||
## Keep at most max_workers subprocesses active at a time
|
|
||||||
if len(running) >= max_workers:
|
|
||||||
p, fn = running.pop(0)
|
|
||||||
p.wait()
|
|
||||||
if p.returncode != 0:
|
|
||||||
failed.append(fn)
|
|
||||||
|
|
||||||
## Wait for all remaining subprocesses to finish
|
|
||||||
for p, fn in running:
|
|
||||||
p.wait()
|
|
||||||
if p.returncode != 0:
|
|
||||||
failed.append(fn)
|
|
||||||
|
|
||||||
if failed:
|
|
||||||
print( " WARNING: the following binary data plots failed:" )
|
|
||||||
for fn in failed:
|
|
||||||
print( " ", fn )
|
|
||||||
|
|
||||||
print( )
|
print( )
|
||||||
print( " Binary Data Plot Has been Finished " )
|
print( " Binary Data Plot Has been Finished " )
|
||||||
|
|||||||
@@ -1,133 +1,133 @@
|
|||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
##
|
##
|
||||||
## Update puncture parameters from TwoPuncture output
|
## Update puncture parameters from TwoPuncture output
|
||||||
## Author: Xiaoqu
|
## Author: Xiaoqu
|
||||||
## 2024/12/04
|
## 2024/12/04
|
||||||
##
|
##
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
import AMSS_NCKU_Input as input_data
|
import AMSS_NCKU_Input as input_data
|
||||||
import numpy
|
import numpy
|
||||||
import os
|
import os
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
def read_TwoPuncture_Output(Output_File_directory):
    """Collect the parameters of every puncture.

    The first two punctures are filled from "puncture_parameters_new.txt"
    in Output_File_directory; any further puncture is filled from the
    values in the input module.

    Parameters
    ----------
    Output_File_directory : str
        Directory that contains "puncture_parameters_new.txt".

    Returns
    -------
    tuple of numpy.ndarray
        (bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH,
        angular_momentum_BH).  The two mass arrays have one entry per
        puncture; the three vector arrays have shape (puncture_number, 3).
    """

    puncture_count = input_data.puncture_number

    ## Result arrays, all initialised to zero
    dimensionless_mass_BH = numpy.zeros( puncture_count )
    bare_mass_BH          = numpy.zeros( puncture_count )
    position_BH           = numpy.zeros( (puncture_count, 3) )
    momentum_BH           = numpy.zeros( (puncture_count, 3) )
    angular_momentum_BH   = numpy.zeros( (puncture_count, 3) )

    ## Load the TwoPuncture output and flatten it to a 1-D array
    parameter_file = os.path.join(Output_File_directory, "puncture_parameters_new.txt")
    data = numpy.loadtxt( parameter_file )
    data = data.reshape(-1)

    for i in range(puncture_count):

        if i < 2:
            ## Each of the first two punctures occupies 12 consecutive
            ## values; the value at offset 2 is not read here.
            offset = 12*i
            bare_mass_BH[i]          = data[offset]
            dimensionless_mass_BH[i] = data[offset+1]
            position_BH[i]           = [ data[offset+k] for k in (3, 4, 5) ]
            momentum_BH[i]           = [ data[offset+k] for k in (6, 7, 8) ]
            angular_momentum_BH[i]   = [ data[offset+k] for k in (9, 10, 11) ]
            continue

        ## Additional punctures: take everything from the input module
        dimensionless_mass_BH[i] = input_data.parameter_BH[i,0]
        bare_mass_BH[i]          = input_data.parameter_BH[i,0]
        position_BH[i]           = input_data.position_BH[i]
        momentum_BH[i]           = input_data.momentum_BH[i]

        ## Spin angular momentum depends on the symmetry setting; for any
        ## other symmetry value the entry keeps its initial zeros.
        symmetry = input_data.Symmetry
        if symmetry == "equatorial-symmetry":
            spin_z = (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2]
            angular_momentum_BH[i] = [ 0.0, 0.0, spin_z ]
        elif symmetry == "no-symmetry":
            angular_momentum_BH[i] = (dimensionless_mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]

    return bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
## Append the computed puncture information into the AMSS-NCKU input file
|
## Append the computed puncture information into the AMSS-NCKU input file
|
||||||
|
|
||||||
def append_AMSSNCKU_BSSN_input(File_directory, TwoPuncture_File_directory):
    """Append the BSSN puncture settings to "AMSS-NCKU.input".

    Parameters
    ----------
    File_directory : str
        Not used by this function.
        NOTE(review): the target file is located through
        input_data.File_directory instead -- confirm this is intended.
    TwoPuncture_File_directory : str
        Directory passed to read_TwoPuncture_Output; only used when
        input_data.Initial_Data_Method is "Ansorg-TwoPuncture".

    Returns
    -------
    None
    """

    puncture_count    = input_data.puncture_number
    use_two_puncture  = (input_data.Initial_Data_Method == "Ansorg-TwoPuncture")

    charge_Q_BH = numpy.zeros( puncture_count )    ## charge for each black hole

    if use_two_puncture:
        ## Bare masses, positions, momenta and spins come from TwoPuncture output
        bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH = read_TwoPuncture_Output(TwoPuncture_File_directory)
        for i in range(puncture_count):
            charge_Q_BH[i] = dimensionless_mass_BH[i] * input_data.parameter_BH[i,1]
        mass_to_write = bare_mass_BH

    else:
        ## Any other initial-data method: take the parameters from the input module
        position_BH         = input_data.position_BH
        momentum_BH         = input_data.momentum_BH
        angular_momentum_BH = numpy.zeros( (puncture_count, 3) )
        mass_BH             = numpy.zeros( puncture_count )

        for i in range(puncture_count):
            symmetry = input_data.Symmetry
            if symmetry in ( "octant-symmetry", "equatorial-symmetry" ):
                ## Only a z-component of the spin is set for these symmetries
                mass_BH[i]             = input_data.parameter_BH[i,0]
                charge_Q_BH[i]         = mass_BH[i]* input_data.parameter_BH[i,1]
                angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ]
            elif symmetry == "no-symmetry":
                mass_BH[i]             = input_data.parameter_BH[i,0]
                angular_momentum_BH[i] = (mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
                charge_Q_BH[i]         = mass_BH[i] * input_data.parameter_BH[i,1]
            ## any other symmetry value leaves mass, charge and spin at zero
        mass_to_write = mass_BH

    ## Open in append mode; the with-block closes the file even if a write fails
    with open( os.path.join(input_data.File_directory, "AMSS-NCKU.input"), "a" ) as file1:

        ## Output BSSN related settings
        print( file=file1 )
        print( "BSSN::chitiny = 1e-5", file=file1 )
        print( "BSSN::time refinement start from level = ", input_data.refinement_level, file=file1 )
        print( "BSSN::BH_num = ", input_data.puncture_number, file=file1 )

        for i in range(puncture_count):
            print( f"BSSN::Mass[{i}] = { mass_to_write[i] } ", file=file1 )
            print( f"BSSN::Qchar[{i}] = { charge_Q_BH[i] } ", file=file1 )
            print( f"BSSN::Porgx[{i}] = { position_BH[i,0] } ", file=file1 )
            print( f"BSSN::Porgy[{i}] = { position_BH[i,1] } ", file=file1 )
            print( f"BSSN::Porgz[{i}] = { position_BH[i,2] } ", file=file1 )
            print( f"BSSN::Pmomx[{i}] = { momentum_BH[i,0] } ", file=file1 )
            print( f"BSSN::Pmomy[{i}] = { momentum_BH[i,1] } ", file=file1 )
            print( f"BSSN::Pmomz[{i}] = { momentum_BH[i,2] } ", file=file1 )
            print( f"BSSN::Spinx[{i}] = { angular_momentum_BH[i,0] } ", file=file1 )
            print( f"BSSN::Spiny[{i}] = { angular_momentum_BH[i,1] } ", file=file1 )
            print( f"BSSN::Spinz[{i}] = { angular_momentum_BH[i,2] } ", file=file1 )

        print( file=file1 )

    return
|
||||||
|
|
||||||
#################################################
|
#################################################
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user