黄老板逆天重写

This commit is contained in:
wingrew
2026-03-01 05:48:40 +08:00
parent e09ae438a2
commit 19b0e79692
46 changed files with 85969 additions and 67883 deletions

12
.gitignore vendored
View File

@@ -1,6 +1,6 @@
__pycache__
GW150914
GW150914-origin
docs
*.tmp
__pycache__
GW150914
GW150914-origin
docs
*.tmp

4877
2.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ import numpy
File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation
MPI_processes = 2 ## number of mpi processes used in the simulation
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
@@ -50,7 +50,7 @@ Check_Time = 100.0
Dump_Time = 100.0 ## time inteval dT for dumping binary data
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
Evolution_Step_Number = 6 ## stop the calculation after the maximal step number
Courant_Factor = 0.5 ## Courant Factor
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength

View File

@@ -49,32 +49,32 @@ import time
File_directory = os.path.join(input_data.File_directory)
## If the specified output directory exists, ask the user whether to continue
if os.path.exists(File_directory):
print( " Output dictionary has been existed !!! " )
print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
print( )
## Prompt whether to overwrite the existing directory
while True:
try:
inputvalue = input()
## If the user agrees to overwrite, proceed and remove the existing directory
if ( inputvalue == "continue" ):
print( " Continue the calculation !!! " )
print( )
break
## If the user chooses not to overwrite, exit and keep the existing directory
elif ( inputvalue == "stop" ):
print( " Stop the calculation !!! " )
sys.exit()
## If the user input is invalid, prompt again
else:
print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " )
except ValueError:
print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " )
# if os.path.exists(File_directory):
# print( " Output dictionary has been existed !!! " )
# print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
# print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
# print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
# print( )
# ## Prompt whether to overwrite the existing directory
# while True:
# try:
# inputvalue = input()
# ## If the user agrees to overwrite, proceed and remove the existing directory
# if ( inputvalue == "continue" ):
# print( " Continue the calculation !!! " )
# print( )
# break
# ## If the user chooses not to overwrite, exit and keep the existing directory
# elif ( inputvalue == "stop" ):
# print( " Stop the calculation !!! " )
# sys.exit()
# ## If the user input is invalid, prompt again
# else:
# print( " Please input your choice !!! " )
# print( " Input 'continue' or 'stop' in the terminal !!! " )
# except ValueError:
# print( " Please input your choice !!! " )
# print( " Input 'continue' or 'stop' in the terminal !!! " )
## Remove the existing output directory if present
shutil.rmtree(File_directory, ignore_errors=True)

View File

@@ -24,7 +24,7 @@ using namespace std;
#include "misc.h"
#include "macrodef.h"
#include <omp.h>
#ifndef ABEtype
#error "not define ABEtype"
#endif
@@ -69,8 +69,9 @@ int main(int argc, char *argv[])
double Begin_clock, End_clock;
if (myrank == 0)
{
{
Begin_clock = MPI_Wtime();
}
if (argc > 1)

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,7 @@ using namespace std;
#include "MPatch.h"
#include "Parallel.h"
#include "fmisc.h"
#include "xh_global_interp.h"
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
{
@@ -394,7 +394,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
while (notfind && Bp) // run along Blocks
{
Block *BP = Bp->data;
bool flag = true;
for (int i = 0; i < dim; i++)
{
@@ -430,8 +429,10 @@ void Patch::Interp_Points(MyList<var> *VarList,
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
@@ -441,6 +442,7 @@ void Patch::Interp_Points(MyList<var> *VarList,
break;
Bp = Bp->next;
}
}
// Replace MPI_Allreduce with per-owner MPI_Bcast:
@@ -510,7 +512,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
int myrank, nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
// printf("here----\n");
// int zzz = 0;
int ordn = 2 * ghost_width;
MyList<var> *varl;
int num_var = 0;
@@ -529,30 +532,35 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++)
owner_rank[j] = -1;
double DH[dim], llb[dim], uub[dim];
double DH[dim];
for (int i = 0; i < dim; i++)
DH[i] = getdX(i);
// --- Interpolation phase (identical to original) ---
// printf("NN: %d, num_var = %d\n", NN, num_var);
#pragma omp parallel
{
#pragma omp for
for (int j = 0; j < NN; j++)
{
double pox[dim];
double pox[dim], llb[dim], uub[dim];
MyList<var> *varl1;
for (int i = 0; i < dim; i++)
{
pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{
cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++)
{
cout << XX[k][j];
if (k < dim - 1)
cout << ",";
else
cout << ") is out of current Patch." << endl;
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
// if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
// {
// cout << "Patch::Interp_Points: point (";
// for (int k = 0; k < dim; k++)
// {
// cout << XX[k][j];
// if (k < dim - 1)
// cout << ",";
// else
// cout << ") is out of current Patch." << endl;
// }
// MPI_Abort(MPI_COMM_WORLD, 1);
// }
}
MyList<Block> *Bp = blb;
@@ -584,21 +592,23 @@ void Patch::Interp_Points(MyList<var> *VarList,
break;
}
}
// printf("flag = %d\n", flag);
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
varl = VarList;
varl1 = VarList;
int k = 0;
while (varl)
while (varl1)
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl1->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl1->data->SoA, Symmetry);
varl1 = varl1->next;
k++;
// zzz += 1;
}
}
}
@@ -607,7 +617,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
Bp = Bp->next;
}
}
}
// printf("Interpolation done, zzz = %d\n", zzz);
// --- Error check for unfound points ---
for (int j = 0; j < NN; j++)
{
@@ -773,7 +784,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
int myrank, lmyrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_rank(Comm_here, &lmyrank);
int ordn = 2 * ghost_width;
MyList<var> *varl;
int num_var = 0;
@@ -863,7 +873,7 @@ void Patch::Interp_Points(MyList<var> *VarList,
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
@@ -1095,7 +1105,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
{
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
@@ -1197,7 +1207,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
// NOTE: we do not synchronize variables here; make sure that has been done before calling this routine
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
int ordn = 2 * ghost_width;
MyList<var> *varl;
int num_var = 0;
@@ -1337,7 +1347,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
{
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;

View File

@@ -4,7 +4,7 @@
#include "prolongrestrict.h"
#include "misc.h"
#include "parameters.h"
#include <omp.h>
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
{
nx = Mymax(1, shape / min_width);
@@ -3338,7 +3338,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
{
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// double time1 = omp_get_wtime();
int DIM = dim;
if (dir != PACK && dir != UNPACK)
@@ -3361,7 +3361,6 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
varls = varls->next;
varld = varld->next;
}
if (varls || varld)
{
cout << "error in short data packer, var lists does not match." << endl;
@@ -3375,7 +3374,6 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
type = 2;
else
type = 3;
while (src && dst)
{
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
@@ -3385,6 +3383,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
varld = VarListd;
while (varls && varld)
{
if (data)
{
if (dir == PACK)
@@ -3405,6 +3404,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
}
if (dir == UNPACK) // from target data to corresponding grid
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
@@ -3418,8 +3418,14 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
}
dst = dst->next;
src = src->next;
}
}
// double time2 = omp_get_wtime();
// xxx += time2 - time1;
// if(myrank == 0){
// printf("prolong3 time = %lf\n", time2 - time1);
// }
return size_out;
}
int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir,
@@ -3514,7 +3520,7 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
int node;
// double time1 = omp_get_wtime();
MPI_Request *reqs;
MPI_Status *stats;
reqs = new MPI_Request[2 * cpusize];
@@ -3583,7 +3589,9 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
if (rec_data[node])
delete[] rec_data[node];
}
// double time2 = omp_get_wtime();
// if (myrank == 0)
// printf("transfer time = %lf\n", time2 - time1);
delete[] reqs;
delete[] stats;
delete[] send_data;

View File

@@ -40,7 +40,7 @@ using namespace std;
#include "derivatives.h"
#include "ricci_gamma.h"
#include "xh_bssn_rhs_compute.h"
//================================================================================================
// define bssn_class
@@ -2029,6 +2029,7 @@ void bssn_class::Read_Ansorg()
void bssn_class::Evolve(int Steps)
{
clock_t prev_clock, curr_clock;
double prev_time, curr_time;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0;
#if 0
@@ -2141,8 +2142,10 @@ void bssn_class::Evolve(int Steps)
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; }
if (myrank == 0)
if (myrank == 0){
curr_clock = clock();
curr_time = omp_get_wtime();
}
#if (PSTR == 0)
RecursiveStep(0);
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
@@ -2198,12 +2201,17 @@ void bssn_class::Evolve(int Steps)
if (myrank == 0)
{
prev_clock = curr_clock;
prev_time = curr_time;
curr_clock = clock();
curr_time = omp_get_wtime();
cout << endl;
// cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
// << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
// << " seconds! " << endl;
// // cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl;
// cout << endl;
<< " Computer used " << (curr_time - prev_time)
<< " seconds! " << endl;
}
if (PhysTime >= TotalTime)
@@ -3092,7 +3100,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -3292,7 +3300,7 @@ void bssn_class::Step(int lev, int YN)
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1;
}
// cout<<"....................................."<<endl;
// rk4 substep and boundary
{
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
@@ -3457,7 +3465,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -3970,7 +3978,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -4312,7 +4320,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -4848,7 +4856,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -5048,7 +5056,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -7343,7 +7351,7 @@ void bssn_class::Constraint_Out()
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -7846,7 +7854,7 @@ void bssn_class::Interp_Constraint(bool infg)
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -8104,7 +8112,7 @@ void bssn_class::Compute_Constraint()
Block *cg = BP->data;
if (myrank == cg->rank)
{
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],

View File

@@ -106,38 +106,6 @@
call getpbh(BHN,Porg,Mass)
#endif
!!! sanity check (disabled in production builds for performance)
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)
if(dX.ne.dX) then
if(sum(chi).ne.sum(chi))write(*,*)"bssn.f90: find NaN in chi"
if(sum(trK).ne.sum(trK))write(*,*)"bssn.f90: find NaN in trk"
if(sum(dxx).ne.sum(dxx))write(*,*)"bssn.f90: find NaN in dxx"
if(sum(gxy).ne.sum(gxy))write(*,*)"bssn.f90: find NaN in gxy"
if(sum(gxz).ne.sum(gxz))write(*,*)"bssn.f90: find NaN in gxz"
if(sum(dyy).ne.sum(dyy))write(*,*)"bssn.f90: find NaN in dyy"
if(sum(gyz).ne.sum(gyz))write(*,*)"bssn.f90: find NaN in gyz"
if(sum(dzz).ne.sum(dzz))write(*,*)"bssn.f90: find NaN in dzz"
if(sum(Axx).ne.sum(Axx))write(*,*)"bssn.f90: find NaN in Axx"
if(sum(Axy).ne.sum(Axy))write(*,*)"bssn.f90: find NaN in Axy"
if(sum(Axz).ne.sum(Axz))write(*,*)"bssn.f90: find NaN in Axz"
if(sum(Ayy).ne.sum(Ayy))write(*,*)"bssn.f90: find NaN in Ayy"
if(sum(Ayz).ne.sum(Ayz))write(*,*)"bssn.f90: find NaN in Ayz"
if(sum(Azz).ne.sum(Azz))write(*,*)"bssn.f90: find NaN in Azz"
if(sum(Gamx).ne.sum(Gamx))write(*,*)"bssn.f90: find NaN in Gamx"
if(sum(Gamy).ne.sum(Gamy))write(*,*)"bssn.f90: find NaN in Gamy"
if(sum(Gamz).ne.sum(Gamz))write(*,*)"bssn.f90: find NaN in Gamz"
if(sum(Lap).ne.sum(Lap))write(*,*)"bssn.f90: find NaN in Lap"
if(sum(betax).ne.sum(betax))write(*,*)"bssn.f90: find NaN in betax"
if(sum(betay).ne.sum(betay))write(*,*)"bssn.f90: find NaN in betay"
if(sum(betaz).ne.sum(betaz))write(*,*)"bssn.f90: find NaN in betaz"
gont = 1
return
endif
#endif
PI = dacos(-ONE)
@@ -634,7 +602,7 @@
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
! now get physical second kind of connection
Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF

View File

@@ -0,0 +1,26 @@
#include "xh_macrodef.h"
#include "xh_tool.h"
/*
 * Prototype of the BSSN right-hand-side routine.
 *
 * Argument groups, in order (as suggested by the parameter names):
 *   ex, T, X/Y/Z                 grid extents, a time value, coordinate arrays
 *   chi .. dtSfz                 evolved state fields (inputs)
 *   chi_rhs .. dtSfz_rhs         right-hand sides, one per state field
 *   rho, S*, S**                 matter source terms
 *   Gam***                       connection coefficients
 *   R**                          Ricci tensor components
 *   ham_Res, mov*_Res, Gm*_Res   constraint residuals
 *   Symmetry, Lev, eps, co       scalar control parameters passed by reference
 *
 * Returns an int; visible call sites test it inside an `if`, so presumably a
 * nonzero value signals a failure -- TODO confirm against the implementation.
 *
 * NOTE(review): the call sites visible in this change invoke
 * f_compute_rhs_bssn_xh, while this prototype is named f_compute_rhs_bssn.
 * Confirm which symbol this header is meant to declare.
 */
int f_compute_rhs_bssn(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co
);

View File

@@ -0,0 +1,66 @@
/* tetrad notes
v:r; u: phi; w: theta
tetradtype 0
v^a = (x,y,z)
orthonormal order: v,u,w
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
tetradtype 1
orthonormal order: w,u,v
m = (theta + i phi)/sqrt(2) following Sperhake, Eq.(3.2) of PRD 85, 124062(2012)
tetradtype 2
v_a = (x,y,z)
orthonormal order: v,u,w
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
*/
#define tetradtype 2
/* Cell center or Vertex center */
#define Cell
/* ghost_width meaning:
2nd order: 2
4th order: 3
6th order: 4
8th order: 5
*/
#define ghost_width 3
/* use shell or not */
#define WithShell
/* use constraint preserving boundary condition or not
(only affects Z4c)
*/
#define CPBC
/* Gauge condition type
0: B^i gauge
1: David's puncture gauge
2: MB B^i gauge
3: RIT B^i gauge
4: MB beta gauge ("beta gauge" here does not mean Eq.(3) of PRD 84, 124006)
5: RIT beta gauge ("beta gauge" here does not mean Eq.(3) of PRD 84, 124006)
6: MGB1 B^i gauge
7: MGB2 B^i gauge
*/
#define GAUGE 2
/* buffer points for CPBC boundary */
#define CPBC_ghost_width (ghost_width)
/* using BSSN variable for constraint violation and psi4 calculation: 0
using ADM variable for constraint violation and psi4 calculation: 1
*/
#define ABV 0
/* Type of Potential and Scalar Distribution in F(R) Scalar-Tensor Theory
1: Case C of 1112.3928, V=0
2: shell with a2^2*phi0/(1+a2^2), f(R) = R+a2*R^2 induced V
3: ground state of Schrodinger-Newton system, f(R) = R+a2*R^2 induced V
4: a2 = infinity and phi(r) = phi0 * 0.5 * ( tanh((r+r0)/sigma) - tanh((r-r0)/sigma) )
5: shell with phi(r) = phi0*Exp(-(r-r0)**2/sigma), V = 0
*/
#define EScalar_CC 2

View File

@@ -0,0 +1,338 @@
#ifndef SHARE_FUNC_H
#define SHARE_FUNC_H
#include <stdlib.h>
#include <stddef.h>
#include <math.h>
#include <stdio.h>
#include <omp.h>
/*
 * Main grid: map a 0-based (i0, j0, k0) triple to its linear offset.
 * i0 is the fastest-varying index; ex[0] and ex[1] are the x and y extents.
 */
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
    const size_t nx = (size_t)ex[0];
    const size_t ny = (size_t)ex[1];
    /* Horner form of i0 + j0*nx + k0*nx*ny */
    return ((size_t)k0 * ny + (size_t)j0) * nx + (size_t)i0;
}
/*
 * Linear offset into a ghost-padded array laid out like the Fortran array
 *   fh(-1:ex1, -1:ex2, -1:ex3)        (ord = 2, i.e. index shift of 1).
 * iF/jF/kF are Fortran indices and may be -1, 0, 1, ..., ex.
 * The padded x and y extents are ex1 + 2 and ex2 + 2.
 */
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
    const size_t row   = (size_t)(ex[0] + 2);   /* padded x extent */
    const size_t plane = (size_t)(ex[1] + 2);   /* padded y extent */
    const size_t ci = (size_t)(iF + 1);         /* 0 .. ex1 + 1 */
    const size_t cj = (size_t)(jF + 1);         /* 0 .. ex2 + 1 */
    const size_t ck = (size_t)(kF + 1);         /* 0 .. ex3 + 1 */
    return (ck * plane + cj) * row + ci;
}
/*
 * Linear offset into a ghost-padded array laid out like the Fortran array
 *   fh(-2:ex1, -2:ex2, -2:ex3)        (ord = 3, i.e. index shift of 2).
 * iF/jF/kF are Fortran indices and may be negative.
 * The padded x and y extents are ex1 + 3 and ex2 + 3.
 */
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
    const size_t row   = (size_t)(ex[0] + 3);   /* padded x extent */
    const size_t plane = (size_t)(ex[1] + 3);   /* padded y extent */
    const size_t ci = (size_t)(iF + 2);         /* 0 .. ex1 + 2 */
    const size_t cj = (size_t)(jF + 2);         /* 0 .. ex2 + 2 */
    const size_t ck = (size_t)(kF + 2);         /* 0 .. ex3 + 2 */
    return (ck * plane + cj) * row + ci;
}
/*
 * Layout conventions shared by func and funcc:
 *
 *   func : Fortran (1..extc1, 1..extc2, 1..extc3), 1-based.
 *   funcc: Fortran (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3).
 *
 * On the C side:
 *   func is addressed 0-based: i0 = 0..extc1-1, j0 = 0..extc2-1, k0 = 0..extc3-1.
 *   funcc is stored as a flat array with shifted indices:
 *     iF in [-ord+1 .. extc1]  ->  ii = iF + (ord-1) in [0 .. extc1+ord-1],
 *     so the x extent is nx = extc1 + ord; likewise ny = extc2 + ord and
 *     nz = extc3 + ord.
 *
 * idx_func0 returns the linear offset of the 0-based point (i0, j0, k0) in func.
 */
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
    const size_t row   = (size_t)extc[0];
    const size_t plane = (size_t)extc[1];
    return ((size_t)k0 * plane + (size_t)j0) * row + (size_t)i0;
}
/*
 * Linear offset of Fortran index (iF, jF, kF) in a ghost-padded array whose
 * Fortran bounds are (-ord+1 : extc) in every direction.  Each index is
 * shifted by ord - 1 so that the lowest Fortran index maps to 0; the padded
 * x and y extents are extc1 + ord and extc2 + ord.
 */
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
    const int off = ord - 1;                        /* iF = -off .. extc1 */
    const size_t row   = (size_t)(extc[0] + ord);   /* extc1 + ord entries per x line */
    const size_t plane = (size_t)(extc[1] + ord);
    const size_t ci = (size_t)(iF + off);           /* 0 .. extc1 + off */
    const size_t cj = (size_t)(jF + off);           /* 0 .. extc2 + off */
    const size_t ck = (size_t)(kF + off);           /* 0 .. extc3 + off */
    return (ck * plane + cj) * row + ci;
}
/*
 * Fill the ghost-padded array funcc from func and populate its lower ghost
 * layers by reflection, scaling each reflected value by the matching SoA
 * factor.  Mirrors the Fortran routine:
 *
 *   funcc(1:extc1,1:extc2,1:extc3) = func
 *   do i=0,ord-1
 *     funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
 *   enddo
 *
 * ord    number of ghost layers written per direction
 * extc   interior extents (x, y, z)
 * func   interior data, 0-based layout of idx_func0
 * funcc  destination, layout of idx_funcc_F(.., ord, extc)
 * SoA    per-direction factors applied to the reflected values
 *
 * The three reflection passes must run in x, y, z order: the y pass copies
 * the x ghosts filled just before it, and the z pass copies both.
 */
static inline void symmetry_bd(int ord,
const int extc[3],
const double *func,
double *funcc,
const double SoA[3])
{
    const int nx = extc[0];
    const int ny = extc[1];
    const int nz = extc[2];
    const int lo = -ord + 1;   /* lowest Fortran index stored in funcc */
    int i, j, k, g;

    /* Interior copy: Fortran index iF = 1..extc maps to C index iF - 1 in func. */
    for (k = 1; k <= nz; ++k) {
        for (j = 1; j <= ny; ++j) {
            for (i = 1; i <= nx; ++i) {
                funcc[idx_funcc_F(i, j, k, ord, extc)] =
                    func[idx_func0(i - 1, j - 1, k - 1, extc)];
            }
        }
    }

    /* x ghosts: layer -g mirrors layer g + 1, interior j and k only. */
    for (g = 0; g < ord; ++g) {
        for (k = 1; k <= nz; ++k) {
            for (j = 1; j <= ny; ++j) {
                const double mirrored = funcc[idx_funcc_F(g + 1, j, k, ord, extc)];
                funcc[idx_funcc_F(-g, j, k, ord, extc)] = mirrored * SoA[0];
            }
        }
    }

    /* y ghosts: the Fortran ":" spans the whole padded x range lo..extc1. */
    for (g = 0; g < ord; ++g) {
        for (k = 1; k <= nz; ++k) {
            for (i = lo; i <= nx; ++i) {
                const double mirrored = funcc[idx_funcc_F(i, g + 1, k, ord, extc)];
                funcc[idx_funcc_F(i, -g, k, ord, extc)] = mirrored * SoA[1];
            }
        }
    }

    /* z ghosts: whole padded x and y ranges. */
    for (g = 0; g < ord; ++g) {
        for (j = lo; j <= ny; ++j) {
            for (i = lo; i <= nx; ++i) {
                const double mirrored = funcc[idx_funcc_F(i, j, g + 1, ord, extc)];
                funcc[idx_funcc_F(i, j, -g, ord, extc)] = mirrored * SoA[2];
            }
        }
    }
}
/*
 * fdderivs_xh -- second derivatives of one field at a single grid point.
 *
 * Relies on idx_ex / idx_fh_F_ord2 and the fh layout they describe
 * (Fortran fh(-1:ex1, -1:ex2, -1:ex3)).
 *
 * i0, j0, k0      0-based indices of the point; the matching Fortran indices
 *                 are i0+1, j0+1, k0+1.
 * ex              interior grid extents.
 * fh              ghost-padded input field, addressed through idx_fh_F_ord2.
 * iminF..kmaxF    inclusive Fortran-index bounds the stencil must stay inside.
 * Fdxdx..Fdydz    prefactors applied to the 5-point (high-order) stencil sums;
 *                 presumably 1/(12 h^2) and 1/(144 h_a h_b) -- confirm with the caller.
 * Sdxdx..Sdydz    prefactors applied to the 3-point (second-order) stencil sums;
 *                 presumably 1/h^2 and 1/(4 h_a h_b) -- confirm with the caller.
 * fxx..fzz        outputs; only element idx_ex(i0, j0, k0, ex) is written.
 *
 * Branch selection:
 *   - all of i+-2, j+-2, k+-2 inside the bounds -> 5-point stencils;
 *   - else all of i+-1, j+-1, k+-1 inside       -> 3-point stencils;
 *   - otherwise all six outputs are set to 0.
 *
 * This definition lives inside the SHARE_FUNC_H include guard (closed at the
 * end of this block) so that including the header more than once in one
 * translation unit does not redefine the function.
 */
static inline void fdderivs_xh(
int i0, int j0, int k0,
const int ex[3],
const double *fh,
int iminF, int jminF, int kminF,
int imaxF, int jmaxF, int kmaxF,
double Fdxdx, double Fdydy, double Fdzdz,
double Fdxdy, double Fdxdz, double Fdydz,
double Sdxdx, double Sdydy, double Sdzdz,
double Sdxdy, double Sdxdz, double Sdydz,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz
){
    const double F8 = 8.0;
    const double F16 = 16.0;
    const double F30 = 30.0;
    const double TWO = 2.0;

    const int iF = i0 + 1;
    const int jF = j0 + 1;
    const int kF = k0 + 1;
    const size_t p = idx_ex(i0, j0, k0, ex);

/* fh at Fortran offset (di, dj, dk) from the current point. */
#define XH_FH(di, dj, dk) fh[idx_fh_F_ord2(iF + (di), jF + (dj), kF + (dk), ex)]
/* 5-point first-derivative sum along x, taken on the line shifted by (dj, dk). */
#define XH_ROW_X(dj, dk) \
    (XH_FH(-2, dj, dk) - F8 * XH_FH(-1, dj, dk) + F8 * XH_FH(1, dj, dk) - XH_FH(2, dj, dk))
/* 5-point first-derivative sum along y, taken on the line shifted by dk. */
#define XH_ROW_Y(dk) \
    (XH_FH(0, -2, dk) - F8 * XH_FH(0, -1, dk) + F8 * XH_FH(0, 1, dk) - XH_FH(0, 2, dk))

    /* High-order branch: i+-2, j+-2, k+-2 are all in range. */
    if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
        (jF + 2) <= jmaxF && (jF - 2) >= jminF &&
        (kF + 2) <= kmaxF && (kF - 2) >= kminF)
    {
        fxx[p] = Fdxdx * (
            -XH_FH(-2, 0, 0) +
            F16 * XH_FH(-1, 0, 0) -
            F30 * XH_FH(0, 0, 0) -
            XH_FH(2, 0, 0) +
            F16 * XH_FH(1, 0, 0)
        );
        fyy[p] = Fdydy * (
            -XH_FH(0, -2, 0) +
            F16 * XH_FH(0, -1, 0) -
            F30 * XH_FH(0, 0, 0) -
            XH_FH(0, 2, 0) +
            F16 * XH_FH(0, 1, 0)
        );
        fzz[p] = Fdzdz * (
            -XH_FH(0, 0, -2) +
            F16 * XH_FH(0, 0, -1) -
            F30 * XH_FH(0, 0, 0) -
            XH_FH(0, 0, 2) +
            F16 * XH_FH(0, 0, 1)
        );

        /* fxy: x-stencil on the four neighbouring j lines, combined along y. */
        {
            const double t_jm2 = XH_ROW_X(-2, 0);
            const double t_jm1 = XH_ROW_X(-1, 0);
            const double t_jp1 = XH_ROW_X(1, 0);
            const double t_jp2 = XH_ROW_X(2, 0);
            fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
        }

        /* fxz: x-stencil on the four neighbouring k planes, combined along z. */
        {
            const double t_km2 = XH_ROW_X(0, -2);
            const double t_km1 = XH_ROW_X(0, -1);
            const double t_kp1 = XH_ROW_X(0, 1);
            const double t_kp2 = XH_ROW_X(0, 2);
            fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
        }

        /* fyz: y-stencil on the four neighbouring k planes, combined along z. */
        {
            const double t_km2 = XH_ROW_Y(-2);
            const double t_km1 = XH_ROW_Y(-1);
            const double t_kp1 = XH_ROW_Y(1);
            const double t_kp2 = XH_ROW_Y(2);
            fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
        }
    }
    /* Second-order branch: i+-1, j+-1, k+-1 are all in range. */
    else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
             (jF + 1) <= jmaxF && (jF - 1) >= jminF &&
             (kF + 1) <= kmaxF && (kF - 1) >= kminF)
    {
        fxx[p] = Sdxdx * (
            XH_FH(-1, 0, 0) -
            TWO * XH_FH(0, 0, 0) +
            XH_FH(1, 0, 0)
        );
        fyy[p] = Sdydy * (
            XH_FH(0, -1, 0) -
            TWO * XH_FH(0, 0, 0) +
            XH_FH(0, 1, 0)
        );
        fzz[p] = Sdzdz * (
            XH_FH(0, 0, -1) -
            TWO * XH_FH(0, 0, 0) +
            XH_FH(0, 0, 1)
        );
        fxy[p] = Sdxdy * (
            XH_FH(-1, -1, 0) -
            XH_FH(1, -1, 0) -
            XH_FH(-1, 1, 0) +
            XH_FH(1, 1, 0)
        );
        fxz[p] = Sdxdz * (
            XH_FH(-1, 0, -1) -
            XH_FH(1, 0, -1) -
            XH_FH(-1, 0, 1) +
            XH_FH(1, 0, 1)
        );
        fyz[p] = Sdydz * (
            XH_FH(0, -1, -1) -
            XH_FH(0, 1, -1) -
            XH_FH(0, -1, 1) +
            XH_FH(0, 1, 1)
        );
    }
    /* Too close to the bounds for either stencil: report zero derivatives. */
    else {
        fxx[p] = 0.0; fyy[p] = 0.0; fzz[p] = 0.0;
        fxy[p] = 0.0; fxz[p] = 0.0; fyz[p] = 0.0;
    }

#undef XH_ROW_Y
#undef XH_ROW_X
#undef XH_FH
}
#endif /* SHARE_FUNC_H */

View File

@@ -0,0 +1,27 @@
#include "xh_share_func.h"
/*
 * Second partial derivatives of the grid function f.
 * ex holds the three grid extents; f and the six outputs fxx..fzz are
 * ex[0]*ex[1]*ex[2] arrays; X, Y, Z are the coordinate arrays.
 * SYM1..SYM3 and Symmetry control the mirrored ghost layers; onoff is
 * accepted for interface compatibility with the Fortran routine.
 */
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
/*
 * First partial derivatives fx, fy, fz of the grid function f.
 * Arguments follow the same conventions as fdderivs.
 */
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
/*
 * Adds a dissipation term built from f, scaled by eps, onto f_rhs
 * (f_rhs is updated in place). SoA holds the three mirror factors used
 * for the ghost layers.
 */
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps);
/*
 * Adds derivative terms of f weighted by Sfx, Sfy, Sfz onto f_rhs
 * (f_rhs is updated in place). SoA holds the three mirror factors used
 * for the ghost layers.
 */
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3]);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,311 @@
#include "../include/tool.h"
/*
 * fdderivs -- second partial derivatives of a grid function (C port of the
 * Fortran routine of the same name).
 *
 * Parameters:
 *   ex          grid extents {ex1, ex2, ex3}
 *   f           input field, ex1*ex2*ex3 values (read through idx_func0)
 *   fxx .. fzz  outputs, ex1*ex2*ex3 values each (written through idx_ex)
 *   X, Y, Z     coordinate arrays; only entries 0 and 1 are read, to obtain
 *               the grid spacings dX, dY, dZ
 *   SYM1..SYM3  factors applied when f is mirrored into the lower ghost
 *               layers in x, y and z respectively
 *   Symmetry    > 0: the z ghost layers may be used when |Z[0]| < dZ;
 *               > 1: likewise for x and y
 *   onoff       unused
 *
 * Every output point is written: the five-point formulas are used where the
 * +-2 stencil fits in all three directions, the three-point formulas where
 * only the +-1 stencil fits, and zero elsewhere (this includes the last
 * plane in each direction, matching the Fortran routine, which zeroes the
 * outputs before looping).
 *
 * The ghost-extended copy of f lives in a per-thread scratch buffer that is
 * grown on demand and kept between calls. If that buffer cannot be
 * allocated the function returns without touching the outputs.
 */
void fdderivs(const int ex[3],
              const double *f,
              double *fxx, double *fxy, double *fxz,
              double *fyy, double *fyz, double *fzz,
              const double *X, const double *Y, const double *Z,
              double SYM1, double SYM2, double SYM3,
              int Symmetry, int onoff)
{
    (void)onoff;
    const int NO_SYMM = 0, EQ_SYMM = 1;
    const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
    const double F1o4 = 2.5e-1; // 1/4
    const double F8 = 8.0;
    const double F16 = 16.0;
    const double F30 = 30.0;
    const double F1o12 = ONE / 12.0;
    const double F1o144 = ONE / 144.0;

    const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
    const double dX = X[1] - X[0];
    const double dY = Y[1] - Y[0];
    const double dZ = Z[1] - Z[0];

    /* Stencil limits in Fortran (1-based) index space. A lower limit of -1
     * lets the stencil reach into the mirrored ghost layers. */
    const int imaxF = ex1;
    const int jmaxF = ex2;
    const int kmaxF = ex3;
    int iminF = 1, jminF = 1, kminF = 1;
    if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
    if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
    if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;

    /* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
    const size_t nx = (size_t)ex1 + 2;
    const size_t ny = (size_t)ex2 + 2;
    const size_t nz = (size_t)ex3 + 2;
    const size_t fh_size = nx * ny * nz;

    /* Coefficients, as in the Fortran original. */
    const double Sdxdx = ONE / (dX * dX);
    const double Sdydy = ONE / (dY * dY);
    const double Sdzdz = ONE / (dZ * dZ);
    const double Fdxdx = F1o12 / (dX * dX);
    const double Fdydy = F1o12 / (dY * dY);
    const double Fdzdz = F1o12 / (dZ * dZ);
    const double Sdxdy = F1o4 / (dX * dY);
    const double Sdxdz = F1o4 / (dX * dZ);
    const double Sdydz = F1o4 / (dY * dZ);
    const double Fdxdy = F1o144 / (dX * dY);
    const double Fdxdz = F1o144 / (dX * dZ);
    const double Fdydz = F1o144 / (dY * dZ);

    static thread_local double *fh = NULL;
    static thread_local size_t cap = 0;
    if (fh_size > cap) {
        /* aligned_alloc expects a size that is a multiple of the alignment. */
        const size_t bytes = (fh_size * sizeof(double) + 63) / 64 * 64;
        free(fh);
        fh = (double*)aligned_alloc(64, bytes);
        /* Record the capacity only on success so a later call retries. */
        cap = fh ? fh_size : 0;
    }
    if (!fh) return;

    /* Inlined equivalent of symmetry_bd(2, ex, f, fh, SoA). */
    const double SoA[3] = { SYM1, SYM2, SYM3 };

    /* 1) Copy f into the interior of fh. */
    for (int k0 = 0; k0 < ex3; ++k0) {
        for (int j0 = 0; j0 < ex2; ++j0) {
            for (int i0 = 0; i0 < ex1; ++i0) {
                fh[idx_funcc_F(i0 + 1, j0 + 1, k0 + 1, 2, ex)] =
                    f[idx_func0(i0, j0, k0, ex)];
            }
        }
    }
    /* 2) x ghost layers: funcc(-i, 1:ex2, 1:ex3) = funcc(i+1, ...) * SoA(1) */
    for (int g = 0; g < 2; ++g) {
        for (int kF = 1; kF <= ex3; ++kF) {
            for (int jF = 1; jF <= ex2; ++jF) {
                fh[idx_funcc_F(-g, jF, kF, 2, ex)] =
                    fh[idx_funcc_F(g + 1, jF, kF, 2, ex)] * SoA[0];
            }
        }
    }
    /* 3) y ghost layers; the x range now includes the x ghosts (-1..ex1). */
    for (int g = 0; g < 2; ++g) {
        for (int kF = 1; kF <= ex3; ++kF) {
            for (int iF = -1; iF <= ex1; ++iF) {
                fh[idx_funcc_F(iF, -g, kF, 2, ex)] =
                    fh[idx_funcc_F(iF, g + 1, kF, 2, ex)] * SoA[1];
            }
        }
    }
    /* 4) z ghost layers; x and y ranges include their ghosts. */
    for (int g = 0; g < 2; ++g) {
        for (int jF = -1; jF <= ex2; ++jF) {
            for (int iF = -1; iF <= ex1; ++iF) {
                fh[idx_funcc_F(iF, jF, -g, 2, ex)] =
                    fh[idx_funcc_F(iF, jF, g + 1, 2, ex)] * SoA[2];
            }
        }
    }

/* Shorthand for one ghost-extended sample, addressed with Fortran indices. */
#define FDD_FH(i, j, k) fh[idx_fh_F_ord2((i), (j), (k), ex)]

    /*
     * The loops cover the whole grid. Points on the last plane of any
     * direction fail both stencil tests and therefore receive zero, which is
     * what the Fortran routine leaves there (it zeroes the outputs and then
     * loops over 1..ex-1 only).
     */
    for (int k0 = 0; k0 < ex3; ++k0) {
        const int kF = k0 + 1;
        for (int j0 = 0; j0 < ex2; ++j0) {
            const int jF = j0 + 1;
            for (int i0 = 0; i0 < ex1; ++i0) {
                const int iF = i0 + 1;
                const size_t p = idx_ex(i0, j0, k0, ex);

                /* Five-point branch: i+-2, j+-2, k+-2 all within range. */
                if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
                    (jF + 2) <= jmaxF && (jF - 2) >= jminF &&
                    (kF + 2) <= kmaxF && (kF - 2) >= kminF)
                {
                    fxx[p] = Fdxdx * (
                        -FDD_FH(iF - 2, jF, kF) +
                        F16 * FDD_FH(iF - 1, jF, kF) -
                        F30 * FDD_FH(iF, jF, kF) -
                        FDD_FH(iF + 2, jF, kF) +
                        F16 * FDD_FH(iF + 1, jF, kF));
                    fyy[p] = Fdydy * (
                        -FDD_FH(iF, jF - 2, kF) +
                        F16 * FDD_FH(iF, jF - 1, kF) -
                        F30 * FDD_FH(iF, jF, kF) -
                        FDD_FH(iF, jF + 2, kF) +
                        F16 * FDD_FH(iF, jF + 1, kF));
                    fzz[p] = Fdzdz * (
                        -FDD_FH(iF, jF, kF - 2) +
                        F16 * FDD_FH(iF, jF, kF - 1) -
                        F30 * FDD_FH(iF, jF, kF) -
                        FDD_FH(iF, jF, kF + 2) +
                        F16 * FDD_FH(iF, jF, kF + 1));

                    /* fxy: x-stencil on rows j-2, j-1, j+1, j+2, then the
                     * same weights across those rows (Fortran grouping). */
                    {
                        const double t_jm2 =
                            ( FDD_FH(iF - 2, jF - 2, kF)
                              - F8 * FDD_FH(iF - 1, jF - 2, kF)
                              + F8 * FDD_FH(iF + 1, jF - 2, kF)
                              - FDD_FH(iF + 2, jF - 2, kF) );
                        const double t_jm1 =
                            ( FDD_FH(iF - 2, jF - 1, kF)
                              - F8 * FDD_FH(iF - 1, jF - 1, kF)
                              + F8 * FDD_FH(iF + 1, jF - 1, kF)
                              - FDD_FH(iF + 2, jF - 1, kF) );
                        const double t_jp1 =
                            ( FDD_FH(iF - 2, jF + 1, kF)
                              - F8 * FDD_FH(iF - 1, jF + 1, kF)
                              + F8 * FDD_FH(iF + 1, jF + 1, kF)
                              - FDD_FH(iF + 2, jF + 1, kF) );
                        const double t_jp2 =
                            ( FDD_FH(iF - 2, jF + 2, kF)
                              - F8 * FDD_FH(iF - 1, jF + 2, kF)
                              + F8 * FDD_FH(iF + 1, jF + 2, kF)
                              - FDD_FH(iF + 2, jF + 2, kF) );
                        fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
                    }
                    /* fxz: x-stencil on planes k-2, k-1, k+1, k+2. */
                    {
                        const double t_km2 =
                            ( FDD_FH(iF - 2, jF, kF - 2)
                              - F8 * FDD_FH(iF - 1, jF, kF - 2)
                              + F8 * FDD_FH(iF + 1, jF, kF - 2)
                              - FDD_FH(iF + 2, jF, kF - 2) );
                        const double t_km1 =
                            ( FDD_FH(iF - 2, jF, kF - 1)
                              - F8 * FDD_FH(iF - 1, jF, kF - 1)
                              + F8 * FDD_FH(iF + 1, jF, kF - 1)
                              - FDD_FH(iF + 2, jF, kF - 1) );
                        const double t_kp1 =
                            ( FDD_FH(iF - 2, jF, kF + 1)
                              - F8 * FDD_FH(iF - 1, jF, kF + 1)
                              + F8 * FDD_FH(iF + 1, jF, kF + 1)
                              - FDD_FH(iF + 2, jF, kF + 1) );
                        const double t_kp2 =
                            ( FDD_FH(iF - 2, jF, kF + 2)
                              - F8 * FDD_FH(iF - 1, jF, kF + 2)
                              + F8 * FDD_FH(iF + 1, jF, kF + 2)
                              - FDD_FH(iF + 2, jF, kF + 2) );
                        fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
                    }
                    /* fyz: y-stencil on planes k-2, k-1, k+1, k+2. */
                    {
                        const double t_km2 =
                            ( FDD_FH(iF, jF - 2, kF - 2)
                              - F8 * FDD_FH(iF, jF - 1, kF - 2)
                              + F8 * FDD_FH(iF, jF + 1, kF - 2)
                              - FDD_FH(iF, jF + 2, kF - 2) );
                        const double t_km1 =
                            ( FDD_FH(iF, jF - 2, kF - 1)
                              - F8 * FDD_FH(iF, jF - 1, kF - 1)
                              + F8 * FDD_FH(iF, jF + 1, kF - 1)
                              - FDD_FH(iF, jF + 2, kF - 1) );
                        const double t_kp1 =
                            ( FDD_FH(iF, jF - 2, kF + 1)
                              - F8 * FDD_FH(iF, jF - 1, kF + 1)
                              + F8 * FDD_FH(iF, jF + 1, kF + 1)
                              - FDD_FH(iF, jF + 2, kF + 1) );
                        const double t_kp2 =
                            ( FDD_FH(iF, jF - 2, kF + 2)
                              - F8 * FDD_FH(iF, jF - 1, kF + 2)
                              + F8 * FDD_FH(iF, jF + 1, kF + 2)
                              - FDD_FH(iF, jF + 2, kF + 2) );
                        fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
                    }
                }
                /* Three-point branch: i+-1, j+-1, k+-1 within range. */
                else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
                         (jF + 1) <= jmaxF && (jF - 1) >= jminF &&
                         (kF + 1) <= kmaxF && (kF - 1) >= kminF)
                {
                    fxx[p] = Sdxdx * (
                        FDD_FH(iF - 1, jF, kF) -
                        TWO * FDD_FH(iF, jF, kF) +
                        FDD_FH(iF + 1, jF, kF));
                    fyy[p] = Sdydy * (
                        FDD_FH(iF, jF - 1, kF) -
                        TWO * FDD_FH(iF, jF, kF) +
                        FDD_FH(iF, jF + 1, kF));
                    fzz[p] = Sdzdz * (
                        FDD_FH(iF, jF, kF - 1) -
                        TWO * FDD_FH(iF, jF, kF) +
                        FDD_FH(iF, jF, kF + 1));
                    fxy[p] = Sdxdy * (
                        FDD_FH(iF - 1, jF - 1, kF) -
                        FDD_FH(iF + 1, jF - 1, kF) -
                        FDD_FH(iF - 1, jF + 1, kF) +
                        FDD_FH(iF + 1, jF + 1, kF));
                    fxz[p] = Sdxdz * (
                        FDD_FH(iF - 1, jF, kF - 1) -
                        FDD_FH(iF + 1, jF, kF - 1) -
                        FDD_FH(iF - 1, jF, kF + 1) +
                        FDD_FH(iF + 1, jF, kF + 1));
                    fyz[p] = Sdydz * (
                        FDD_FH(iF, jF - 1, kF - 1) -
                        FDD_FH(iF, jF + 1, kF - 1) -
                        FDD_FH(iF, jF - 1, kF + 1) +
                        FDD_FH(iF, jF + 1, kF + 1));
                }
                /* No stencil fits: the derivative is defined as zero here. */
                else {
                    fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
                    fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
                }
            }
        }
    }

#undef FDD_FH
    /* fh is intentionally kept for reuse by the next call on this thread. */
}

View File

@@ -0,0 +1,7 @@
#include "include/bssn_rhs_compute.h"
/* Placeholder entry point: currently does nothing and reports success. */
int main() {
// Test code that calls f_compute_rhs_bssn to check its correctness can be written here,
// for example: set up a small grid and initial data, call the function, and check that the output is reasonable.
return 0;
}

View File

@@ -0,0 +1,65 @@
// Mirror factors for the ghost layers: the same factor SYM in all three directions.
// NOTE(review): this fragment uses orphaned `omp for` directives, so it is presumably
// included inside an enclosing parallel region -- confirm that SoA is private there.
SoA[0] = SYM, SoA[1] = SYM, SoA[2] = SYM;
// Every collapsed nest below is kept perfectly nested (no statements between the
// loop headers); OpenMP versions before 5.1 require that for collapse(n).
// None of the `omp for` directives carries `nowait`, so each stage finishes on all
// threads before the next one starts; stages 3 and 4 read ghost values written by
// the stages before them.
// 1) Copy Lap into the interior of the ghost-extended buffer fh.
#pragma omp for collapse(3)
for (int k0 = 0; k0 < ex[2]; ++k0) {
    for (int j0 = 0; j0 < ex[1]; ++j0) {
        for (int i0 = 0; i0 < ex[0]; ++i0) {
            const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
            fh[idx_funcc_F(iF, jF, kF, 2, ex)] = Lap[idx_func0(i0, j0, k0, ex)];
        }
    }
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
#pragma omp for collapse(3)
for (int ii = 0; ii <= 2 - 1; ++ii) {
    for (int kF = 1; kF <= ex[2]; ++kF) {
        for (int jF = 1; jF <= ex[1]; ++jF) {
            const int iF_dst = -ii;    // 0, -1
            const int iF_src = ii + 1; // 1, 2
            fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
                fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
        }
    }
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// The Fortran ":" means iF covers the full range (-ord+1..extc1), ghosts included.
#pragma omp for collapse(3)
for (int jj = 0; jj <= 2 - 1; ++jj) {
    for (int kF = 1; kF <= ex[2]; ++kF) {
        for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
            const int jF_dst = -jj;
            const int jF_src = jj + 1;
            fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
                fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
        }
    }
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
#pragma omp for collapse(3)
for (int kk = 0; kk <= 2 - 1; ++kk) {
    for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
        for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
            const int kF_dst = -kk;
            const int kF_src = kk + 1;
            fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
                fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
        }
    }
}
// 5) Evaluate the second derivatives point by point (Fortran range 1..ex-1 in each direction).
#pragma omp for collapse(3)
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
    for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
        for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
            fdderivs_xh(i0, j0, k0, ex, fh, iminF, jminF, kminF, ex1, ex2, ex3,
                        Fdxdx, Fdydy, Fdzdz, Fdxdy, Fdxdz, Fdydz,
                        Sdxdx, Sdydy, Sdzdz, Sdxdy, Sdxdz, Sdydz,
                        fxx,fxy,fxz,fyy,fyz,fzz
            );
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,311 @@
#include "xh_tool.h"
/*
 * fdderivs -- second partial derivatives of a grid function (C port of the
 * Fortran routine of the same name).
 *
 * Parameters:
 *   ex          grid extents {ex1, ex2, ex3}
 *   f           input field, ex1*ex2*ex3 values (read through idx_func0)
 *   fxx .. fzz  outputs, ex1*ex2*ex3 values each (written through idx_ex)
 *   X, Y, Z     coordinate arrays; only entries 0 and 1 are read, to obtain
 *               the grid spacings dX, dY, dZ
 *   SYM1..SYM3  factors applied when f is mirrored into the lower ghost
 *               layers in x, y and z respectively
 *   Symmetry    > 0: the z ghost layers may be used when |Z[0]| < dZ;
 *               > 1: likewise for x and y
 *   onoff       unused
 *
 * Every output point is written: the five-point formulas are used where the
 * +-2 stencil fits in all three directions, the three-point formulas where
 * only the +-1 stencil fits, and zero elsewhere (this includes the last
 * plane in each direction, matching the Fortran routine, which zeroes the
 * outputs before looping).
 *
 * The ghost-extended copy of f lives in a per-thread scratch buffer that is
 * grown on demand and kept between calls. If that buffer cannot be
 * allocated the function returns without touching the outputs.
 */
void fdderivs(const int ex[3],
              const double *f,
              double *fxx, double *fxy, double *fxz,
              double *fyy, double *fyz, double *fzz,
              const double *X, const double *Y, const double *Z,
              double SYM1, double SYM2, double SYM3,
              int Symmetry, int onoff)
{
    (void)onoff;
    const int NO_SYMM = 0, EQ_SYMM = 1;
    const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
    const double F1o4 = 2.5e-1; // 1/4
    const double F8 = 8.0;
    const double F16 = 16.0;
    const double F30 = 30.0;
    const double F1o12 = ONE / 12.0;
    const double F1o144 = ONE / 144.0;

    const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
    const double dX = X[1] - X[0];
    const double dY = Y[1] - Y[0];
    const double dZ = Z[1] - Z[0];

    /* Stencil limits in Fortran (1-based) index space. A lower limit of -1
     * lets the stencil reach into the mirrored ghost layers. */
    const int imaxF = ex1;
    const int jmaxF = ex2;
    const int kmaxF = ex3;
    int iminF = 1, jminF = 1, kminF = 1;
    if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
    if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
    if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;

    /* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
    const size_t nx = (size_t)ex1 + 2;
    const size_t ny = (size_t)ex2 + 2;
    const size_t nz = (size_t)ex3 + 2;
    const size_t fh_size = nx * ny * nz;

    /* Coefficients, as in the Fortran original. */
    const double Sdxdx = ONE / (dX * dX);
    const double Sdydy = ONE / (dY * dY);
    const double Sdzdz = ONE / (dZ * dZ);
    const double Fdxdx = F1o12 / (dX * dX);
    const double Fdydy = F1o12 / (dY * dY);
    const double Fdzdz = F1o12 / (dZ * dZ);
    const double Sdxdy = F1o4 / (dX * dY);
    const double Sdxdz = F1o4 / (dX * dZ);
    const double Sdydz = F1o4 / (dY * dZ);
    const double Fdxdy = F1o144 / (dX * dY);
    const double Fdxdz = F1o144 / (dX * dZ);
    const double Fdydz = F1o144 / (dY * dZ);

    static thread_local double *fh = NULL;
    static thread_local size_t cap = 0;
    if (fh_size > cap) {
        /* aligned_alloc expects a size that is a multiple of the alignment. */
        const size_t bytes = (fh_size * sizeof(double) + 63) / 64 * 64;
        free(fh);
        fh = (double*)aligned_alloc(64, bytes);
        /* Record the capacity only on success so a later call retries. */
        cap = fh ? fh_size : 0;
    }
    if (!fh) return;

    /* Inlined equivalent of symmetry_bd(2, ex, f, fh, SoA). */
    const double SoA[3] = { SYM1, SYM2, SYM3 };

    /* 1) Copy f into the interior of fh. */
    for (int k0 = 0; k0 < ex3; ++k0) {
        for (int j0 = 0; j0 < ex2; ++j0) {
            for (int i0 = 0; i0 < ex1; ++i0) {
                fh[idx_funcc_F(i0 + 1, j0 + 1, k0 + 1, 2, ex)] =
                    f[idx_func0(i0, j0, k0, ex)];
            }
        }
    }
    /* 2) x ghost layers: funcc(-i, 1:ex2, 1:ex3) = funcc(i+1, ...) * SoA(1) */
    for (int g = 0; g < 2; ++g) {
        for (int kF = 1; kF <= ex3; ++kF) {
            for (int jF = 1; jF <= ex2; ++jF) {
                fh[idx_funcc_F(-g, jF, kF, 2, ex)] =
                    fh[idx_funcc_F(g + 1, jF, kF, 2, ex)] * SoA[0];
            }
        }
    }
    /* 3) y ghost layers; the x range now includes the x ghosts (-1..ex1). */
    for (int g = 0; g < 2; ++g) {
        for (int kF = 1; kF <= ex3; ++kF) {
            for (int iF = -1; iF <= ex1; ++iF) {
                fh[idx_funcc_F(iF, -g, kF, 2, ex)] =
                    fh[idx_funcc_F(iF, g + 1, kF, 2, ex)] * SoA[1];
            }
        }
    }
    /* 4) z ghost layers; x and y ranges include their ghosts. */
    for (int g = 0; g < 2; ++g) {
        for (int jF = -1; jF <= ex2; ++jF) {
            for (int iF = -1; iF <= ex1; ++iF) {
                fh[idx_funcc_F(iF, jF, -g, 2, ex)] =
                    fh[idx_funcc_F(iF, jF, g + 1, 2, ex)] * SoA[2];
            }
        }
    }

/* Shorthand for one ghost-extended sample, addressed with Fortran indices. */
#define FDD_FH(i, j, k) fh[idx_fh_F_ord2((i), (j), (k), ex)]

    /*
     * The loops cover the whole grid. Points on the last plane of any
     * direction fail both stencil tests and therefore receive zero, which is
     * what the Fortran routine leaves there (it zeroes the outputs and then
     * loops over 1..ex-1 only).
     */
    for (int k0 = 0; k0 < ex3; ++k0) {
        const int kF = k0 + 1;
        for (int j0 = 0; j0 < ex2; ++j0) {
            const int jF = j0 + 1;
            for (int i0 = 0; i0 < ex1; ++i0) {
                const int iF = i0 + 1;
                const size_t p = idx_ex(i0, j0, k0, ex);

                /* Five-point branch: i+-2, j+-2, k+-2 all within range. */
                if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
                    (jF + 2) <= jmaxF && (jF - 2) >= jminF &&
                    (kF + 2) <= kmaxF && (kF - 2) >= kminF)
                {
                    fxx[p] = Fdxdx * (
                        -FDD_FH(iF - 2, jF, kF) +
                        F16 * FDD_FH(iF - 1, jF, kF) -
                        F30 * FDD_FH(iF, jF, kF) -
                        FDD_FH(iF + 2, jF, kF) +
                        F16 * FDD_FH(iF + 1, jF, kF));
                    fyy[p] = Fdydy * (
                        -FDD_FH(iF, jF - 2, kF) +
                        F16 * FDD_FH(iF, jF - 1, kF) -
                        F30 * FDD_FH(iF, jF, kF) -
                        FDD_FH(iF, jF + 2, kF) +
                        F16 * FDD_FH(iF, jF + 1, kF));
                    fzz[p] = Fdzdz * (
                        -FDD_FH(iF, jF, kF - 2) +
                        F16 * FDD_FH(iF, jF, kF - 1) -
                        F30 * FDD_FH(iF, jF, kF) -
                        FDD_FH(iF, jF, kF + 2) +
                        F16 * FDD_FH(iF, jF, kF + 1));

                    /* fxy: x-stencil on rows j-2, j-1, j+1, j+2, then the
                     * same weights across those rows (Fortran grouping). */
                    {
                        const double t_jm2 =
                            ( FDD_FH(iF - 2, jF - 2, kF)
                              - F8 * FDD_FH(iF - 1, jF - 2, kF)
                              + F8 * FDD_FH(iF + 1, jF - 2, kF)
                              - FDD_FH(iF + 2, jF - 2, kF) );
                        const double t_jm1 =
                            ( FDD_FH(iF - 2, jF - 1, kF)
                              - F8 * FDD_FH(iF - 1, jF - 1, kF)
                              + F8 * FDD_FH(iF + 1, jF - 1, kF)
                              - FDD_FH(iF + 2, jF - 1, kF) );
                        const double t_jp1 =
                            ( FDD_FH(iF - 2, jF + 1, kF)
                              - F8 * FDD_FH(iF - 1, jF + 1, kF)
                              + F8 * FDD_FH(iF + 1, jF + 1, kF)
                              - FDD_FH(iF + 2, jF + 1, kF) );
                        const double t_jp2 =
                            ( FDD_FH(iF - 2, jF + 2, kF)
                              - F8 * FDD_FH(iF - 1, jF + 2, kF)
                              + F8 * FDD_FH(iF + 1, jF + 2, kF)
                              - FDD_FH(iF + 2, jF + 2, kF) );
                        fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
                    }
                    /* fxz: x-stencil on planes k-2, k-1, k+1, k+2. */
                    {
                        const double t_km2 =
                            ( FDD_FH(iF - 2, jF, kF - 2)
                              - F8 * FDD_FH(iF - 1, jF, kF - 2)
                              + F8 * FDD_FH(iF + 1, jF, kF - 2)
                              - FDD_FH(iF + 2, jF, kF - 2) );
                        const double t_km1 =
                            ( FDD_FH(iF - 2, jF, kF - 1)
                              - F8 * FDD_FH(iF - 1, jF, kF - 1)
                              + F8 * FDD_FH(iF + 1, jF, kF - 1)
                              - FDD_FH(iF + 2, jF, kF - 1) );
                        const double t_kp1 =
                            ( FDD_FH(iF - 2, jF, kF + 1)
                              - F8 * FDD_FH(iF - 1, jF, kF + 1)
                              + F8 * FDD_FH(iF + 1, jF, kF + 1)
                              - FDD_FH(iF + 2, jF, kF + 1) );
                        const double t_kp2 =
                            ( FDD_FH(iF - 2, jF, kF + 2)
                              - F8 * FDD_FH(iF - 1, jF, kF + 2)
                              + F8 * FDD_FH(iF + 1, jF, kF + 2)
                              - FDD_FH(iF + 2, jF, kF + 2) );
                        fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
                    }
                    /* fyz: y-stencil on planes k-2, k-1, k+1, k+2. */
                    {
                        const double t_km2 =
                            ( FDD_FH(iF, jF - 2, kF - 2)
                              - F8 * FDD_FH(iF, jF - 1, kF - 2)
                              + F8 * FDD_FH(iF, jF + 1, kF - 2)
                              - FDD_FH(iF, jF + 2, kF - 2) );
                        const double t_km1 =
                            ( FDD_FH(iF, jF - 2, kF - 1)
                              - F8 * FDD_FH(iF, jF - 1, kF - 1)
                              + F8 * FDD_FH(iF, jF + 1, kF - 1)
                              - FDD_FH(iF, jF + 2, kF - 1) );
                        const double t_kp1 =
                            ( FDD_FH(iF, jF - 2, kF + 1)
                              - F8 * FDD_FH(iF, jF - 1, kF + 1)
                              + F8 * FDD_FH(iF, jF + 1, kF + 1)
                              - FDD_FH(iF, jF + 2, kF + 1) );
                        const double t_kp2 =
                            ( FDD_FH(iF, jF - 2, kF + 2)
                              - F8 * FDD_FH(iF, jF - 1, kF + 2)
                              + F8 * FDD_FH(iF, jF + 1, kF + 2)
                              - FDD_FH(iF, jF + 2, kF + 2) );
                        fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
                    }
                }
                /* Three-point branch: i+-1, j+-1, k+-1 within range. */
                else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
                         (jF + 1) <= jmaxF && (jF - 1) >= jminF &&
                         (kF + 1) <= kmaxF && (kF - 1) >= kminF)
                {
                    fxx[p] = Sdxdx * (
                        FDD_FH(iF - 1, jF, kF) -
                        TWO * FDD_FH(iF, jF, kF) +
                        FDD_FH(iF + 1, jF, kF));
                    fyy[p] = Sdydy * (
                        FDD_FH(iF, jF - 1, kF) -
                        TWO * FDD_FH(iF, jF, kF) +
                        FDD_FH(iF, jF + 1, kF));
                    fzz[p] = Sdzdz * (
                        FDD_FH(iF, jF, kF - 1) -
                        TWO * FDD_FH(iF, jF, kF) +
                        FDD_FH(iF, jF, kF + 1));
                    fxy[p] = Sdxdy * (
                        FDD_FH(iF - 1, jF - 1, kF) -
                        FDD_FH(iF + 1, jF - 1, kF) -
                        FDD_FH(iF - 1, jF + 1, kF) +
                        FDD_FH(iF + 1, jF + 1, kF));
                    fxz[p] = Sdxdz * (
                        FDD_FH(iF - 1, jF, kF - 1) -
                        FDD_FH(iF + 1, jF, kF - 1) -
                        FDD_FH(iF - 1, jF, kF + 1) +
                        FDD_FH(iF + 1, jF, kF + 1));
                    fyz[p] = Sdydz * (
                        FDD_FH(iF, jF - 1, kF - 1) -
                        FDD_FH(iF, jF + 1, kF - 1) -
                        FDD_FH(iF, jF - 1, kF + 1) +
                        FDD_FH(iF, jF + 1, kF + 1));
                }
                /* No stencil fits: the derivative is defined as zero here. */
                else {
                    fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
                    fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
                }
            }
        }
    }

#undef FDD_FH
    /* fh is intentionally kept for reuse by the next call on this thread. */
}

View File

@@ -0,0 +1,145 @@
#include "xh_tool.h"
/*
 * fderivs -- first partial derivatives of a grid function (C port).
 *
 * Fortran:
 *   subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
 *
 * Parameters:
 *   ex          grid extents {ex1, ex2, ex3}
 *   f           input field, ex1*ex2*ex3 values
 *   fx, fy, fz  outputs, ex1*ex2*ex3 values each, addressed through idx_ex
 *   X, Y, Z     coordinate arrays; only entries 0 and 1 are read, to obtain
 *               the grid spacings
 *   SYM1..SYM3  mirror factors handed to symmetry_bd as SoA
 *   Symmetry    > 0: the z ghost layers may be used when |Z[0]| < dZ;
 *               > 1: likewise for x and y
 *   onoff       unused
 *
 * The outputs are zeroed first. Points where the +-2 stencil fits in all
 * three directions then get the four-point formula (weights 1, -8, 8, -1
 * over 12 h); points where only the +-1 stencil fits get the two-point
 * central difference; all other points stay zero.
 *
 * The ghost-extended copy of f lives in a per-thread scratch buffer that is
 * grown on demand and kept between calls. If that buffer cannot be
 * allocated the function returns without touching the outputs.
 */
void fderivs(const int ex[3],
             const double *f,
             double *fx, double *fy, double *fz,
             const double *X, const double *Y, const double *Z,
             double SYM1, double SYM2, double SYM3,
             int Symmetry, int onoff)
{
    (void)onoff; // not used by the Fortran routine either
    const double ZEO = 0.0, ONE = 1.0;
    const double TWO = 2.0, EIT = 8.0;
    const double F12 = 12.0;
    const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 is not referenced directly here

    const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
    // Fortran dX = X(2)-X(1)  ->  C: X[1]-X[0]
    const double dX = X[1] - X[0];
    const double dY = Y[1] - Y[0];
    const double dZ = Z[1] - Z[0];

    // Lower stencil limits in Fortran (1-based) index space; -1 lets the
    // stencil reach into the mirrored ghost layers.
    int iminF = 1, jminF = 1, kminF = 1;
    if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
    if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
    if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;

    // SoA(1:3) = SYM1,SYM2,SYM3
    const double SoA[3] = { SYM1, SYM2, SYM3 };

    // fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
    const size_t nx = (size_t)ex1 + 2;
    const size_t ny = (size_t)ex2 + 2;
    const size_t nz = (size_t)ex3 + 2;
    const size_t fh_size = nx * ny * nz;

    static thread_local double *fh = NULL;
    static thread_local size_t cap = 0;
    if (fh_size > cap) {
        // aligned_alloc expects a size that is a multiple of the alignment.
        const size_t bytes = (fh_size * sizeof(double) + 63) / 64 * 64;
        free(fh);
        fh = (double*)aligned_alloc(64, bytes);
        // Record the capacity only on success so a later call retries.
        cap = fh ? fh_size : 0;
    }
    if (!fh) return;

    // call symmetry_bd(2,ex,f,fh,SoA)
    symmetry_bd(2, ex, f, fh, SoA);

    const double d12dx = ONE / F12 / dX;
    const double d12dy = ONE / F12 / dY;
    const double d12dz = ONE / F12 / dZ;
    const double d2dx = ONE / TWO / dX;
    const double d2dy = ONE / TWO / dY;
    const double d2dz = ONE / TWO / dZ;

    // fx = fy = fz = 0
    const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
    for (size_t p = 0; p < all; ++p) {
        fx[p] = ZEO;
        fy[p] = ZEO;
        fz[p] = ZEO;
    }

    /*
     * Fortran loops:
     *   do k=1,ex3-1
     *   do j=1,ex2-1
     *   do i=1,ex1-1
     *
     * C: k0=0..ex3-2, j0=0..ex2-2, i0=0..ex1-2
     */
    for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
        const int kF = k0 + 1;
        for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
            const int jF = j0 + 1;
            for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
                const int iF = i0 + 1;
                const size_t p = idx_ex(i0, j0, k0, ex);

                // if(i+2 <= imax .and. i-2 >= imin ...)  (all Fortran indices)
                if ((iF + 2) <= ex1 && (iF - 2) >= iminF &&
                    (jF + 2) <= ex2 && (jF - 2) >= jminF &&
                    (kF + 2) <= ex3 && (kF - 2) >= kminF)
                {
                    fx[p] = d12dx * (
                        fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
                        EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
                        EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
                        fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]);
                    fy[p] = d12dy * (
                        fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
                        EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
                        EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
                        fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]);
                    fz[p] = d12dz * (
                        fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
                        EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
                        EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
                        fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]);
                }
                // elseif(i+1 <= imax .and. i-1 >= imin ...)
                else if ((iF + 1) <= ex1 && (iF - 1) >= iminF &&
                         (jF + 1) <= ex2 && (jF - 1) >= jminF &&
                         (kF + 1) <= ex3 && (kF - 1) >= kminF)
                {
                    fx[p] = d2dx * (
                        -fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
                        fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]);
                    fy[p] = d2dy * (
                        -fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
                        fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]);
                    fz[p] = d2dz * (
                        -fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
                        fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]);
                }
            }
        }
    }
    // fh is intentionally kept for reuse by the next call on this thread.
}

View File

@@ -0,0 +1,116 @@
#include "xh_tool.h"
/*
 * kodis -- adds a dissipation term built from f onto f_rhs (C port).
 *
 * Fortran signature:
 *   subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
 *
 * Parameters:
 *   ex        grid extents {ex1, ex2, ex3}
 *   X, Y, Z   coordinate arrays; only entries 0 and 1 are read, to obtain
 *             the grid spacings
 *   f         input field, ex1*ex2*ex3 values
 *   f_rhs     ex1*ex2*ex3 values addressed through idx_ex; updated in place
 *   SoA       three mirror factors handed to symmetry_bd
 *   Symmetry  > 0: the z ghost layers may be used when |Z[0]| < dZ;
 *             == 2: likewise for x and y
 *   eps       strength of the added term
 *
 * Only points where the +-3 stencil fits in all three directions are
 * modified; there
 *   f_rhs += eps/64 * (Dx + Dy + Dz),
 * where each D is the seven-point combination with weights
 * 1, -6, 15, -20, 15, -6, 1 divided by the grid spacing of that direction.
 *
 * The ghost-extended copy of f lives in a per-thread scratch buffer that is
 * grown on demand and kept between calls. If that buffer cannot be
 * allocated the function returns without modifying f_rhs.
 */
void kodis(const int ex[3],
           const double *X, const double *Y, const double *Z,
           const double *f, double *f_rhs,
           const double SoA[3],
           int Symmetry, double eps)
{
    const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
    const double cof = 64.0; // 2^6
    const int NO_SYMM = 0, OCTANT = 2;

    const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
    // Fortran: dX = X(2)-X(1)  ->  C: X[1]-X[0]
    const double dX = X[1] - X[0];
    const double dY = Y[1] - Y[0];
    const double dZ = Z[1] - Z[0];

    // Fortran: imax=ex(1) etc. are the 1-based upper bounds.
    const int imaxF = ex1;
    const int jmaxF = ex2;
    const int kmaxF = ex3;
    // Fortran: imin=jmin=kmin=1, lowered to -2 when a mirrored ghost zone
    // is available on that side.
    int iminF = 1, jminF = 1, kminF = 1;
    if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
    if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
    if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;

    // fh has (ex1+3)*(ex2+3)*(ex3+3) entries, corresponding to ord=3.
    const size_t nx = (size_t)ex1 + 3;
    const size_t ny = (size_t)ex2 + 3;
    const size_t nz = (size_t)ex3 + 3;
    const size_t fh_size = nx * ny * nz;

    static thread_local double *fh = NULL;
    static thread_local size_t cap = 0;
    if (fh_size > cap) {
        // aligned_alloc expects a size that is a multiple of the alignment.
        const size_t bytes = (fh_size * sizeof(double) + 63) / 64 * 64;
        free(fh);
        fh = (double*)aligned_alloc(64, bytes);
        // Record the capacity only on success so a later call retries.
        cap = fh ? fh_size : 0;
    }
    if (!fh) return;

    // Fortran: call symmetry_bd(3,ex,f,fh,SoA)
    symmetry_bd(3, ex, f, fh, SoA);

    /*
     * Fortran loops:
     *   do k=1,ex3
     *   do j=1,ex2
     *   do i=1,ex1
     *
     * C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1,
     * with the Fortran indices iF=i0+1, jF=j0+1, kF=k0+1.
     */
    for (int k0 = 0; k0 < ex3; ++k0) {
        const int kF = k0 + 1;
        for (int j0 = 0; j0 < ex2; ++j0) {
            const int jF = j0 + 1;
            for (int i0 = 0; i0 < ex1; ++i0) {
                const int iF = i0 + 1;

                // Fortran condition: i-3 >= imin .and. i+3 <= imax, and the
                // same for j and k (all in Fortran indices).
                if ((iF - 3) >= iminF && (iF + 3) <= imaxF &&
                    (jF - 3) >= jminF && (jF + 3) <= jmaxF &&
                    (kF - 3) >= kminF && (kF + 3) <= kmaxF)
                {
                    const size_t p = idx_ex(i0, j0, k0, ex);

                    // The same symmetric seven-point combination in each direction.
                    const double Dx_term =
                        ( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
                          SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
                          FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
                          TWT * fh[idx_fh_F(iF, jF, kF, ex)] ) / dX;
                    const double Dy_term =
                        ( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
                          SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
                          FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
                          TWT * fh[idx_fh_F(iF, jF, kF, ex)] ) / dY;
                    const double Dz_term =
                        ( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
                          SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
                          FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
                          TWT * fh[idx_fh_F(iF, jF, kF, ex)] ) / dZ;

                    // Fortran:
                    //   f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
                    f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
                }
            }
        }
    }
    // fh is intentionally kept for reuse by the next call on this thread.
}

View File

@@ -0,0 +1,262 @@
#include "xh_tool.h"
/*
 * A C version of symmetry_bd (or a Fortran routine bound to C) must be provided.
 * Fortran: call symmetry_bd(3,ex,f,fh,SoA)
 *
 * Conventions:
 * nghost = 3
 * ex[3] = {ex1,ex2,ex3}
 * f = original grid (ex1*ex2*ex3)
 * fh = extended grid ((ex1+3)*(ex2+3)*(ex3+3)), matching the Fortran bounds (-2:ex1, ...)
 * SoA[3] = input parameter
 */
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
// free(fh);
}

View File

@@ -8,7 +8,7 @@ include makefile.inc
$(f90) $(f90appflags) -c $< -o $@
.C.o:
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@
.for.o:
$(f77) -c $< -o $@
@@ -28,7 +28,8 @@ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
NullShellPatch2_Evo.o writefile_f.o
NullShellPatch2_Evo.o writefile_f.o xh_bssn_rhs.o xh_fdderivs.o xh_fderivs.o xh_kodiss.o xh_lopsided.o \
xh_global_interp.o xh_polint3.o
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o surface_integral.o ShellPatch.o\
@@ -72,7 +73,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
initial_null2.h NullShellPatch2.h
initial_null2.h NullShellPatch2.h xh_bssn_rhs_compute.h xh_global_interp.h
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
@@ -96,7 +97,7 @@ misc.o : zbesh.o
# projects
ABE: $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)

View File

@@ -1,32 +1,32 @@
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## Intel oneAPI version with oneMKL (Optimized for performance)
filein = -I/usr/include/ -I${MKLROOT}/include
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
## -fprofile-instr-use: use collected profile data to guide optimization decisions
## (branch prediction, basic block layout, inlining, loop unrolling)
PROFDATA = /home/amss/AMSS-NCKU/pgo_profile/default.profdata
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \
-Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \
-align array64byte -fpp -I${MKLROOT}/include
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## Intel oneAPI version with oneMKL (Optimized for performance)
filein = -I/usr/include/ -I${MKLROOT}/include
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
## -fprofile-instr-use: use collected profile data to guide optimization decisions
## (branch prediction, basic block layout, inlining, loop unrolling)
PROFDATA = /home/hxh/AMSS-NCKU/pgo_profile/default.profdata
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \
-Dfortran3 -Dnewc -I${MKLROOT}/include
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-fprofile-instr-use=$(PROFDATA) \
-align array64byte -fpp -I${MKLROOT}/include
f90 = ifx
f77 = ifx
CXX = icpx
CC = icx
CLINKER = mpiicpx
Cu = nvcc
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc

View File

@@ -2653,6 +2653,7 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
// we have assumed there is only one box on this level,
// so we do not need loop boxes
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here);
double Mass_out = 0;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
#include "xh_tool.h"
// Declaration of f_compute_rhs_bssn_xh with C linkage (unmangled symbol name).
// Scalars (T, Symmetry, Lev, eps, co) are passed by reference; ex is an int
// pointer and every other argument is a raw double pointer.
// NOTE(review): the array extents, which arguments are inputs versus outputs,
// and the meaning of the int return value are not visible in this header --
// confirm against the definition.
extern "C"
{
int f_compute_rhs_bssn_xh(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
// One "_rhs" pointer per field listed above (presumably the computed time
// derivatives -- TODO confirm).
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
// Presumably matter source terms -- verify against the caller.
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
// Presumably connection coefficients and Ricci components -- verify.
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
// Presumably constraint residuals -- verify.
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co
);
}

View File

@@ -0,0 +1,311 @@
#include "xh_tool.h"
/*
 * fdderivs -- second derivatives of a grid function by centred differences.
 *
 * Parameters
 *   ex          grid extents {ex1, ex2, ex3}
 *   f           input field, addressed through idx_func0 / idx_ex
 *   fxx..fzz    outputs (fxx, fxy, fxz, fyy, fyz, fzz), addressed through
 *               idx_ex; every one of the ex1*ex2*ex3 points is written
 *   X, Y, Z     coordinate arrays; X[1]-X[0] (etc.) gives the spacing and
 *               |X[0]| (etc.) decides whether the lower side touches a
 *               symmetry plane
 *   SYM1..SYM3  factors applied when mirroring f into the lower ghost layers
 *   Symmetry    0: no symmetry; > 0: mirror across z; > 1: also across x and y
 *   onoff       unused
 *
 * Stencil selection per point (indices are Fortran-style, 1-based):
 *   - all of i+-2, j+-2, k+-2 available -> 5-point (fourth-order) stencils
 *   - all of i+-1, j+-1, k+-1 available -> 3-point (second-order) stencils
 *   - otherwise                         -> all six outputs are set to zero
 *
 * The work buffer is a per-thread static that only grows. If it cannot be
 * allocated the function returns without touching the outputs.
 */
void fdderivs(const int ex[3],
              const double *f,
              double *fxx, double *fxy, double *fxz,
              double *fyy, double *fyz, double *fzz,
              const double *X, const double *Y, const double *Z,
              double SYM1, double SYM2, double SYM3,
              int Symmetry, int onoff)
{
  (void)onoff;

  const int NO_SYMM = 0, EQ_SYMM = 1;
  const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
  const double F1o4 = 2.5e-1; // 1/4
  const double F8 = 8.0;
  const double F16 = 16.0;
  const double F30 = 30.0;
  const double F1o12 = ONE / 12.0;
  const double F1o144 = ONE / 144.0;

  const int ord = 2; // number of ghost layers on the lower side of each axis

  const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];

  const double dX = X[1] - X[0];
  const double dY = Y[1] - Y[0];
  const double dZ = Z[1] - Z[0];

  // Highest / lowest Fortran index a stencil may touch in each direction.
  const int imaxF = ex1;
  const int jmaxF = ex2;
  const int kmaxF = ex3;

  int iminF = 1, jminF = 1, kminF = 1;
  if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
  if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
  if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;

  /* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
  const size_t nx = (size_t)ex1 + 2;
  const size_t ny = (size_t)ex2 + 2;
  const size_t nz = (size_t)ex3 + 2;
  const size_t fh_size = nx * ny * nz;

  /* Coefficients, exactly as in the Fortran original. */
  const double Sdxdx = ONE / (dX * dX);
  const double Sdydy = ONE / (dY * dY);
  const double Sdzdz = ONE / (dZ * dZ);

  const double Fdxdx = F1o12 / (dX * dX);
  const double Fdydy = F1o12 / (dY * dY);
  const double Fdzdz = F1o12 / (dZ * dZ);

  const double Sdxdy = F1o4 / (dX * dY);
  const double Sdxdz = F1o4 / (dX * dZ);
  const double Sdydz = F1o4 / (dY * dZ);

  const double Fdxdy = F1o144 / (dX * dY);
  const double Fdxdz = F1o144 / (dX * dZ);
  const double Fdydz = F1o144 / (dY * dZ);

  // Per-thread scratch buffer, grown on demand and reused across calls.
  static thread_local double *fh = nullptr;
  static thread_local size_t cap = 0;
  if (fh_size > cap) {
    free(fh);
    fh = nullptr;
    cap = 0; // stays 0 if the allocation below fails, so a later call retries
    // aligned_alloc requires the byte count to be a multiple of the alignment.
    const size_t align = 64;
    const size_t bytes = (fh_size * sizeof(double) + align - 1) / align * align;
    fh = (double*)aligned_alloc(align, bytes);
    if (fh) cap = fh_size;
  }
  if (!fh) return;

  // Inlined equivalent of symmetry_bd(2, ex, f, fh, SoA).
  const double SoA[3] = { SYM1, SYM2, SYM3 };

  // 1) copy the interior
  for (int k0 = 0; k0 < ex[2]; ++k0) {
    for (int j0 = 0; j0 < ex[1]; ++j0) {
      for (int i0 = 0; i0 < ex[0]; ++i0) {
        const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
        fh[idx_funcc_F(iF, jF, kF, ord, ex)] = f[idx_func0(i0, j0, k0, ex)];
      }
    }
  }

  // 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
  for (int ii = 0; ii <= ord - 1; ++ii) {
    const int iF_dst = -ii;    // 0, -1
    const int iF_src = ii + 1; // 1, 2
    for (int kF = 1; kF <= ex[2]; ++kF) {
      for (int jF = 1; jF <= ex[1]; ++jF) {
        fh[idx_funcc_F(iF_dst, jF, kF, ord, ex)] =
            fh[idx_funcc_F(iF_src, jF, kF, ord, ex)] * SoA[0];
      }
    }
  }

  // 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
  //    The Fortran ":" covers iF = -ord+1 .. extc1, i.e. including x ghosts.
  for (int jj = 0; jj <= ord - 1; ++jj) {
    const int jF_dst = -jj;
    const int jF_src = jj + 1;
    for (int kF = 1; kF <= ex[2]; ++kF) {
      for (int iF = -ord + 1; iF <= ex[0]; ++iF) {
        fh[idx_funcc_F(iF, jF_dst, kF, ord, ex)] =
            fh[idx_funcc_F(iF, jF_src, kF, ord, ex)] * SoA[1];
      }
    }
  }

  // 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
  for (int kk = 0; kk <= ord - 1; ++kk) {
    const int kF_dst = -kk;
    const int kF_src = kk + 1;
    for (int jF = -ord + 1; jF <= ex[1]; ++jF) {
      for (int iF = -ord + 1; iF <= ex[0]; ++iF) {
        fh[idx_funcc_F(iF, jF, kF_dst, ord, ex)] =
            fh[idx_funcc_F(iF, jF, kF_src, ord, ex)] * SoA[2];
      }
    }
  }

  // Read access to the extended field by Fortran index.
  double *const buf = fh;
  const auto H = [buf, ex](int i, int j, int k) -> double {
    return buf[idx_fh_F_ord2(i, j, k, ex)];
  };
  // Unscaled 4-point first-difference along x / along y at a given point.
  const auto dx4 = [&](int i, int j, int k) -> double {
    return H(i - 2, j, k) - F8 * H(i - 1, j, k) + F8 * H(i + 1, j, k) - H(i + 2, j, k);
  };
  const auto dy4 = [&](int i, int j, int k) -> double {
    return H(i, j - 2, k) - F8 * H(i, j - 1, k) + F8 * H(i, j + 1, k) - H(i, j + 2, k);
  };

  /*
   * The Fortran original zero-fills the outputs and then loops over
   * 1..ex-1. Here the sweep covers every point instead: at the last index
   * of any axis neither stencil test can pass, so the final branch writes
   * the zeros that the Fortran pre-fill provided.
   */
  for (int k0 = 0; k0 < ex3; ++k0) {
    const int kF = k0 + 1;
    for (int j0 = 0; j0 < ex2; ++j0) {
      const int jF = j0 + 1;
      for (int i0 = 0; i0 < ex1; ++i0) {
        const int iF = i0 + 1;
        const size_t p = idx_ex(i0, j0, k0, ex);

        /* fourth-order branch: i+-2, j+-2, k+-2 all in range */
        if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
            (jF + 2) <= jmaxF && (jF - 2) >= jminF &&
            (kF + 2) <= kmaxF && (kF - 2) >= kminF)
        {
          fxx[p] = Fdxdx * (
              -H(iF - 2, jF, kF) +
              F16 * H(iF - 1, jF, kF) -
              F30 * H(iF, jF, kF) -
              H(iF + 2, jF, kF) +
              F16 * H(iF + 1, jF, kF));
          fyy[p] = Fdydy * (
              -H(iF, jF - 2, kF) +
              F16 * H(iF, jF - 1, kF) -
              F30 * H(iF, jF, kF) -
              H(iF, jF + 2, kF) +
              F16 * H(iF, jF + 1, kF));
          fzz[p] = Fdzdz * (
              -H(iF, jF, kF - 2) +
              F16 * H(iF, jF, kF - 1) -
              F30 * H(iF, jF, kF) -
              H(iF, jF, kF + 2) +
              F16 * H(iF, jF, kF + 1));

          /* mixed derivatives: same bracket structure as the Fortran code */
          fxy[p] = Fdxdy * (dx4(iF, jF - 2, kF) - F8 * dx4(iF, jF - 1, kF) +
                            F8 * dx4(iF, jF + 1, kF) - dx4(iF, jF + 2, kF));
          fxz[p] = Fdxdz * (dx4(iF, jF, kF - 2) - F8 * dx4(iF, jF, kF - 1) +
                            F8 * dx4(iF, jF, kF + 1) - dx4(iF, jF, kF + 2));
          fyz[p] = Fdydz * (dy4(iF, jF, kF - 2) - F8 * dy4(iF, jF, kF - 1) +
                            F8 * dy4(iF, jF, kF + 1) - dy4(iF, jF, kF + 2));
        }
        /* second-order branch: i+-1, j+-1, k+-1 all in range */
        else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
                 (jF + 1) <= jmaxF && (jF - 1) >= jminF &&
                 (kF + 1) <= kmaxF && (kF - 1) >= kminF)
        {
          fxx[p] = Sdxdx * (H(iF - 1, jF, kF) - TWO * H(iF, jF, kF) + H(iF + 1, jF, kF));
          fyy[p] = Sdydy * (H(iF, jF - 1, kF) - TWO * H(iF, jF, kF) + H(iF, jF + 1, kF));
          fzz[p] = Sdzdz * (H(iF, jF, kF - 1) - TWO * H(iF, jF, kF) + H(iF, jF, kF + 1));

          fxy[p] = Sdxdy * (H(iF - 1, jF - 1, kF) - H(iF + 1, jF - 1, kF) -
                            H(iF - 1, jF + 1, kF) + H(iF + 1, jF + 1, kF));
          fxz[p] = Sdxdz * (H(iF - 1, jF, kF - 1) - H(iF + 1, jF, kF - 1) -
                            H(iF - 1, jF, kF + 1) + H(iF + 1, jF, kF + 1));
          fyz[p] = Sdydz * (H(iF, jF - 1, kF - 1) - H(iF, jF + 1, kF - 1) -
                            H(iF, jF - 1, kF + 1) + H(iF, jF + 1, kF + 1));
        }
        /* no usable stencil (including the last index of every axis) */
        else {
          fxx[p] = ZEO;
          fyy[p] = ZEO;
          fzz[p] = ZEO;
          fxy[p] = ZEO;
          fxz[p] = ZEO;
          fyz[p] = ZEO;
        }
      }
    }
  }
  // fh is intentionally kept for reuse by the next call on this thread.
}

View File

@@ -0,0 +1,145 @@
#include "xh_tool.h"
/*
 * fderivs -- first derivatives of a grid function by centred differences
 * (C port of the Fortran routine).
 *
 * Fortran:
 *   subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
 *
 * Conventions:
 *   f, fx, fy, fz : ex1*ex2*ex3 values laid out according to idx_ex
 *   X: ex1, Y: ex2, Z: ex3 entries
 *   SYM1..SYM3    : passed to symmetry_bd as SoA
 *   Symmetry      : 0: no symmetry; > 0: mirror across z; > 1: also x and y
 *   onoff         : unused
 *
 * Every output point is written: the 4-point stencil where i+-2, j+-2, k+-2
 * are all available, the 2-point stencil where i+-1, j+-1, k+-1 are, and zero
 * elsewhere. If the scratch buffer cannot be allocated the function returns
 * without touching the outputs.
 */
void fderivs(const int ex[3],
             const double *f,
             double *fx, double *fy, double *fz,
             const double *X, const double *Y, const double *Z,
             double SYM1, double SYM2, double SYM3,
             int Symmetry, int onoff)
{
  (void)onoff; // not used by the Fortran original either

  const double ZEO = 0.0, ONE = 1.0;
  const double TWO = 2.0, EIT = 8.0;
  const double F12 = 12.0;
  const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 is not referenced directly here

  const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];

  // Fortran dX = X(2)-X(1)  ->  C: X[1]-X[0]
  const double dX = X[1] - X[0];
  const double dY = Y[1] - Y[0];
  const double dZ = Z[1] - Z[0];

  // Lowest Fortran index a stencil may touch in each direction.
  int iminF = 1, jminF = 1, kminF = 1;
  if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
  if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
  if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;

  // SoA(1:3) = SYM1,SYM2,SYM3
  const double SoA[3] = { SYM1, SYM2, SYM3 };

  // fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
  const size_t nx = (size_t)ex1 + 2;
  const size_t ny = (size_t)ex2 + 2;
  const size_t nz = (size_t)ex3 + 2;
  const size_t fh_size = nx * ny * nz;

  // Per-thread scratch buffer, grown on demand and reused across calls.
  static thread_local double *fh = nullptr;
  static thread_local size_t cap = 0;
  if (fh_size > cap) {
    free(fh);
    fh = nullptr;
    cap = 0; // stays 0 if the allocation below fails, so a later call retries
    // aligned_alloc requires the byte count to be a multiple of the alignment.
    const size_t align = 64;
    const size_t bytes = (fh_size * sizeof(double) + align - 1) / align * align;
    fh = (double*)aligned_alloc(align, bytes);
    if (fh) cap = fh_size;
  }
  if (!fh) return;

  // call symmetry_bd(2,ex,f,fh,SoA)
  symmetry_bd(2, ex, f, fh, SoA);

  const double d12dx = ONE / F12 / dX;
  const double d12dy = ONE / F12 / dY;
  const double d12dz = ONE / F12 / dZ;

  const double d2dx = ONE / TWO / dX;
  const double d2dy = ONE / TWO / dY;
  const double d2dz = ONE / TWO / dZ;

  // Read access to the extended field by Fortran index.
  double *const buf = fh;
  const auto H = [buf, ex](int i, int j, int k) -> double {
    return buf[idx_fh_F_ord2(i, j, k, ex)];
  };

  /*
   * The Fortran original sets fx = fy = fz = 0 and then loops over 1..ex-1.
   * A single sweep over every point gives the same result: at the last
   * index of any axis neither stencil test can pass, so the final branch
   * writes the zero that the pre-fill provided.
   */
  for (int k0 = 0; k0 < ex3; ++k0) {
    const int kF = k0 + 1;
    for (int j0 = 0; j0 < ex2; ++j0) {
      const int jF = j0 + 1;
      for (int i0 = 0; i0 < ex1; ++i0) {
        const int iF = i0 + 1;
        const size_t p = idx_ex(i0, j0, k0, ex);

        // if(i+2 <= imax .and. i-2 >= imin ...)   (all Fortran indices)
        if ((iF + 2) <= ex1 && (iF - 2) >= iminF &&
            (jF + 2) <= ex2 && (jF - 2) >= jminF &&
            (kF + 2) <= ex3 && (kF - 2) >= kminF)
        {
          fx[p] = d12dx * (H(iF - 2, jF, kF) - EIT * H(iF - 1, jF, kF) +
                           EIT * H(iF + 1, jF, kF) - H(iF + 2, jF, kF));
          fy[p] = d12dy * (H(iF, jF - 2, kF) - EIT * H(iF, jF - 1, kF) +
                           EIT * H(iF, jF + 1, kF) - H(iF, jF + 2, kF));
          fz[p] = d12dz * (H(iF, jF, kF - 2) - EIT * H(iF, jF, kF - 1) +
                           EIT * H(iF, jF, kF + 1) - H(iF, jF, kF + 2));
        }
        // elseif(i+1 <= imax .and. i-1 >= imin ...)
        else if ((iF + 1) <= ex1 && (iF - 1) >= iminF &&
                 (jF + 1) <= ex2 && (jF - 1) >= jminF &&
                 (kF + 1) <= ex3 && (kF - 1) >= kminF)
        {
          fx[p] = d2dx * (-H(iF - 1, jF, kF) + H(iF + 1, jF, kF));
          fy[p] = d2dy * (-H(iF, jF - 1, kF) + H(iF, jF + 1, kF));
          fz[p] = d2dz * (-H(iF, jF, kF - 1) + H(iF, jF, kF + 1));
        }
        // no usable stencil
        else {
          fx[p] = ZEO;
          fy[p] = ZEO;
          fz[p] = ZEO;
        }
      }
    }
  }
  // fh is intentionally kept for reuse by the next call on this thread.
}

View File

@@ -0,0 +1,143 @@
#include "xh_global_interp.h"
/* polin3 is already available (obtained from the earlier Fortran -> C translation) */
// void polin3(const double *x1a, const double *x2a, const double *x3a,
// const double *ya, double x1, double x2, double x3,
// double *y, double *dy, int ordn);
/*
An implementation of decide3d must be provided (it is only declared here).
Fortran: decide3d(ex,f,f,cxB,cxT,SoA,ya,ORDN,Symmetry)
- ex: [3]
- f: 3D field (column-major)
- cxB/cxT: start/end of the 3D window (Fortran 1-based, may be <= 0)
- SoA: [3]
- ya: output sample block of ORDN^3 values (column-major)
- return: 0 means success; non-zero means error (corresponds to Fortran logical = .true.)
*/
// int xh_decide3d(const int ex[3],
// const double *f_in,
// const double *f_in2, /* the Fortran call passes f,f; kept as-is */
// const int cxB[3],
// const int cxT[3],
// const double SoA[3],
// double *ya,
// int ordn,
// int symmetry);
/*
 * Read a coordinate array through a Fortran-style (1-based) index.
 * Fortran X(1) corresponds to C X[0], so the element returned is X[idxF - 1].
 */
static inline double X_at_FortranIndex(const double *X, int idxF) {
  const int c_index = idxF - 1;
  return *(X + c_index);
}
/*
 * Stand-in for Fortran's idint: converts a double to int by discarding the
 * fractional part (truncation toward zero; equals floor for positive input).
 */
static inline int idint_like(double a) {
  const int truncated = static_cast<int>(a);
  return truncated;
}
/*
 * C version of global_interp.
 *
 * Picks an ORDN-point window per axis around (x1, y1, z1), lets xh_decide3d
 * fill the ORDN^3 sample block for that window, and hands the block to
 * xh_polin3 together with the point's position measured in grid spacings from
 * the window start. f_int is passed to xh_polin3 as its result argument.
 *
 * Window indices (cxB / cxT) are Fortran-style: 1-based, and allowed to be
 * zero or negative next to a symmetry plane.
 *
 * The process is terminated with exit(1) if the temporary buffers cannot be
 * allocated or if xh_decide3d returns non-zero.
 */
void xh_global_interp(const int ex[3],
                      const double *X, const double *Y, const double *Z,
                      const double *f, /* f(ex1,ex2,ex3) column-major */
                      double &f_int,
                      double x1, double y1, double z1,
                      int ORDN,
                      const double SoA[3],
                      int symmetry)
{
  enum { NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2 };

  const double *const coord[3] = { X, Y, Z };
  const double point[3] = { x1, y1, z1 };

  /* Grid spacing per axis: P(2) - P(1) in Fortran indexing. */
  double spacing[3];
  for (int d = 0; d < 3; ++d) {
    spacing[d] = X_at_FortranIndex(coord[d], 2) - X_at_FortranIndex(coord[d], 1);
  }

  /* Abscissae 0, 1, ..., ORDN-1 (shared by all three axes) and sample block. */
  const size_t order = (size_t)ORDN;
  double *nodes = (double*)malloc(order * sizeof(double));
  double *samples = (double*)malloc(order * order * order * sizeof(double));
  if (nodes == NULL || samples == NULL) {
    fprintf(stderr, "global_interp: malloc failed\n");
    exit(1);
  }
  for (int n = 0; n < ORDN; ++n) {
    nodes[n] = (double)n;
  }

  const int half = ORDN / 2; /* integer division, as in Fortran */

  /* Lowest admissible window start; below 1 only next to a symmetry plane. */
  int lowest[3] = { 1, 1, 1 };
  if (symmetry == OCTANT) {
    if (fabs(X_at_FortranIndex(X, 1)) < spacing[0]) lowest[0] = 2 - half;
    if (fabs(X_at_FortranIndex(Y, 1)) < spacing[1]) lowest[1] = 2 - half;
  }
  if (symmetry != NO_SYMM && fabs(X_at_FortranIndex(Z, 1)) < spacing[2]) {
    lowest[2] = 2 - half;
  }

  int cxB[3], cxT[3];
  double cx[3];
  for (int d = 0; d < 3; ++d) {
    /* cxI = idint((p - P(1))/dP + 0.4) + 1 */
    const double origin = X_at_FortranIndex(coord[d], 1);
    const int nearest = idint_like((point[d] - origin) / spacing[d] + 0.4) + 1;

    /* cxB = cxI - ORDN/2 + 1 ; cxT = cxB + ORDN - 1 */
    cxB[d] = nearest - half + 1;
    cxT[d] = cxB[d] + ORDN - 1;

    /* Clamp the window [cxB, cxT] into [lowest, ex]. */
    if (cxB[d] < lowest[d]) {
      cxB[d] = lowest[d];
      cxT[d] = cxB[d] + ORDN - 1;
    }
    if (cxT[d] > ex[d]) {
      cxT[d] = ex[d];
      cxB[d] = cxT[d] + 1 - ORDN;
    }

    /*
     * Offset of the point from the window start, in units of the spacing.
     * For cxB <= 0 the start lies in the mirrored region, hence P(2 - cxB)
     * with the opposite sign.
     */
    if (cxB[d] > 0) {
      cx[d] = (point[d] - X_at_FortranIndex(coord[d], cxB[d])) / spacing[d];
    } else {
      cx[d] = (point[d] + X_at_FortranIndex(coord[d], 2 - cxB[d])) / spacing[d];
    }
  }

  /* decide3d fills ya(1:ORDN,1:ORDN,1:ORDN); non-zero return means failure. */
  const int failed = xh_decide3d(ex, f, f, cxB, cxT, SoA, samples, ORDN, symmetry);
  if (failed) {
    printf("global_interp position: %g %g %g\n", x1, y1, z1);
    printf("data range: %g %g %g %g %g %g\n",
           X_at_FortranIndex(X, 1), X_at_FortranIndex(X, ex[0]),
           X_at_FortranIndex(Y, 1), X_at_FortranIndex(Y, ex[1]),
           X_at_FortranIndex(Z, 1), X_at_FortranIndex(Z, ex[2]));
    exit(1);
  }

  /* polin3(x1a,x1a,x1a,ya,cx(1),cx(2),cx(3),f_int,ddy,ORDN) */
  double ddy;
  xh_polin3(nodes, nodes, nodes, samples, cx[0], cx[1], cx[2], f_int, &ddy, ORDN);

  free(nodes);
  free(samples);
}

View File

@@ -0,0 +1,12 @@
#include "xh_po.h"
// Declaration of xh_global_interp with C linkage (unmangled symbol name).
// ex holds the three grid extents; X, Y, Z are the coordinate arrays; f is
// the field; (x1, y1, z1) is the query point; ORDN, SoA and symmetry are
// passed by value / pointer as shown.
// NOTE(review): f_int is a non-const reference, presumably the interpolated
// result -- confirm against the definition.
extern "C"{
void xh_global_interp(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, /* f(ex1,ex2,ex3) column-major */
double &f_int,
double x1, double y1, double z1,
int ORDN,
const double SoA[3],
int symmetry);
}

View File

@@ -0,0 +1,116 @@
#include "xh_tool.h"
/*
 * kodis -- C port of the Fortran dissipation routine.
 *
 * Fortran signature:
 *   subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
 *
 * Conventions:
 *   X: ex1, Y: ex2, Z: ex3 entries
 *   f, f_rhs : ex1*ex2*ex3 values laid out according to idx_ex
 *   SoA[3]   : passed on to symmetry_bd
 *   Symmetry : 0: no symmetry; > 0: mirror across z; == 2: also x and y
 *   eps      : strength factor
 *
 * For every point whose i+-3, j+-3 and k+-3 neighbours are all available,
 * a symmetric 7-point combination (1, -6, 15, -20, 15, -6, 1) is evaluated
 * along each axis, divided by that axis' spacing, summed, scaled by eps/64
 * and added to f_rhs. All other points of f_rhs are left unchanged.
 *
 * If the scratch buffer cannot be allocated the function returns without
 * modifying f_rhs.
 */
void kodis(const int ex[3],
           const double *X, const double *Y, const double *Z,
           const double *f, double *f_rhs,
           const double SoA[3],
           int Symmetry, double eps)
{
  const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
  const double cof = 64.0; // 2^6
  const int NO_SYMM = 0, OCTANT = 2;

  const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];

  // Fortran: dX = X(2)-X(1)  ->  C: X[1]-X[0]
  const double dX = X[1] - X[0];
  const double dY = Y[1] - Y[0];
  const double dZ = Z[1] - Z[0];

  // Fortran: imin=jmin=kmin=1; becomes -2 next to a symmetry plane.
  int iminF = 1, jminF = 1, kminF = 1;
  if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
  if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
  if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;

  // fh has (ex1+3)*(ex2+3)*(ex3+3) entries, matching ord=3.
  const size_t nx = (size_t)ex1 + 3;
  const size_t ny = (size_t)ex2 + 3;
  const size_t nz = (size_t)ex3 + 3;
  const size_t fh_size = nx * ny * nz;

  // Per-thread scratch buffer, grown on demand and reused across calls.
  static thread_local double *fh = nullptr;
  static thread_local size_t cap = 0;
  if (fh_size > cap) {
    free(fh);
    fh = nullptr;
    cap = 0; // stays 0 if the allocation below fails, so a later call retries
    // aligned_alloc requires the byte count to be a multiple of the alignment.
    const size_t align = 64;
    const size_t bytes = (fh_size * sizeof(double) + align - 1) / align * align;
    fh = (double*)aligned_alloc(align, bytes);
    if (fh) cap = fh_size;
  }
  if (!fh) return;

  // Fortran: call symmetry_bd(3,ex,f,fh,SoA)
  symmetry_bd(3, ex, f, fh, SoA);

  // Read access to the extended field by Fortran index.
  double *const buf = fh;
  const auto H = [buf, ex](int i, int j, int k) -> double {
    return buf[idx_fh_F(i, j, k, ex)];
  };

  /*
   * The Fortran test
   *     i-3 >= imin .and. i+3 <= imax   (same for j, k; Fortran indices)
   * is loop-invariant per axis. With iF = i0 + 1 it reads
   *     i0 >= imin + 2   and   i0 + 4 <= ex1,
   * so the sweep is restricted to that range instead of testing every point.
   */
  const int i_first = (iminF + 2 > 0) ? iminF + 2 : 0;
  const int j_first = (jminF + 2 > 0) ? jminF + 2 : 0;
  const int k_first = (kminF + 2 > 0) ? kminF + 2 : 0;

  const double scale = eps / cof;

  for (int k0 = k_first; k0 + 4 <= ex3; ++k0) {
    const int kF = k0 + 1;
    for (int j0 = j_first; j0 + 4 <= ex2; ++j0) {
      const int jF = j0 + 1;
      for (int i0 = i_first; i0 + 4 <= ex1; ++i0) {
        const int iF = i0 + 1;
        const size_t p = idx_ex(i0, j0, k0, ex);

        const double Dx_term =
            ( (H(iF - 3, jF, kF) + H(iF + 3, jF, kF)) -
              SIX * (H(iF - 2, jF, kF) + H(iF + 2, jF, kF)) +
              FIT * (H(iF - 1, jF, kF) + H(iF + 1, jF, kF)) -
              TWT * H(iF, jF, kF) ) / dX;

        const double Dy_term =
            ( (H(iF, jF - 3, kF) + H(iF, jF + 3, kF)) -
              SIX * (H(iF, jF - 2, kF) + H(iF, jF + 2, kF)) +
              FIT * (H(iF, jF - 1, kF) + H(iF, jF + 1, kF)) -
              TWT * H(iF, jF, kF) ) / dY;

        const double Dz_term =
            ( (H(iF, jF, kF - 3) + H(iF, jF, kF + 3)) -
              SIX * (H(iF, jF, kF - 2) + H(iF, jF, kF + 2)) +
              FIT * (H(iF, jF, kF - 1) + H(iF, jF, kF + 1)) -
              TWT * H(iF, jF, kF) ) / dZ;

        // Fortran:
        //   f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
        f_rhs[p] += scale * (Dx_term + Dy_term + Dz_term);
      }
    }
  }
  // fh is intentionally kept for reuse by the next call on this thread.
}

View File

@@ -0,0 +1,262 @@
#include "xh_tool.h"
/*
 * A C version of symmetry_bd (or a Fortran routine bound to C) must be provided.
 * Fortran: call symmetry_bd(3,ex,f,fh,SoA)
 *
 * Conventions:
 * nghost = 3
 * ex[3] = {ex1,ex2,ex3}
 * f = original grid (ex1*ex2*ex3)
 * fh = extended grid ((ex1+3)*(ex2+3)*(ex3+3)), corresponding to Fortran's (-2:ex1, ...)
 * SoA[3] = input parameter
 */
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
// free(fh);
}

19
AMSS_NCKU_source/xh_po.h Normal file
View File

@@ -0,0 +1,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
/*
 * Copies the index box cxB..cxT (Fortran indices, may be <= 0) out of the
 * column-major grid f(ex1,ex2,ex3) into ya, which is addressed with stride
 * ordn per axis.  Indices <= 0 are taken from the mirrored point 2 - index
 * and multiplied by the matching SoA factor.
 * Returns 0 on success; prints a diagnostic and returns 1 when the box would
 * read outside f.
 */
int xh_decide3d(const int ex[3],
const double *f,
const double *fpi, /* unused here; the Fortran original does not use it either */
const int cxB[3],
const int cxT[3],
const double SoA[3],
double *ya,
int ordn,
int Symmetry);
/*
 * One-dimensional polynomial interpolation through the ordn points (xa, ya),
 * evaluated at x.  The value is stored in *y and the last correction that was
 * added in *dy.  Terminates the process if two abscissae coincide.
 */
void xh_polint(const double *xa, const double *ya, double x,
double *y, double *dy, int ordn);
/*
 * Three-dimensional interpolation on an ordn^3 column-major block ya by
 * repeated calls to xh_polint along the first, second and third axis.
 * The result is returned through the reference y; dy receives the correction
 * reported by the final one-dimensional pass.
 */
void xh_polin3(const double *x1a, const double *x2a, const double *x3a,
const double *ya, double x1, double x2, double x3,
double &y, double *dy, int ordn);

View File

@@ -0,0 +1,258 @@
#include "xh_po.h"
/*
ex[0..2] == Fortran ex(1:3)
cxB/cxT == Fortran cxB(1:3), cxT(1:3) (may be <= 0)
SoA[0..2] == Fortran SoA(1:3)
f, fpi == Fortran f(ex1,ex2,ex3), column-major (the formulas use 1-based indices)
ya == contiguous buffer of ORDN^3 values, standing for Fortran ya(cxB1:cxT1, cxB2:cxT2, cxB3:cxT3).
Note: the Fortran i/j/k coordinates are written into it through an offset mapping.
*/
/* Larger of two ints. */
static inline int imax(int a, int b) { return a > b ? a : b; }
/* Smaller of two ints. */
static inline int imin(int a, int b) { return a < b ? a : b; }

/*
 * Gathers the interpolation block ya(cxB1:cxT1, cxB2:cxT2, cxB3:cxT3) from
 * the grid function f.
 *
 *   ex       grid extents; f is column-major f(ex1,ex2,ex3), 1-based
 *   fpi      unused (kept for parity with the Fortran interface)
 *   cxB/cxT  first / last Fortran index of the block per axis (may be <= 0)
 *   SoA      factor applied once for every axis on which the point is mirrored
 *   ya       output, addressed as (i-cxB1) + ordn*((j-cxB2) + ordn*(k-cxB3));
 *            the caller must size it so that cxT - cxB < ordn on every axis
 *   ordn     stride of ya along each axis
 *   Symmetry unused here
 *
 * Indices >= 1 are copied from f directly.  Indices <= 0 are read from the
 * mirrored point 2 - index and multiplied by the SoA factor of that axis.
 * NOTE(review): symmetry_bd in xh_share_func.h mirrors ghost index -i onto
 * i + 1 instead of 2 - i; confirm that both match the intended grid layout.
 *
 * Returns 0 on success.  If the requested block would read outside f, a
 * diagnostic is printed to stdout, ya is left untouched and 1 is returned.
 */
int xh_decide3d(const int ex[3],
                const double *f,
                const double *fpi,
                const int cxB[3],
                const int cxT[3],
                const double SoA[3],
                double *ya,
                int ordn,
                int Symmetry)
{
    (void)fpi;
    (void)Symmetry;

    int fmin1[3], fmin2[3], fmax1[3], fmax2[3];
    int gont = 0;

    for (int m = 0; m < 3; m++) {
        /* [fmin1, fmax1]: part of the block with indices >= 1 (read directly)
         * [fmin2, fmax2]: part of the block with indices <= 0 (read mirrored) */
        fmin1[m] = imax(1, cxB[m]);
        fmax1[m] = cxT[m];
        fmin2[m] = cxB[m];
        fmax2[m] = imin(0, cxT[m]);
        if ((fmin1[m] <= fmax1[m]) && (fmin1[m] < 1 || fmax1[m] > ex[m])) gont = 1;
        if ((fmin2[m] <= fmax2[m]) && (2 - fmax2[m] < 1 || 2 - fmin2[m] > ex[m])) gont = 1;
    }
    if (gont) {
        printf("error in decide3d\n");
        printf("cxB: %d %d %d cxT: %d %d %d ex: %d %d %d\n",
               cxB[0], cxB[1], cxB[2], cxT[0], cxT[1], cxT[2], ex[0], ex[1], ex[2]);
        printf("fmin1: %d %d %d fmax1: %d %d %d\n",
               fmin1[0], fmin1[1], fmin1[2], fmax1[0], fmax1[1], fmax1[2]);
        printf("fmin2: %d %d %d fmax2: %d %d %d\n",
               fmin2[0], fmin2[1], fmin2[2], fmax2[0], fmax2[1], fmax2[2]);
        return 1;
    }

    /* All offsets are formed in size_t so that large grids cannot overflow int. */
    const size_t ex1 = (size_t)ex[0];
    const size_t ex2 = (size_t)ex[1];
    const size_t n = (size_t)ordn;

    /* The two index ranges per axis together cover cxB..cxT exactly once, so
     * a single sweep with per-axis mirroring writes the same values as the
     * eight separate Fortran loop nests. */
    for (int k = cxB[2]; k <= cxT[2]; k++) {
        const int ks = (k >= 1) ? k : 2 - k;
        for (int j = cxB[1]; j <= cxT[1]; j++) {
            const int js = (j >= 1) ? j : 2 - j;
            const size_t src_row = ex1 * ((size_t)(js - 1) + ex2 * (size_t)(ks - 1));
            const size_t dst_row = n * ((size_t)(j - cxB[1]) + n * (size_t)(k - cxB[2]));
            for (int i = cxB[0]; i <= cxT[0]; i++) {
                const int is = (i >= 1) ? i : 2 - i;
                double v = f[(size_t)(is - 1) + src_row];
                /* same multiplication order as the Fortran: SoA(1), SoA(2), SoA(3) */
                if (i < 1) v *= SoA[0];
                if (j < 1) v *= SoA[1];
                if (k < 1) v *= SoA[2];
                ya[(size_t)(i - cxB[0]) + dst_row] = v;
            }
        }
    }
    return 0;
}
/*
 * Polynomial interpolation through the ordn points (xa[i], ya[i]),
 * evaluated at x (port of the Fortran routine polint).
 *
 *   *y   receives the interpolated value
 *   *dy  receives the last correction added to *y
 *
 * The routine starts from the tabulated value closest to x and adds one
 * correction per tableau level.  c[i] and d[i] hold the corrections that
 * extend the current node interval upwards and downwards respectively.
 *
 * Prints a message to stderr and calls exit(1) if memory cannot be obtained
 * or if two abscissae coincide.
 */
void xh_polint(const double *xa, const double *ya, double x,
               double *y, double *dy, int ordn)
{
    /* one allocation for the three work arrays c, d and ho */
    double *work = (double*)malloc(3 * (size_t)ordn * sizeof(double));
    if (!work) {
        fprintf(stderr, "polint: malloc failed\n");
        exit(1);
    }
    double *c = work;
    double *d = work + ordn;
    double *ho = work + 2 * (size_t)ordn;

    /* index of the node closest to x (first one wins on ties) */
    int nearest = 0;
    double dif = fabs(x - xa[0]);
    for (int i = 0; i < ordn; i++) {
        c[i] = ya[i];
        d[i] = ya[i];
        ho[i] = xa[i] - x;
        const double dift = fabs(x - xa[i]);
        if (dift < dif) {
            nearest = i;
            dif = dift;
        }
    }
    *y = ya[nearest];

    /*
     * lo is the 0-based index of the lowest node covered by the current
     * estimate.  It equals the Fortran counter ns after its "ns = ns - 1",
     * so the Fortran test "2*ns < n-m" carries over unchanged, while the
     * Fortran array references c(ns+1) and d(ns) become c[lo] and d[lo-1].
     * (Shifting the counter itself by one, as an earlier version did, makes
     * the test pick a stale c entry on the last level.)
     */
    int lo = nearest;
    for (int m = 1; m <= ordn - 1; m++) {
        const int n_m = ordn - m; /* number of tableau entries on this level */
        for (int i = 0; i < n_m; i++) {
            const double hp = ho[i];
            const double h = ho[i + m];
            double den_val = hp - h;
            if (den_val == 0.0) {
                fprintf(stderr, "failure in polint for point %g\n", x);
                fprintf(stderr, "with input points xa: ");
                for (int t = 0; t < ordn; t++) fprintf(stderr, "%g ", xa[t]);
                fprintf(stderr, "\n");
                exit(1);
            }
            den_val = (c[i + 1] - d[i]) / den_val;
            d[i] = h * den_val;
            c[i] = hp * den_val;
        }
        if (2 * lo < n_m) {
            *dy = c[lo];          /* extend the interval upwards */
        } else {
            *dy = d[lo - 1];      /* extend the interval downwards */
            lo -= 1;
        }
        *y += *dy;
    }
    free(work);
}
/*
 * Tensor-product interpolation on an ordn x ordn x ordn block.
 *
 * ya is stored column-major (first index fastest), so every run of ordn
 * consecutive values is one line along the first axis.  The block is reduced
 * axis by axis with xh_polint: first along x1 into an ordn x ordn plane,
 * then along x2 into a line of ordn values, and finally along x3 into y.
 * dy receives the correction reported by that last pass; the corrections of
 * the intermediate passes are discarded.
 *
 * Prints a message to stderr and calls exit(1) if memory cannot be obtained.
 */
void xh_polin3(const double *x1a, const double *x2a, const double *x3a,
               const double *ya, double x1, double x2, double x3,
               double &y, double *dy, int ordn)
{
    const size_t n = (size_t)ordn;
    double *plane = (double*)malloc(n * n * sizeof(double));
    double *line = (double*)malloc(n * sizeof(double));
    if (plane == NULL || line == NULL) {
        fprintf(stderr, "polin3: malloc failed\n");
        exit(1);
    }

    double discarded; /* per-pass correction that is not reported */

    /* pass 1: collapse the first axis, walking ya one contiguous line at a time */
    const double *column = ya;
    double *cell = plane;
    for (int k = 0; k < ordn; ++k) {
        for (int j = 0; j < ordn; ++j) {
            xh_polint(x1a, column, x1, cell, &discarded, ordn);
            column += ordn;
            ++cell;
        }
    }

    /* pass 2: collapse the second axis of the plane */
    for (int k = 0; k < ordn; ++k) {
        xh_polint(x2a, plane + ordn * k, x2, line + k, &discarded, ordn);
    }

    /* pass 3: collapse the remaining axis into the caller's result */
    xh_polint(x3a, line, x3, &y, dy, ordn);

    free(plane);
    free(line);
}

View File

@@ -0,0 +1,338 @@
#ifndef SHARE_FUNC_H
#define SHARE_FUNC_H
#include <stdlib.h>
#include <stddef.h>
#include <math.h>
#include <stdio.h>
#include <omp.h>
/* Main grid: 0-based (i0, j0, k0) -> linear offset, first index fastest. */
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
    const size_t len_i = (size_t)ex[0];
    const size_t len_j = (size_t)ex[1];
    return (size_t)i0 + len_i * ((size_t)j0 + len_j * (size_t)k0);
}
/*
 * Linear offset into fh laid out like Fortran fh(-1:ex1, -1:ex2, -1:ex3)
 * (ord = 2, so every Fortran index is shifted by 1).
 * iF/jF/kF are Fortran indices and may be -1, 0, 1..ex.
 */
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
    const size_t len_i = (size_t)(ex[0] + 2);   /* ex1 + ord */
    const size_t len_j = (size_t)(ex[1] + 2);
    const size_t a = (size_t)(iF + 1);          /* 0..ex1+1 */
    const size_t b = (size_t)(jF + 1);          /* 0..ex2+1 */
    const size_t c = (size_t)(kF + 1);          /* 0..ex3+1 */
    return a + len_i * (b + len_j * c);
}
/*
 * Linear offset into fh laid out like Fortran fh(-2:ex1, -2:ex2, -2:ex3)
 * (ord = 3, so every Fortran index is shifted by 2).
 * iF/jF/kF are Fortran indices and may be negative.
 */
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
    const size_t len_i = (size_t)(ex[0] + 3);   /* ex1 + ord */
    const size_t len_j = (size_t)(ex[1] + 3);
    const size_t a = (size_t)(iF + 2);          /* 0..ex1+2 */
    const size_t b = (size_t)(jF + 2);          /* 0..ex2+2 */
    const size_t c = (size_t)(kF + 2);          /* 0..ex3+2 */
    return a + len_i * (b + len_j * c);
}
/*
 * func:  Fortran (1..extc1, 1..extc2, 1..extc3), 1-based
 * funcc: Fortran (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3)
 *
 * On the C side
 *   func  is addressed 0-based: i0 = 0..extc1-1, j0 = 0..extc2-1, k0 = 0..extc3-1
 *   funcc is a flat array addressed with shifted indices:
 *     iF in [-ord+1..extc1]  ->  ii = iF + (ord-1) in [0..extc1+ord-1]
 *     so the row length is nx = extc1 + ord,
 *     and likewise ny = extc2 + ord, nz = extc3 + ord.
 *
 * idx_func0 returns the linear offset of func(i0, j0, k0), first index fastest.
 */
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
    const size_t len_i = (size_t)extc[0];
    const size_t len_j = (size_t)extc[1];
    return (size_t)i0 + len_i * ((size_t)j0 + len_j * (size_t)k0);
}
/*
 * Linear offset of funcc(iF, jF, kF) for a ghost-extended array whose Fortran
 * bounds are (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3).
 * Each Fortran index is shifted by ord-1 so that the lowest one maps to 0.
 */
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
    const int pad = ord - 1;                        /* iF = -pad .. extc1 */
    const size_t len_i = (size_t)(extc[0] + ord);   /* extc1 + ord entries per row */
    const size_t len_j = (size_t)(extc[1] + ord);
    const size_t a = (size_t)(iF + pad);            /* 0..extc1+pad */
    const size_t b = (size_t)(jF + pad);            /* 0..extc2+pad */
    const size_t c = (size_t)(kF + pad);            /* 0..extc3+pad */
    return a + len_i * (b + len_j * c);
}
/*
 * Builds the ghost-extended copy funcc of func.  Equivalent to the Fortran
 *
 *   funcc(1:extc1,1:extc2,1:extc3) = func
 *   do i=0,ord-1
 *     funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
 *   enddo
 *   do i=0,ord-1
 *     funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
 *   enddo
 *
 * funcc has Fortran bounds (-ord+1..extc) on every axis and is stored with
 * the first index fastest, i.e. Fortran index n sits at storage position
 * n + ord - 1 along its axis.  The y pass runs over the full x range and the
 * z pass over the full x and y ranges, so the ghost layers written by the
 * earlier passes are mirrored as well.
 */
static inline void symmetry_bd(int ord,
                               const int extc[3],
                               const double *func,
                               double *funcc,
                               const double SoA[3])
{
    const int n1 = extc[0], n2 = extc[1], n3 = extc[2];
    const int pad = ord - 1;                          /* Fortran index -pad -> position 0 */
    const size_t row = (size_t)(n1 + ord);            /* entries per x line of funcc */
    const size_t slab = row * (size_t)(n2 + ord);     /* entries per z plane of funcc */

    /* 1) interior: funcc(1:extc1, 1:extc2, 1:extc3) = func */
    for (int k = 0; k < n3; ++k) {
        for (int j = 0; j < n2; ++j) {
            const size_t src = ((size_t)j + (size_t)k * (size_t)n2) * (size_t)n1;
            const size_t dst = (size_t)(1 + pad)
                             + (size_t)(j + 1 + pad) * row
                             + (size_t)(k + 1 + pad) * slab;
            for (int i = 0; i < n1; ++i) {
                funcc[dst + (size_t)i] = func[src + (size_t)i];
            }
        }
    }

    /* 2) x ghosts: funcc(-g, 1:extc2, 1:extc3) = funcc(g+1, ...) * SoA(1) */
    for (int g = 0; g < ord; ++g) {
        const size_t to = (size_t)(pad - g);
        const size_t from = (size_t)(pad + 1 + g);
        for (int k = 1; k <= n3; ++k) {
            for (int j = 1; j <= n2; ++j) {
                const size_t base = (size_t)(j + pad) * row + (size_t)(k + pad) * slab;
                funcc[base + to] = funcc[base + from] * SoA[0];
            }
        }
    }

    /* 3) y ghosts over the whole x range: funcc(:, -g, 1:extc3) = funcc(:, g+1, ...) * SoA(2) */
    for (int g = 0; g < ord; ++g) {
        const size_t to = (size_t)(pad - g) * row;
        const size_t from = (size_t)(pad + 1 + g) * row;
        for (int k = 1; k <= n3; ++k) {
            const size_t plane = (size_t)(k + pad) * slab;
            for (int i = -pad; i <= n1; ++i) {
                const size_t col = (size_t)(i + pad);
                funcc[col + to + plane] = funcc[col + from + plane] * SoA[1];
            }
        }
    }

    /* 4) z ghosts over the whole x and y ranges: funcc(:, :, -g) = funcc(:, :, g+1) * SoA(3) */
    for (int g = 0; g < ord; ++g) {
        const size_t to = (size_t)(pad - g) * slab;
        const size_t from = (size_t)(pad + 1 + g) * slab;
        for (int j = -pad; j <= n2; ++j) {
            const size_t line = (size_t)(j + pad) * row;
            for (int i = -pad; i <= n1; ++i) {
                const size_t col = (size_t)(i + pad);
                funcc[col + line + to] = funcc[col + line + from] * SoA[2];
            }
        }
    }
}
#endif
/*
 * fdderivs_xh sits after the header's closing #endif, so it carries its own
 * guard: without it a translation unit that pulls this header in twice would
 * see the function defined twice.
 */
#ifndef XH_FDDERIVS_XH_DEFINED
#define XH_FDDERIVS_XH_DEFINED

/* Five-point second-difference combination about *c with stride s:
 *   -f(-2) + 16 f(-1) - 30 f(0) - f(+2) + 16 f(+1) */
static inline double xh_d2_wide(const double *c, ptrdiff_t s)
{
    return -c[-2 * s] + 16.0 * c[-s] - 30.0 * c[0] - c[2 * s] + 16.0 * c[s];
}

/* Four-point first-difference combination: f(-2) - 8 f(-1) + 8 f(+1) - f(+2) */
static inline double xh_d1_wide(const double *c, ptrdiff_t s)
{
    return c[-2 * s] - 8.0 * c[-s] + 8.0 * c[s] - c[2 * s];
}

/* Wide mixed difference: xh_d1_wide along stride sa, combined with the same
 * weights along stride sb. */
static inline double xh_mixed_wide(const double *c, ptrdiff_t sa, ptrdiff_t sb)
{
    const double m2 = xh_d1_wide(c - 2 * sb, sa);
    const double m1 = xh_d1_wide(c - sb, sa);
    const double p1 = xh_d1_wide(c + sb, sa);
    const double p2 = xh_d1_wide(c + 2 * sb, sa);
    return m2 - 8.0 * m1 + 8.0 * p1 - p2;
}

/* Three-point second difference: f(-1) - 2 f(0) + f(+1) */
static inline double xh_d2_narrow(const double *c, ptrdiff_t s)
{
    return c[-s] - 2.0 * c[0] + c[s];
}

/* Four-point mixed difference: f(-,-) - f(+,-) - f(-,+) + f(+,+) */
static inline double xh_mixed_narrow(const double *c, ptrdiff_t sa, ptrdiff_t sb)
{
    return c[-sa - sb] - c[sa - sb] - c[-sa + sb] + c[sa + sb];
}

/*
 * Second derivatives of a grid function at the single point (i0, j0, k0).
 *
 *   ex            extents of the main grid; results are stored at the 0-based
 *                 offset i0 + ex1*(j0 + ex2*k0) (the layout of idx_ex)
 *   fh            ghost-extended copy with Fortran bounds (-1:ex1, -1:ex2,
 *                 -1:ex3), first index fastest (the layout of idx_fh_F_ord2)
 *   imin..kmaxF   lowest / highest Fortran index that may be read per axis
 *   Fd*           scale factors applied to the wide (+-2 neighbour) stencils
 *   Sd*           scale factors applied to the narrow (+-1 neighbour) stencils
 *   fxx..fzz      output arrays, written at the offset of the point
 *
 * The wide stencils are used when all +-2 neighbours are inside the allowed
 * index range on every axis, the narrow ones when only the +-1 neighbours
 * are, and all six outputs are set to 0 otherwise.
 */
static inline void fdderivs_xh(
    int i0, int j0, int k0,
    const int ex[3],
    const double *fh,
    int iminF, int jminF, int kminF,
    int imaxF, int jmaxF, int kmaxF,
    double Fdxdx, double Fdydy, double Fdzdz,
    double Fdxdy, double Fdxdz, double Fdydz,
    double Sdxdx, double Sdydy, double Sdzdz,
    double Sdxdy, double Sdxdz, double Sdydz,
    double *fxx, double *fxy, double *fxz,
    double *fyy, double *fyz, double *fzz
){
    const int iF = i0 + 1;
    const int jF = j0 + 1;
    const int kF = k0 + 1;

    /* output offset on the main grid */
    const size_t p = (size_t)i0 + (size_t)ex[0] * ((size_t)j0 + (size_t)ex[1] * (size_t)k0);

    /* strides of fh (one ghost layer below index 1, i.e. shift 1) and the
     * pointer to fh(iF, jF, kF) */
    const ptrdiff_t sx = 1;
    const ptrdiff_t sy = (ptrdiff_t)ex[0] + 2;
    const ptrdiff_t sz = sy * ((ptrdiff_t)ex[1] + 2);
    const double *c = fh + ((ptrdiff_t)(iF + 1) + (ptrdiff_t)(jF + 1) * sy + (ptrdiff_t)(kF + 1) * sz);

    /* wide branch: i+-2, j+-2, k+-2 are all inside the allowed range */
    if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
        (jF + 2) <= jmaxF && (jF - 2) >= jminF &&
        (kF + 2) <= kmaxF && (kF - 2) >= kminF)
    {
        fxx[p] = Fdxdx * xh_d2_wide(c, sx);
        fyy[p] = Fdydy * xh_d2_wide(c, sy);
        fzz[p] = Fdzdz * xh_d2_wide(c, sz);
        fxy[p] = Fdxdy * xh_mixed_wide(c, sx, sy);
        fxz[p] = Fdxdz * xh_mixed_wide(c, sx, sz);
        fyz[p] = Fdydz * xh_mixed_wide(c, sy, sz);
    }
    /* narrow branch: i+-1, j+-1, k+-1 are inside the allowed range */
    else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
             (jF + 1) <= jmaxF && (jF - 1) >= jminF &&
             (kF + 1) <= kmaxF && (kF - 1) >= kminF)
    {
        fxx[p] = Sdxdx * xh_d2_narrow(c, sx);
        fyy[p] = Sdydy * xh_d2_narrow(c, sy);
        fzz[p] = Sdzdz * xh_d2_narrow(c, sz);
        fxy[p] = Sdxdy * xh_mixed_narrow(c, sx, sy);
        fxz[p] = Sdxdz * xh_mixed_narrow(c, sx, sz);
        fyz[p] = Sdydz * xh_mixed_narrow(c, sy, sz);
    }
    else {
        fxx[p] = 0.0; fyy[p] = 0.0; fzz[p] = 0.0;
        fxy[p] = 0.0; fxz[p] = 0.0; fyz[p] = 0.0;
    }
}

#endif /* XH_FDDERIVS_XH_DEFINED */

View File

@@ -0,0 +1,27 @@
#include "xh_share_func.h"
/*
 * NOTE(review): the implementation is not part of this header; judging by
 * the parameter names this fills the six second-derivative arrays fxx..fzz
 * of f on the grid X, Y, Z -- confirm against the definition.
 */
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
/*
 * NOTE(review): the implementation is not part of this header; judging by
 * the parameter names this fills the three first-derivative arrays fx, fy,
 * fz of f -- confirm against the definition.
 */
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
/*
 * Adds a term scaled by eps to f_rhs that is built from seven-point
 * combinations of f along x, y and z, at points whose +-3 neighbours lie
 * inside the allowed index range.
 * NOTE(review): only the end of the implementation was checked; confirm the
 * normalisation and the symmetry handling against the definition.
 */
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps);
/*
 * Adds Sfx*Dx(f) + Sfy*Dy(f) + Sfz*Dz(f) to f_rhs, where each D is a
 * one-sided or centred stencil divided by 12 * grid spacing, chosen from the
 * sign of the coefficient and the distance to the grid edge.  f is first
 * extended by three mirrored ghost layers with symmetry_bd(3, ex, f, fh, SoA).
 */
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3]);

File diff suppressed because it is too large Load Diff

View File

@@ -1,195 +1,195 @@
##################################################################
##
## Generate input file for the AMSS-NCKU TwoPuncture routine
## Author: Xiaoqu
## 2024/11/27
## Modified: 2025/01/21
##
##################################################################
import numpy
import os
import AMSS_NCKU_Input as input_data ## import program input file
import math
##################################################################
## Import binary black hole coordinates
## If puncture data are set to "Automatically-BBH", compute initial orbital
## positions and momenta according to the settings and rescale the total
## binary mass to M = 1 for TwoPuncture input.
if (input_data.puncture_data_set == "Automatically-BBH" ):

    mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]

    ## Only a warning is printed here; the script carries on with the given ratio
    if ( mass_ratio_Q < 1.0 ):
        print( " mass_ratio setting is wrong, please reset!!!" )
        print( " set the first black hole to be the larger mass!!!" )

    ## Component masses rescaled so that BBH_M1 + BBH_M2 = 1
    BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
    BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )

    ## Load binary separation and eccentricity
    distance = input_data.Distance
    e0 = input_data.e0

    ## Set binary component coordinates
    ## Note: place the larger-mass black hole at positive y and the
    ## smaller-mass black hole at negative y to follow Brugmann's convention
    ## Coordinate convention for TwoPuncture input (Brugmann):
    ## -----0-----> y
    ## - +

    BBH_X1 = 0.0
    BBH_Y1 = distance * 1.0 / ( 1 + mass_ratio_Q )
    BBH_Z1 = 0.0

    BBH_X2 = 0.0
    BBH_Y2 = - distance * mass_ratio_Q / ( 1 + mass_ratio_Q )
    BBH_Z2 = 0.0

    position_BH = numpy.zeros( (2,3) )
    position_BH[0] = [BBH_X1, BBH_Y1, BBH_Z1]
    position_BH[1] = [BBH_X2, BBH_Y2, BBH_Z2]

    ## Optionally load momentum from parameter file
    ## momentum_BH = input_data.momentum_BH

    ## Compute orbital momenta using the BBH_orbit_parameter module
    import BBH_orbit_parameter

    ## Use the dimensionless spins defined in BBH_orbit_parameter
    BBH_S1 = BBH_orbit_parameter.S1
    BBH_S2 = BBH_orbit_parameter.S2

    momentum_BH = numpy.zeros( (2,3) )
    ## Compute initial orbital momenta from post-Newtonian-based routine
    momentum_BH[0], momentum_BH[1] = BBH_orbit_parameter.generate_BBH_orbit_parameters( BBH_M1, BBH_M2, BBH_S1, BBH_S2, distance, e0 )

    ## Set spin angular momentum input for TwoPuncture
    ## Note: these are dimensional angular momenta (not dimensionless); multiply
    ## by the square of the mass scale. Here masses are scaled so total M=1.

    ## angular_momentum_BH = input_data.angular_momentum_BH

    angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )

    for i in range(input_data.puncture_number):

        if ( input_data.Symmetry == "equatorial-symmetry" ):
            if i==0:
                angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * input_data.parameter_BH[i,2] ]
            elif i==1:
                angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * input_data.parameter_BH[i,2] ]
            else:
                angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]

        elif ( input_data.Symmetry == "no-symmetry" ):
            if i==0:
                angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
            elif i==1:
                ## The second hole is scaled with its own mass BBH_M2, as in the
                ## equatorial-symmetry case and in the "Manually" branch below
                ## (this line previously used BBH_M1)
                angular_momentum_BH[i] = (BBH_M2**2) * input_data.dimensionless_spin_BH[i]
            else:
                angular_momentum_BH[i] = (input_data.parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]

#######################################################

## If puncture data are set to "Manually", read initial positions and momenta
## directly from the parameter file. Rescale the total binary mass to M=1
## for TwoPuncture input.

elif (input_data.puncture_data_set == "Manually" ):

    mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]

    ## Only a warning is printed here; the script carries on with the given ratio
    if ( mass_ratio_Q < 1.0 ):
        print( " mass_ratio setting is wrong, please reset!!!" )
        print( " set the first black hole to be the larger mass!!!" )

    ## Component masses rescaled so that BBH_M1 + BBH_M2 = 1
    BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
    BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )

    parameter_BH = input_data.parameter_BH
    position_BH = input_data.position_BH
    momentum_BH = input_data.momentum_BH

    ## Compute binary separation and load eccentricity
    distance = math.sqrt( (position_BH[0,0]-position_BH[1,0])**2 + (position_BH[0,1]-position_BH[1,1])**2 + (position_BH[0,2]-position_BH[1,2])**2 )
    e0 = input_data.e0

    ## Set spin angular momentum input for TwoPuncture
    ## Note: these are dimensional angular momenta (not dimensionless); multiply
    ## by the square of the mass scale. Here masses are scaled so total M=1.

    ## angular_momentum_BH = input_data.angular_momentum_BH

    angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )

    for i in range(input_data.puncture_number):

        if ( input_data.Symmetry == "equatorial-symmetry" ):
            if i==0:
                angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * parameter_BH[i,2] ]
            elif i==1:
                angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * parameter_BH[i,2] ]
            else:
                angular_momentum_BH[i] = [ 0.0, 0.0, (parameter_BH[i,0]**2) * parameter_BH[i,2] ]

        elif ( input_data.Symmetry == "no-symmetry" ):
            if i==0:
                angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
            elif i==1:
                angular_momentum_BH[i] = (BBH_M2**2) * input_data.dimensionless_spin_BH[i]
            else:
                angular_momentum_BH[i] = (parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
##################################################################
## Write the above binary data into the AMSS-NCKU TwoPuncture input file
def generate_AMSSNCKU_TwoPuncture_input():
    """Write the TwoPuncture parameter file into the output directory.

    The file "AMSS-NCKU-TwoPuncture.input" is created (or overwritten) inside
    input_data.File_directory from the module-level values distance,
    momentum_BH, angular_momentum_BH, BBH_M1 and BBH_M2 prepared above.

    Returns the file object used for writing; it is already closed when the
    function returns.
    """

    ## The with-block closes the file even if one of the writes raises
    with open( os.path.join(input_data.File_directory, "AMSS-NCKU-TwoPuncture.input"), "w") as file1:

        print( "# -----0-----> y", file=file1 )
        print( "# - + use Brugmann's convention", file=file1 )
        print( "ABE::mp = -1.0", file=file1 ) ## use negative values so the code solves for bare masses automatically
        print( "ABE::mm = -1.0", file=file1 )
        print( "# b = D/2", file=file1 )
        print( "ABE::b = ", ( distance / 2.0 ), file=file1 )
        print( "ABE::P_plusx = ", momentum_BH[0,0], file=file1 )
        print( "ABE::P_plusy = ", momentum_BH[0,1], file=file1 )
        print( "ABE::P_plusz = ", momentum_BH[0,2], file=file1 )
        print( "ABE::P_minusx = ", momentum_BH[1,0], file=file1 )
        print( "ABE::P_minusy = ", momentum_BH[1,1], file=file1 )
        print( "ABE::P_minusz = ", momentum_BH[1,2], file=file1 )
        print( "ABE::S_plusx = ", angular_momentum_BH[0,0], file=file1 )
        print( "ABE::S_plusy = ", angular_momentum_BH[0,1], file=file1 )
        print( "ABE::S_plusz = ", angular_momentum_BH[0,2], file=file1 )
        print( "ABE::S_minusx = ", angular_momentum_BH[1,0], file=file1 )
        print( "ABE::S_minusy = ", angular_momentum_BH[1,1], file=file1 )
        print( "ABE::S_minusz = ", angular_momentum_BH[1,2], file=file1 )
        print( "ABE::Mp = ", BBH_M1, file=file1 )
        print( "ABE::Mm = ", BBH_M2, file=file1 )
        print( "ABE::admtol = 1.e-8", file=file1 )
        print( "ABE::Newtontol = 5.e-12", file=file1 )
        print( "ABE::nA = 50", file=file1 )
        print( "ABE::nB = 50", file=file1 )
        print( "ABE::nphi = 26", file=file1 )
        print( "ABE::Newtonmaxit = 50", file=file1 )

    return file1
##################################################################
##################################################################
##
## Generate input file for the AMSS-NCKU TwoPuncture routine
## Author: Xiaoqu
## 2024/11/27
## Modified: 2025/01/21
##
##################################################################
import numpy
import os
import AMSS_NCKU_Input as input_data ## import program input file
import math
##################################################################
## Import binary black hole coordinates
## The code below prepares the module-level quantities that
## generate_AMSSNCKU_TwoPuncture_input() writes out: BBH_M1, BBH_M2, distance,
## momentum_BH and angular_momentum_BH (position_BH and e0 are set as well).
## In both supported modes the total binary mass is rescaled to M = 1.


def _scaled_spin_angular_momentum( mass_1, mass_2 ):
    """Return the (puncture_number, 3) array of dimensional spin angular momenta.

    Each spin is the dimensionless spin multiplied by the square of the
    puncture mass. Punctures 0 and 1 use the rescaled masses mass_1 and mass_2;
    any further puncture uses its mass from input_data.parameter_BH[i,0].
    For "equatorial-symmetry" only the z component is set (from
    input_data.parameter_BH[i,2]); for "no-symmetry" the full vector
    input_data.dimensionless_spin_BH[i] is used. For any other symmetry
    setting the array is left at zero.
    """
    spins = numpy.zeros( (input_data.puncture_number, 3) )
    z_spin_only = ( input_data.Symmetry == "equatorial-symmetry" )
    full_spin = ( input_data.Symmetry == "no-symmetry" )
    if not ( z_spin_only or full_spin ):
        return spins
    for i in range(input_data.puncture_number):
        if i == 0:
            mass = mass_1
        elif i == 1:
            ## The second puncture must be scaled with its own mass
            mass = mass_2
        else:
            mass = input_data.parameter_BH[i,0]
        if z_spin_only:
            spins[i] = [ 0.0, 0.0, (mass**2) * input_data.parameter_BH[i,2] ]
        else:
            spins[i] = (mass**2) * input_data.dimensionless_spin_BH[i]
    return spins


## If puncture data are set to "Automatically-BBH", compute initial orbital
## positions and momenta according to the settings and rescale the total
## binary mass to M = 1 for TwoPuncture input.
if (input_data.puncture_data_set == "Automatically-BBH" ):

    mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]

    ## Only a warning is printed here; the script continues with the given ratio
    if ( mass_ratio_Q < 1.0 ):
        print( " mass_ratio setting is wrong, please reset!!!" )
        print( " set the first black hole to be the larger mass!!!" )

    BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
    BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )

    ## Load binary separation and eccentricity
    distance = input_data.Distance
    e0 = input_data.e0

    ## Set binary component coordinates
    ## Note: place the larger-mass black hole at positive y and the
    ## smaller-mass black hole at negative y to follow Brugmann's convention
    ## Coordinate convention for TwoPuncture input (Brugmann):
    ## -----0-----> y
    ## - +
    BBH_X1 = 0.0
    BBH_Y1 = distance * 1.0 / ( 1 + mass_ratio_Q )
    BBH_Z1 = 0.0

    BBH_X2 = 0.0
    BBH_Y2 = - distance * mass_ratio_Q / ( 1 + mass_ratio_Q )
    BBH_Z2 = 0.0

    position_BH = numpy.zeros( (2,3) )
    position_BH[0] = [BBH_X1, BBH_Y1, BBH_Z1]
    position_BH[1] = [BBH_X2, BBH_Y2, BBH_Z2]

    ## Optionally load momentum from parameter file
    ## momentum_BH = input_data.momentum_BH

    ## Compute orbital momenta using the BBH_orbit_parameter module
    import BBH_orbit_parameter

    ## Use the dimensionless spins defined in BBH_orbit_parameter
    BBH_S1 = BBH_orbit_parameter.S1
    BBH_S2 = BBH_orbit_parameter.S2

    momentum_BH = numpy.zeros( (2,3) )

    ## Compute initial orbital momenta from post-Newtonian-based routine
    momentum_BH[0], momentum_BH[1] = BBH_orbit_parameter.generate_BBH_orbit_parameters( BBH_M1, BBH_M2, BBH_S1, BBH_S2, distance, e0 )

    ## Set spin angular momentum input for TwoPuncture
    ## Note: these are dimensional angular momenta (not dimensionless); multiply
    ## by the square of the mass scale. Here masses are scaled so total M=1.
    ## angular_momentum_BH = input_data.angular_momentum_BH
    angular_momentum_BH = _scaled_spin_angular_momentum( BBH_M1, BBH_M2 )

#######################################################

## If puncture data are set to "Manually", read initial positions and momenta
## directly from the parameter file. Rescale the total binary mass to M=1
## for TwoPuncture input.
elif (input_data.puncture_data_set == "Manually" ):

    mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]

    ## Only a warning is printed here; the script continues with the given ratio
    if ( mass_ratio_Q < 1.0 ):
        print( " mass_ratio setting is wrong, please reset!!!" )
        print( " set the first black hole to be the larger mass!!!" )

    BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
    BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )

    parameter_BH = input_data.parameter_BH
    position_BH = input_data.position_BH
    momentum_BH = input_data.momentum_BH

    ## Compute binary separation (Euclidean distance between the first two
    ## punctures) and load eccentricity
    distance = math.sqrt( (position_BH[0,0]-position_BH[1,0])**2 + (position_BH[0,1]-position_BH[1,1])**2 + (position_BH[0,2]-position_BH[1,2])**2 )
    e0 = input_data.e0

    ## Set spin angular momentum input for TwoPuncture
    ## Note: these are dimensional angular momenta (not dimensionless); multiply
    ## by the square of the mass scale. Here masses are scaled so total M=1.
    ## angular_momentum_BH = input_data.angular_momentum_BH
    angular_momentum_BH = _scaled_spin_angular_momentum( BBH_M1, BBH_M2 )
##################################################################
## Write the above binary data into the AMSS-NCKU TwoPuncture input file
def generate_AMSSNCKU_TwoPuncture_input():
    """Write the TwoPuncture parameter file "AMSS-NCKU-TwoPuncture.input".

    The file is created (or overwritten) inside input_data.File_directory and
    is filled from the module-level values distance, momentum_BH,
    angular_momentum_BH, BBH_M1 and BBH_M2.

    Returns:
        The file object that was written. It is already closed when it is
        returned (kept only for backward compatibility with existing callers).
    """
    input_file_path = os.path.join(input_data.File_directory, "AMSS-NCKU-TwoPuncture.input")
    ## The with-block closes the file even if one of the writes below raises
    with open( input_file_path, "w" ) as file1:
        print( "# -----0-----> y", file=file1 )
        print( "# - + use Brugmann's convention", file=file1 )
        print( "ABE::mp = -1.0", file=file1 )   ## use negative values so the code solves for bare masses automatically
        print( "ABE::mm = -1.0", file=file1 )
        print( "# b = D/2", file=file1 )
        print( "ABE::b = ", ( distance / 2.0 ), file=file1 )
        ## Linear momenta: row 0 is written as the "plus" puncture, row 1 as the "minus" puncture
        for row, side in enumerate( ("plus", "minus") ):
            for column, axis in enumerate( "xyz" ):
                print( f"ABE::P_{side}{axis} = ", momentum_BH[row, column], file=file1 )
        ## Spin angular momenta, same row/column layout as the momenta
        for row, side in enumerate( ("plus", "minus") ):
            for column, axis in enumerate( "xyz" ):
                print( f"ABE::S_{side}{axis} = ", angular_momentum_BH[row, column], file=file1 )
        print( "ABE::Mp = ", BBH_M1, file=file1 )
        print( "ABE::Mm = ", BBH_M2, file=file1 )
        ## Fixed solver settings written verbatim
        print( "ABE::admtol = 1.e-8", file=file1 )
        print( "ABE::Newtontol = 5.e-12", file=file1 )
        print( "ABE::nA = 50", file=file1 )
        print( "ABE::nB = 50", file=file1 )
        print( "ABE::nphi = 26", file=file1 )
        print( "ABE::Newtonmaxit = 50", file=file1 )
    return file1
##################################################################

File diff suppressed because it is too large Load Diff

View File

@@ -1,191 +1,192 @@
##################################################################
##
## This file defines the commands used to build and run AMSS-NCKU
## Author: Xiaoqu
## 2025/01/24
##
##################################################################
import AMSS_NCKU_Input as input_data
import subprocess
import time
## CPU core binding configuration using taskset
## This string is prepended to the shell commands built in this module, so the
## launched program runs under "taskset -c <core list>".
## taskset ensures all child processes inherit the CPU affinity mask
## NOTE(review): earlier comments referred to nohz_full cores 4-55,60-111; the
## active mask below is 16-47,64-95 (32 + 32 = 64 cores) - confirm it matches
## the target machine.
#NUMACTL_CPU_BIND = "taskset -c 0-111"
NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
## Build parallelism configuration
## Number of parallel jobs passed to "make -j" when compiling.
## NOTE(review): 96 jobs is larger than the 64 cores in the mask above - confirm
## this oversubscription is intended.
BUILD_JOBS = 96
##################################################################
##################################################################
## Compile the AMSS-NCKU main program ABE
def makefile_ABE():
    """Compile the AMSS-NCKU main executable with make and stream the build log.

    The make target is chosen from input_data.GPU_Calculation:
    "no" builds ABE, "yes" builds ABEGPU. The command is prefixed with
    NUMACTL_CPU_BIND and uses BUILD_JOBS parallel jobs.

    Raises:
        ValueError: if input_data.GPU_Calculation is neither "no" nor "yes"
            (previously this case failed later on an unbound local variable).
        subprocess.CalledProcessError: if make exits with a non-zero status.
    """
    print( )
    print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
    print( )

    ## Select the make target from the CPU/GPU setting
    if (input_data.GPU_Calculation == "no"):
        make_target = "ABE"
    elif (input_data.GPU_Calculation == "yes"):
        make_target = "ABEGPU"
    else:
        print( " CPU/GPU numerical calculation setting is wrong " )
        print( )
        raise ValueError( "GPU_Calculation must be 'yes' or 'no', got " + repr(input_data.GPU_Calculation) )

    ## Build command with CPU binding
    makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} {make_target}"

    ## Run make; stderr is merged into stdout so the log stays in order.
    ## The context manager closes the pipe when the block is left.
    with subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as makefile_process:
        ## Read and print output lines as they arrive
        for line in makefile_process.stdout:
            print(line, end='')   # stream output in real time
        ## Wait for the process to finish
        makefile_return_code = makefile_process.wait()

    if makefile_return_code != 0:
        raise subprocess.CalledProcessError(makefile_return_code, makefile_command)

    print( )
    print( " Compilation of the AMSS-NCKU executable file ABE is finished " )
    print( )

    return
##################################################################
##################################################################
## Compile the AMSS-NCKU TwoPuncture program TwoPunctureABE
def makefile_TwoPunctureABE():
    """Build the TwoPunctureABE executable with make, echoing the build log.

    The make command is prefixed with NUMACTL_CPU_BIND and runs BUILD_JOBS
    parallel jobs. A non-zero exit status from make is reported by raising
    subprocess.CalledProcessError.
    """
    print( )
    print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
    print( )

    ## Assemble the shell command line for the build
    build_command = "{} make -j{} TwoPunctureABE".format(NUMACTL_CPU_BIND, BUILD_JOBS)

    ## Launch make with stderr folded into stdout
    builder = subprocess.Popen(
        build_command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    ## Echo every line of build output as soon as it is produced
    for output_line in iter(builder.stdout.readline, ''):
        print(output_line, end='')

    ## Collect the exit status and fail loudly if the build did not succeed
    exit_status = builder.wait()
    if exit_status:
        raise subprocess.CalledProcessError(exit_status, build_command)

    print( )
    print( " Compilation of the AMSS-NCKU executable file TwoPunctureABE is finished " )
    print( )

    return
##################################################################
##################################################################
## Run the AMSS-NCKU main program ABE
def run_ABE():
    """Run the AMSS-NCKU main executable under mpirun and log its output.

    input_data.GPU_Calculation selects the executable: "no" runs ./ABE and logs
    to "ABE_out.log", "yes" runs ./ABEGPU and logs to "ABEGPU_out.log". The
    command is prefixed with NUMACTL_CPU_BIND and started with
    input_data.MPI_processes ranks. Every output line is printed and also
    written to the log file.

    A non-zero exit status does not raise; a warning line is printed instead.

    Raises:
        ValueError: if input_data.GPU_Calculation is neither "no" nor "yes"
            (previously this case failed later on an unbound local variable).
    """
    print( )
    print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
    print( )

    ## Define the command to run; cast other values to strings as needed
    if (input_data.GPU_Calculation == "no"):
        mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
        #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
        mpi_command_outfile = "ABE_out.log"
    elif (input_data.GPU_Calculation == "yes"):
        mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
        mpi_command_outfile = "ABEGPU_out.log"
    else:
        print( " CPU/GPU numerical calculation setting is wrong " )
        print( )
        raise ValueError( "GPU_Calculation must be 'yes' or 'no', got " + repr(input_data.GPU_Calculation) )

    ## Open the log first so nothing is launched if the log cannot be created
    with open(mpi_command_outfile, 'w') as file0:
        ## Execute the MPI command; stderr is merged into stdout
        with subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as mpi_process:
            ## Read and print output lines; also write each line to file
            for line in mpi_process.stdout:
                print(line, end='')   # stream output in real time
                file0.write(line)     # write the line to file
                file0.flush()         # keep the log current while the run is in progress
            ## Wait for the process to finish
            mpi_return_code = mpi_process.wait()

    ## Report a failed run instead of silently dropping the exit status
    if mpi_return_code != 0:
        print( f" Warning: the ABE/ABEGPU run exited with return code {mpi_return_code} " )

    print( )
    print( " The ABE/ABEGPU simulation is finished " )
    print( )

    return
##################################################################
##################################################################
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
def run_TwoPunctureABE():
    """Run ./TwoPunctureABE, log its output and print the elapsed wall time.

    Every output line is printed and also written to "TwoPunctureABE_out.log".
    The executable is started without the NUMACTL_CPU_BIND prefix.

    A non-zero exit status does not raise; a warning line is printed instead.
    """
    tp_time1 = time.time()

    print( )
    print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
    print( )

    ## Define the command to run
    #TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
    TwoPuncture_command = " ./TwoPunctureABE"
    TwoPuncture_command_outfile = "TwoPunctureABE_out.log"

    ## Open the log first so nothing is launched if the log cannot be created
    with open(TwoPuncture_command_outfile, 'w') as file0:
        ## Execute the command; stderr is merged into stdout
        with subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as TwoPuncture_process:
            ## Read and print output lines; also write each line to file
            for line in TwoPuncture_process.stdout:
                print(line, end='')   # stream output in real time
                file0.write(line)     # write the line to file
                file0.flush()         # keep the log current while the run is in progress
            ## Wait for the process to finish
            TwoPuncture_command_return_code = TwoPuncture_process.wait()

    ## Report a failed run instead of silently dropping the exit status
    if TwoPuncture_command_return_code != 0:
        print( f" Warning: TwoPunctureABE exited with return code {TwoPuncture_command_return_code} " )

    print( )
    print( " The TwoPunctureABE simulation is finished " )
    print( )

    tp_time2 = time.time()
    et = tp_time2 - tp_time1
    print(f"Used time: {et}")

    return
##################################################################
##################################################################
##
## This file defines the commands used to build and run AMSS-NCKU
## Author: Xiaoqu
## 2025/01/24
##
##################################################################
import AMSS_NCKU_Input as input_data
import subprocess
import time
## CPU core binding configuration using taskset
## NUMACTL_CPU_BIND is prepended to the make commands built in this module, so
## the build runs under "taskset -c <core list>".
## taskset ensures all child processes inherit the CPU affinity mask
## NOTE(review): earlier comments referred to nohz_full cores 4-55,60-111; the
## active mask below is 0-47 (48 cores) - confirm it matches the target machine.
#NUMACTL_CPU_BIND = "taskset -c 0-111"
NUMACTL_CPU_BIND = "taskset -c 0-47"
## NUMACTL_CPU_BIND2 additionally sets OpenMP environment variables (48 threads,
## close binding, one place per core 0-47) in front of the same taskset mask.
NUMACTL_CPU_BIND2 = "OMP_NUM_THREADS=48 OMP_PROC_BIND=close OMP_PLACES={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47} taskset -c 0-47"
#NUMACTL_CPU_BIND2 = "taskset -c 0-1"
## Build parallelism configuration
## Number of parallel jobs passed to "make -j" when compiling.
## Set make -j to utilize available cores for faster builds
BUILD_JOBS = 32
##################################################################
##################################################################
## Compile the AMSS-NCKU main program ABE
def makefile_ABE():
    """Compile the AMSS-NCKU main executable with make and stream the build log.

    The make target is chosen from input_data.GPU_Calculation:
    "no" builds ABE, "yes" builds ABEGPU. The command is prefixed with
    NUMACTL_CPU_BIND and uses BUILD_JOBS parallel jobs.

    Raises:
        ValueError: if input_data.GPU_Calculation is neither "no" nor "yes"
            (previously this case failed later on an unbound local variable).
        subprocess.CalledProcessError: if make exits with a non-zero status.
    """
    print( )
    print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
    print( )

    ## Select the make target from the CPU/GPU setting
    if (input_data.GPU_Calculation == "no"):
        make_target = "ABE"
    elif (input_data.GPU_Calculation == "yes"):
        make_target = "ABEGPU"
    else:
        print( " CPU/GPU numerical calculation setting is wrong " )
        print( )
        raise ValueError( "GPU_Calculation must be 'yes' or 'no', got " + repr(input_data.GPU_Calculation) )

    ## Build command with CPU binding
    makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} {make_target}"

    ## Run make; stderr is merged into stdout so the log stays in order.
    ## The context manager closes the pipe when the block is left.
    with subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as makefile_process:
        ## Read and print output lines as they arrive
        for line in makefile_process.stdout:
            print(line, end='')   # stream output in real time
        ## Wait for the process to finish
        makefile_return_code = makefile_process.wait()

    if makefile_return_code != 0:
        raise subprocess.CalledProcessError(makefile_return_code, makefile_command)

    print( )
    print( " Compilation of the AMSS-NCKU executable file ABE is finished " )
    print( )

    return
##################################################################
##################################################################
## Compile the AMSS-NCKU TwoPuncture program TwoPunctureABE
def makefile_TwoPunctureABE():
    """Build the TwoPunctureABE executable with make, echoing the build log.

    The make command is prefixed with NUMACTL_CPU_BIND and runs BUILD_JOBS
    parallel jobs. A non-zero exit status from make is reported by raising
    subprocess.CalledProcessError.
    """
    print( )
    print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
    print( )

    ## Assemble the shell command line for the build
    build_command = "{} make -j{} TwoPunctureABE".format(NUMACTL_CPU_BIND, BUILD_JOBS)

    ## Launch make with stderr folded into stdout
    builder = subprocess.Popen(
        build_command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    ## Echo every line of build output as soon as it is produced
    for output_line in iter(builder.stdout.readline, ''):
        print(output_line, end='')

    ## Collect the exit status and fail loudly if the build did not succeed
    exit_status = builder.wait()
    if exit_status:
        raise subprocess.CalledProcessError(exit_status, build_command)

    print( )
    print( " Compilation of the AMSS-NCKU executable file TwoPunctureABE is finished " )
    print( )

    return
##################################################################
##################################################################
## Run the AMSS-NCKU main program ABE
def run_ABE():
    """Run the AMSS-NCKU main executable under mpirun and log its output.

    input_data.GPU_Calculation selects the executable: "no" runs ./ABE and logs
    to "ABE_out.log", "yes" runs ./ABEGPU and logs to "ABEGPU_out.log". Every
    output line is printed and also written to the log file.

    A non-zero exit status does not raise; a warning line is printed instead.

    Raises:
        ValueError: if input_data.GPU_Calculation is neither "no" nor "yes"
            (previously this case failed later on an unbound local variable).
    """
    print( )
    print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
    print( )

    ## Define the command to run; cast other values to strings as needed
    if (input_data.GPU_Calculation == "no"):
        ## NOTE(review): this launch line is hard-coded (one MPI rank, 48 OpenMP
        ## threads) and uses neither input_data.MPI_processes nor NUMACTL_CPU_BIND2,
        ## unlike the ABEGPU branch below - confirm this is intended.
        #mpi_command = NUMACTL_CPU_BIND2 + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
        #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
        mpi_command = """ OMP_NUM_THREADS=48 OMP_PROC_BIND=close OMP_PLACES=cores mpirun -np 1 --cpu-bind=sockets ./ABE """
        mpi_command_outfile = "ABE_out.log"
    elif (input_data.GPU_Calculation == "yes"):
        mpi_command = NUMACTL_CPU_BIND2 + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
        mpi_command_outfile = "ABEGPU_out.log"
    else:
        print( " CPU/GPU numerical calculation setting is wrong " )
        print( )
        raise ValueError( "GPU_Calculation must be 'yes' or 'no', got " + repr(input_data.GPU_Calculation) )

    ## Open the log first so nothing is launched if the log cannot be created
    with open(mpi_command_outfile, 'w') as file0:
        ## Execute the MPI command; stderr is merged into stdout
        with subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as mpi_process:
            ## Read and print output lines; also write each line to file
            for line in mpi_process.stdout:
                print(line, end='')   # stream output in real time
                file0.write(line)     # write the line to file
                file0.flush()         # keep the log current while the run is in progress
            ## Wait for the process to finish
            mpi_return_code = mpi_process.wait()

    ## Report a failed run instead of silently dropping the exit status
    if mpi_return_code != 0:
        print( f" Warning: the ABE/ABEGPU run exited with return code {mpi_return_code} " )

    print( )
    print( " The ABE/ABEGPU simulation is finished " )
    print( )

    return
##################################################################
##################################################################
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
def run_TwoPunctureABE():
    """Run ./TwoPunctureABE, log its output and print the elapsed wall time.

    Every output line is printed and also written to "TwoPunctureABE_out.log".
    The executable is started without the NUMACTL_CPU_BIND prefix.

    A non-zero exit status does not raise; a warning line is printed instead.
    """
    tp_time1 = time.time()

    print( )
    print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
    print( )

    ## Define the command to run
    #TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
    TwoPuncture_command = " ./TwoPunctureABE"
    TwoPuncture_command_outfile = "TwoPunctureABE_out.log"

    ## Open the log first so nothing is launched if the log cannot be created
    with open(TwoPuncture_command_outfile, 'w') as file0:
        ## Execute the command; stderr is merged into stdout
        with subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as TwoPuncture_process:
            ## Read and print output lines; also write each line to file
            for line in TwoPuncture_process.stdout:
                print(line, end='')   # stream output in real time
                file0.write(line)     # write the line to file
                file0.flush()         # keep the log current while the run is in progress
            ## Wait for the process to finish
            TwoPuncture_command_return_code = TwoPuncture_process.wait()

    ## Report a failed run instead of silently dropping the exit status
    if TwoPuncture_command_return_code != 0:
        print( f" Warning: TwoPunctureABE exited with return code {TwoPuncture_command_return_code} " )

    print( )
    print( " The TwoPunctureABE simulation is finished " )
    print( )

    tp_time2 = time.time()
    et = tp_time2 - tp_time1
    print(f"Used time: {et}")

    return
##################################################################

File diff suppressed because it is too large Load Diff

View File

@@ -1,133 +1,133 @@
##################################################################
##
## Update puncture parameters from TwoPuncture output
## Author: Xiaoqu
## 2024/12/04
##
##################################################################
import AMSS_NCKU_Input as input_data
import numpy
import os
##################################################################
##################################################################
def read_TwoPuncture_Output(Output_File_directory):
    """Collect puncture parameters after a TwoPuncture run.

    Reads "puncture_parameters_new.txt" from Output_File_directory and flattens
    it to a 1-D array. Punctures 0 and 1 are taken from that array, 12 values
    per puncture (offsets 0 and 1 are the bare and dimensionless mass, 3-5 the
    position, 6-8 the momentum, 9-11 the spin; offset 2 is not used). Any
    further puncture is filled from the input_data settings.

    Returns:
        Tuple (bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH,
        angular_momentum_BH) of numpy arrays with one entry (or one 3-vector)
        per puncture.
    """
    puncture_count = input_data.puncture_number

    ## Result containers, all initialised to zero
    dimensionless_mass_BH = numpy.zeros( puncture_count )
    bare_mass_BH = numpy.zeros( puncture_count )
    position_BH = numpy.zeros( (puncture_count, 3) )
    momentum_BH = numpy.zeros( (puncture_count, 3) )
    angular_momentum_BH = numpy.zeros( (puncture_count, 3) )

    ## Load the TwoPuncture output and view it as one flat sequence of numbers
    parameter_file = os.path.join(Output_File_directory, "puncture_parameters_new.txt")
    data = numpy.loadtxt( parameter_file )
    data = data.reshape(-1)

    for i in range(puncture_count):

        if i >= 2:
            ## Punctures beyond the first two come from the input file
            dimensionless_mass_BH[i] = input_data.parameter_BH[i,0]
            bare_mass_BH[i] = input_data.parameter_BH[i,0]
            position_BH[i] = input_data.position_BH[i]
            momentum_BH[i] = input_data.momentum_BH[i]
            ## The spin depends on the symmetry setting; other settings leave it at zero
            if ( input_data.Symmetry == "equatorial-symmetry" ):
                angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]
            elif ( input_data.Symmetry == "no-symmetry" ):
                angular_momentum_BH[i] = (dimensionless_mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
            continue

        ## First two punctures: 12 consecutive values each in the output file
        base = 12 * i
        bare_mass_BH[i] = data[base]
        dimensionless_mass_BH[i] = data[base + 1]
        position_BH[i] = [ data[base + 3], data[base + 4], data[base + 5] ]
        momentum_BH[i] = [ data[base + 6], data[base + 7], data[base + 8] ]
        angular_momentum_BH[i] = [ data[base + 9], data[base + 10], data[base + 11] ]

    return bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH
##################################################################
##################################################################
## Append the computed puncture information into the AMSS-NCKU input file
def append_AMSSNCKU_BSSN_input(File_directory, TwoPuncture_File_directory):
    """Append the BSSN puncture settings to "AMSS-NCKU.input".

    When input_data.Initial_Data_Method is "Ansorg-TwoPuncture", masses,
    positions, momenta and spins come from read_TwoPuncture_Output() and the
    bare mass is written as BSSN::Mass. Otherwise they are taken directly from
    the input_data settings.

    Args:
        File_directory: kept for interface compatibility.
            NOTE(review): the file is opened under input_data.File_directory,
            not under this argument - confirm callers always pass the same path.
        TwoPuncture_File_directory: directory that holds the TwoPuncture output
            file; only used for the "Ansorg-TwoPuncture" method.
    """
    puncture_count = input_data.puncture_number
    use_two_puncture = (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" )

    charge_Q_BH = numpy.zeros( puncture_count )   ## initialize charge for each black hole

    ## If using Ansorg-TwoPuncture to solve the initial-data problem, read
    ## bare masses, positions and angular momenta from TwoPuncture output
    if use_two_puncture:
        bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH = read_TwoPuncture_Output(TwoPuncture_File_directory)
        ## set charge for each black hole
        for i in range(puncture_count):
            charge_Q_BH[i] = dimensionless_mass_BH[i] * input_data.parameter_BH[i,1]
        output_mass = bare_mass_BH

    ## If using another method for initial data, read parameters directly from input
    else:
        position_BH = input_data.position_BH
        momentum_BH = input_data.momentum_BH
        ## angular_momentum_BH = input_data.angular_momentum_BH
        angular_momentum_BH = numpy.zeros( (puncture_count, 3) )   ## initialize spin angular momentum array
        mass_BH = numpy.zeros( puncture_count )                    ## initialize mass array

        ## Set charge and spin angular momentum for each puncture.
        ## Any other symmetry setting leaves mass, charge and spin at zero.
        for i in range(puncture_count):
            if input_data.Symmetry in ( "octant-symmetry", "equatorial-symmetry" ):
                ## Only the z component of the spin is set for these symmetries
                mass_BH[i] = input_data.parameter_BH[i,0]
                charge_Q_BH[i] = mass_BH[i]* input_data.parameter_BH[i,1]
                angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ]
            elif ( input_data.Symmetry == "no-symmetry" ):
                mass_BH[i] = input_data.parameter_BH[i,0]
                angular_momentum_BH[i] = (mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
                charge_Q_BH[i] = mass_BH[i] * input_data.parameter_BH[i,1]
        output_mass = mass_BH

    ## Open in append mode; the with-block closes the file even if a write raises
    with open( os.path.join(input_data.File_directory, "AMSS-NCKU.input"), "a") as file1:

        ## Output BSSN related settings
        print( file=file1 )
        print( "BSSN::chitiny = 1e-5", file=file1 )
        print( "BSSN::time refinement start from level = ", input_data.refinement_level, file=file1 )
        print( "BSSN::BH_num = ", input_data.puncture_number, file=file1 )

        for i in range(puncture_count):
            print( f"BSSN::Mass[{i}] = { output_mass[i] } ", file=file1 )
            print( f"BSSN::Qchar[{i}] = { charge_Q_BH[i] } ", file=file1 )
            ## Position, momentum and spin are each written as x, y, z components
            for column, axis in enumerate( "xyz" ):
                print( f"BSSN::Porg{axis}[{i}] = { position_BH[i,column] } ", file=file1 )
            for column, axis in enumerate( "xyz" ):
                print( f"BSSN::Pmom{axis}[{i}] = { momentum_BH[i,column] } ", file=file1 )
            for column, axis in enumerate( "xyz" ):
                print( f"BSSN::Spin{axis}[{i}] = { angular_momentum_BH[i,column] } ", file=file1 )

        print( file=file1 )

    return
#################################################
##################################################################
##
## Update puncture parameters from TwoPuncture output
## Author: Xiaoqu
## 2024/12/04
##
##################################################################
import AMSS_NCKU_Input as input_data
import numpy
import os
##################################################################
##################################################################
def read_TwoPuncture_Output(Output_File_directory):
    """Collect puncture parameters after a TwoPuncture run.

    Reads "puncture_parameters_new.txt" from Output_File_directory and flattens
    it to a 1-D array. Punctures 0 and 1 are taken from that array, 12 values
    per puncture (offsets 0 and 1 are the bare and dimensionless mass, 3-5 the
    position, 6-8 the momentum, 9-11 the spin; offset 2 is not used). Any
    further puncture is filled from the input_data settings.

    Returns:
        Tuple (bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH,
        angular_momentum_BH) of numpy arrays with one entry (or one 3-vector)
        per puncture.
    """
    puncture_count = input_data.puncture_number

    ## Result containers, all initialised to zero
    dimensionless_mass_BH = numpy.zeros( puncture_count )
    bare_mass_BH = numpy.zeros( puncture_count )
    position_BH = numpy.zeros( (puncture_count, 3) )
    momentum_BH = numpy.zeros( (puncture_count, 3) )
    angular_momentum_BH = numpy.zeros( (puncture_count, 3) )

    ## Load the TwoPuncture output and view it as one flat sequence of numbers
    parameter_file = os.path.join(Output_File_directory, "puncture_parameters_new.txt")
    data = numpy.loadtxt( parameter_file )
    data = data.reshape(-1)

    for i in range(puncture_count):

        if i >= 2:
            ## Punctures beyond the first two come from the input file
            dimensionless_mass_BH[i] = input_data.parameter_BH[i,0]
            bare_mass_BH[i] = input_data.parameter_BH[i,0]
            position_BH[i] = input_data.position_BH[i]
            momentum_BH[i] = input_data.momentum_BH[i]
            ## The spin depends on the symmetry setting; other settings leave it at zero
            if ( input_data.Symmetry == "equatorial-symmetry" ):
                angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]
            elif ( input_data.Symmetry == "no-symmetry" ):
                angular_momentum_BH[i] = (dimensionless_mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
            continue

        ## First two punctures: 12 consecutive values each in the output file
        base = 12 * i
        bare_mass_BH[i] = data[base]
        dimensionless_mass_BH[i] = data[base + 1]
        position_BH[i] = [ data[base + 3], data[base + 4], data[base + 5] ]
        momentum_BH[i] = [ data[base + 6], data[base + 7], data[base + 8] ]
        angular_momentum_BH[i] = [ data[base + 9], data[base + 10], data[base + 11] ]

    return bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH
##################################################################
##################################################################
## Append the computed puncture information into the AMSS-NCKU input file
def append_AMSSNCKU_BSSN_input(File_directory, TwoPuncture_File_directory):
    """Append the BSSN puncture settings to "AMSS-NCKU.input".

    When input_data.Initial_Data_Method is "Ansorg-TwoPuncture", masses,
    positions, momenta and spins come from read_TwoPuncture_Output() and the
    bare mass is written as BSSN::Mass. Otherwise they are taken directly from
    the input_data settings.

    Args:
        File_directory: kept for interface compatibility.
            NOTE(review): the file is opened under input_data.File_directory,
            not under this argument - confirm callers always pass the same path.
        TwoPuncture_File_directory: directory that holds the TwoPuncture output
            file; only used for the "Ansorg-TwoPuncture" method.
    """
    puncture_count = input_data.puncture_number
    use_two_puncture = (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" )

    charge_Q_BH = numpy.zeros( puncture_count )   ## initialize charge for each black hole

    ## If using Ansorg-TwoPuncture to solve the initial-data problem, read
    ## bare masses, positions and angular momenta from TwoPuncture output
    if use_two_puncture:
        bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH = read_TwoPuncture_Output(TwoPuncture_File_directory)
        ## set charge for each black hole
        for i in range(puncture_count):
            charge_Q_BH[i] = dimensionless_mass_BH[i] * input_data.parameter_BH[i,1]
        output_mass = bare_mass_BH

    ## If using another method for initial data, read parameters directly from input
    else:
        position_BH = input_data.position_BH
        momentum_BH = input_data.momentum_BH
        ## angular_momentum_BH = input_data.angular_momentum_BH
        angular_momentum_BH = numpy.zeros( (puncture_count, 3) )   ## initialize spin angular momentum array
        mass_BH = numpy.zeros( puncture_count )                    ## initialize mass array

        ## Set charge and spin angular momentum for each puncture.
        ## Any other symmetry setting leaves mass, charge and spin at zero.
        for i in range(puncture_count):
            if input_data.Symmetry in ( "octant-symmetry", "equatorial-symmetry" ):
                ## Only the z component of the spin is set for these symmetries
                mass_BH[i] = input_data.parameter_BH[i,0]
                charge_Q_BH[i] = mass_BH[i]* input_data.parameter_BH[i,1]
                angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ]
            elif ( input_data.Symmetry == "no-symmetry" ):
                mass_BH[i] = input_data.parameter_BH[i,0]
                angular_momentum_BH[i] = (mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
                charge_Q_BH[i] = mass_BH[i] * input_data.parameter_BH[i,1]
        output_mass = mass_BH

    ## Open in append mode; the with-block closes the file even if a write raises
    with open( os.path.join(input_data.File_directory, "AMSS-NCKU.input"), "a") as file1:

        ## Output BSSN related settings
        print( file=file1 )
        print( "BSSN::chitiny = 1e-5", file=file1 )
        print( "BSSN::time refinement start from level = ", input_data.refinement_level, file=file1 )
        print( "BSSN::BH_num = ", input_data.puncture_number, file=file1 )

        for i in range(puncture_count):
            print( f"BSSN::Mass[{i}] = { output_mass[i] } ", file=file1 )
            print( f"BSSN::Qchar[{i}] = { charge_Q_BH[i] } ", file=file1 )
            ## Position, momentum and spin are each written as x, y, z components
            for column, axis in enumerate( "xyz" ):
                print( f"BSSN::Porg{axis}[{i}] = { position_BH[i,column] } ", file=file1 )
            for column, axis in enumerate( "xyz" ):
                print( f"BSSN::Pmom{axis}[{i}] = { momentum_BH[i,column] } ", file=file1 )
            for column, axis in enumerate( "xyz" ):
                print( f"BSSN::Spin{axis}[{i}] = { angular_momentum_BH[i,column] } ", file=file1 )

        print( file=file1 )

    return
#################################################