Compare commits

..

2 Commits

Author SHA1 Message Date
wingrew
19b0e79692 黄老板逆天重写 2026-03-01 05:48:40 +08:00
e09ae438a2 Cache data_packer lengths in Sync_start to skip redundant buffer-size traversals
The data_packer(NULL, ...) calls that compute send/recv buffer lengths
traverse all grid segments × variables × nprocs on every Sync_start
invocation, even though lengths never change once the cache is built.
Add a lengths_valid flag to SyncCache so these length computations are
done once and reused on subsequent calls (4× per RK4 step).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-02-10 21:39:22 +08:00
84 changed files with 86019 additions and 68328 deletions

12
.gitignore vendored
View File

@@ -1,6 +1,6 @@
__pycache__ __pycache__
GW150914 GW150914
GW150914-origin GW150914-origin
docs docs
*.tmp *.tmp

4877
2.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ import numpy
File_directory = "GW150914" ## output file directory File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long ## The file directory name should not be too long
MPI_processes = 1 ## number of processes (MPI removed, single-process mode) MPI_processes = 2 ## number of mpi processes used in the simulation
GPU_Calculation = "no" ## Use GPU or not GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface) ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
@@ -50,7 +50,7 @@ Check_Time = 100.0
Dump_Time = 100.0 ## time inteval dT for dumping binary data Dump_Time = 100.0 ## time inteval dT for dumping binary data
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT' D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT" Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number Evolution_Step_Number = 6 ## stop the calculation after the maximal step number
Courant_Factor = 0.5 ## Courant Factor Courant_Factor = 0.5 ## Courant Factor
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength

View File

@@ -49,32 +49,32 @@ import time
File_directory = os.path.join(input_data.File_directory) File_directory = os.path.join(input_data.File_directory)
## If the specified output directory exists, ask the user whether to continue ## If the specified output directory exists, ask the user whether to continue
if os.path.exists(File_directory): # if os.path.exists(File_directory):
print( " Output dictionary has been existed !!! " ) # print( " Output dictionary has been existed !!! " )
print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " ) # print( " If you want to overwrite the existing file directory, please input 'continue' in the terminal !! " )
print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " ) # print( " If you want to retain the existing file directory, please input 'stop' in the terminal to stop the " )
print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " ) # print( " simulation. Then you can reset the output dictionary in the input script file AMSS_NCKU_Input.py !!! " )
print( ) # print( )
## Prompt whether to overwrite the existing directory # ## Prompt whether to overwrite the existing directory
while True: # while True:
try: # try:
inputvalue = input() # inputvalue = input()
## If the user agrees to overwrite, proceed and remove the existing directory # ## If the user agrees to overwrite, proceed and remove the existing directory
if ( inputvalue == "continue" ): # if ( inputvalue == "continue" ):
print( " Continue the calculation !!! " ) # print( " Continue the calculation !!! " )
print( ) # print( )
break # break
## If the user chooses not to overwrite, exit and keep the existing directory # ## If the user chooses not to overwrite, exit and keep the existing directory
elif ( inputvalue == "stop" ): # elif ( inputvalue == "stop" ):
print( " Stop the calculation !!! " ) # print( " Stop the calculation !!! " )
sys.exit() # sys.exit()
## If the user input is invalid, prompt again # ## If the user input is invalid, prompt again
else: # else:
print( " Please input your choice !!! " ) # print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " ) # print( " Input 'continue' or 'stop' in the terminal !!! " )
except ValueError: # except ValueError:
print( " Please input your choice !!! " ) # print( " Please input your choice !!! " )
print( " Input 'continue' or 'stop' in the terminal !!! " ) # print( " Input 'continue' or 'stop' in the terminal !!! " )
## Remove the existing output directory if present ## Remove the existing output directory if present
shutil.rmtree(File_directory, ignore_errors=True) shutil.rmtree(File_directory, ignore_errors=True)

View File

@@ -20,15 +20,11 @@ using namespace std;
#include <map.h> #include <map.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "misc.h" #include "misc.h"
#include "macrodef.h" #include "macrodef.h"
#include <omp.h>
#ifndef ABEtype #ifndef ABEtype
#error "not define ABEtype" #error "not define ABEtype"
#endif #endif
@@ -73,8 +69,9 @@ int main(int argc, char *argv[])
double Begin_clock, End_clock; double Begin_clock, End_clock;
if (myrank == 0) if (myrank == 0)
{ {
Begin_clock = MPI_Wtime(); Begin_clock = MPI_Wtime();
} }
if (argc > 1) if (argc > 1)

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#define PI M_PI #define PI M_PI

File diff suppressed because it is too large Load Diff

View File

@@ -2,11 +2,7 @@
#ifndef BLOCK_H #ifndef BLOCK_H
#define BLOCK_H #define BLOCK_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "macrodef.h" //need dim here; Vertex or Cell #include "macrodef.h" //need dim here; Vertex or Cell
#include "var.h" #include "var.h"
#include "MyList.h" #include "MyList.h"

View File

@@ -4,11 +4,7 @@
#include <stdarg.h> #include <stdarg.h>
#include <string.h> #include <string.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "myglobal.h" #include "myglobal.h"

View File

@@ -13,7 +13,7 @@ using namespace std;
#include "MPatch.h" #include "MPatch.h"
#include "Parallel.h" #include "Parallel.h"
#include "fmisc.h" #include "fmisc.h"
#include "xh_global_interp.h"
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi) Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
{ {
@@ -394,7 +394,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
while (notfind && Bp) // run along Blocks while (notfind && Bp) // run along Blocks
{ {
Block *BP = Bp->data; Block *BP = Bp->data;
bool flag = true; bool flag = true;
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
{ {
@@ -430,8 +429,10 @@ void Patch::Interp_Points(MyList<var> *VarList,
int k = 0; int k = 0;
while (varl) // run along variables while (varl) // run along variables
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next; varl = varl->next;
k++; k++;
} }
@@ -441,6 +442,7 @@ void Patch::Interp_Points(MyList<var> *VarList,
break; break;
Bp = Bp->next; Bp = Bp->next;
} }
} }
// Replace MPI_Allreduce with per-owner MPI_Bcast: // Replace MPI_Allreduce with per-owner MPI_Bcast:
@@ -510,7 +512,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
int myrank, nprocs; int myrank, nprocs;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
// printf("here----\n");
// int zzz = 0;
int ordn = 2 * ghost_width; int ordn = 2 * ghost_width;
MyList<var> *varl; MyList<var> *varl;
int num_var = 0; int num_var = 0;
@@ -529,30 +532,35 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
owner_rank[j] = -1; owner_rank[j] = -1;
double DH[dim], llb[dim], uub[dim]; double DH[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
// --- Interpolation phase (identical to original) --- // --- Interpolation phase (identical to original) ---
// printf("NN: %d, num_var = %d\n", NN, num_var);
#pragma omp parallel
{
#pragma omp for
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
{ {
double pox[dim]; double pox[dim], llb[dim], uub[dim];
MyList<var> *varl1;
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
{ {
pox[i] = XX[i][j]; pox[i] = XX[i][j];
if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i])) // if (myrank == 0 && (XX[i][j] < bbox[i] + lli[i] * DH[i] || XX[i][j] > bbox[dim + i] - uui[i] * DH[i]))
{ // {
cout << "Patch::Interp_Points: point ("; // cout << "Patch::Interp_Points: point (";
for (int k = 0; k < dim; k++) // for (int k = 0; k < dim; k++)
{ // {
cout << XX[k][j]; // cout << XX[k][j];
if (k < dim - 1) // if (k < dim - 1)
cout << ","; // cout << ",";
else // else
cout << ") is out of current Patch." << endl; // cout << ") is out of current Patch." << endl;
} // }
MPI_Abort(MPI_COMM_WORLD, 1); // MPI_Abort(MPI_COMM_WORLD, 1);
} // }
} }
MyList<Block> *Bp = blb; MyList<Block> *Bp = blb;
@@ -584,21 +592,23 @@ void Patch::Interp_Points(MyList<var> *VarList,
break; break;
} }
} }
// printf("flag = %d\n", flag);
if (flag) if (flag)
{ {
notfind = false; notfind = false;
owner_rank[j] = BP->rank; owner_rank[j] = BP->rank;
if (myrank == BP->rank) if (myrank == BP->rank)
{ {
varl = VarList; varl1 = VarList;
int k = 0; int k = 0;
while (varl) while (varl1)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl1->data->sgfn], Shellf[j * num_var + k],
varl = varl->next; pox[0], pox[1], pox[2], ordn, varl1->data->SoA, Symmetry);
varl1 = varl1->next;
k++; k++;
// zzz += 1;
} }
} }
} }
@@ -607,7 +617,8 @@ void Patch::Interp_Points(MyList<var> *VarList,
Bp = Bp->next; Bp = Bp->next;
} }
} }
}
// printf("Interpolation done, zzz = %d\n", zzz);
// --- Error check for unfound points --- // --- Error check for unfound points ---
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
{ {
@@ -773,7 +784,6 @@ void Patch::Interp_Points(MyList<var> *VarList,
int myrank, lmyrank; int myrank, lmyrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_rank(Comm_here, &lmyrank); MPI_Comm_rank(Comm_here, &lmyrank);
int ordn = 2 * ghost_width; int ordn = 2 * ghost_width;
MyList<var> *varl; MyList<var> *varl;
int num_var = 0; int num_var = 0;
@@ -863,7 +873,7 @@ void Patch::Interp_Points(MyList<var> *VarList,
int k = 0; int k = 0;
while (varl) // run along variables while (varl) // run along variables
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next; varl = varl->next;
k++; k++;
@@ -1095,7 +1105,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
{ {
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry); // pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next; varl = varl->next;
k++; k++;
@@ -1197,7 +1207,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
// NOTE: we do not Synchnize variables here, make sure of that before calling this routine // NOTE: we do not Synchnize variables here, make sure of that before calling this routine
int myrank; int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
int ordn = 2 * ghost_width; int ordn = 2 * ghost_width;
MyList<var> *varl; MyList<var> *varl;
int num_var = 0; int num_var = 0;
@@ -1337,7 +1347,7 @@ bool Patch::Interp_ONE_Point(MyList<var> *VarList, double *XX,
{ {
// shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn], // shellf[j*num_var+k] = Parallel::global_interp(dim,BP->shape,BP->X,BP->fgfs[varl->data->sgfn],
// pox,ordn,varl->data->SoA,Symmetry); // pox,ordn,varl->data->SoA,Symmetry);
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k], xh_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], shellf[k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next; varl = varl->next;
k++; k++;

View File

@@ -2,11 +2,7 @@
#ifndef PATCH_H #ifndef PATCH_H
#define PATCH_H #define PATCH_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "MyList.h" #include "MyList.h"
#include "Block.h" #include "Block.h"
#include "var.h" #include "var.h"

View File

@@ -8,11 +8,7 @@
#include <limits.h> #include <limits.h>
#include <float.h> #include <float.h>
#include <math.h> #include <math.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "util_Table.h" #include "util_Table.h"
#include "cctk.h" #include "cctk.h"

View File

@@ -23,11 +23,7 @@ using namespace std;
#include <complex.h> #include <complex.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "MyList.h" #include "MyList.h"
#include "Block.h" #include "Block.h"
#include "Parallel.h" #include "Parallel.h"

View File

@@ -23,11 +23,7 @@ using namespace std;
#include <complex.h> #include <complex.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "MyList.h" #include "MyList.h"
#include "Block.h" #include "Block.h"
#include "Parallel.h" #include "Parallel.h"

View File

@@ -4,7 +4,7 @@
#include "prolongrestrict.h" #include "prolongrestrict.h"
#include "misc.h" #include "misc.h"
#include "parameters.h" #include "parameters.h"
#include <omp.h>
int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion int Parallel::partition1(int &nx, int split_size, int min_width, int cpusize, int shape) // special for 1 diemnsion
{ {
nx = Mymax(1, shape / min_width); nx = Mymax(1, shape / min_width);
@@ -3338,7 +3338,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
{ {
int myrank; int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
// double time1 = omp_get_wtime();
int DIM = dim; int DIM = dim;
if (dir != PACK && dir != UNPACK) if (dir != PACK && dir != UNPACK)
@@ -3361,7 +3361,6 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
varls = varls->next; varls = varls->next;
varld = varld->next; varld = varld->next;
} }
if (varls || varld) if (varls || varld)
{ {
cout << "error in short data packer, var lists does not match." << endl; cout << "error in short data packer, var lists does not match." << endl;
@@ -3375,7 +3374,6 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
type = 2; type = 2;
else else
type = 3; type = 3;
while (src && dst) while (src && dst)
{ {
if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) || if ((dir == PACK && dst->data->Bg->rank == rank_in && src->data->Bg->rank == myrank) ||
@@ -3385,6 +3383,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
varld = VarListd; varld = VarListd;
while (varls && varld) while (varls && varld)
{ {
if (data) if (data)
{ {
if (dir == PACK) if (dir == PACK)
@@ -3405,6 +3404,7 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn], f_prolong3(DIM, src->data->Bg->bbox, src->data->Bg->bbox + dim, src->data->Bg->shape, src->data->Bg->fgfs[varls->data->sgfn],
dst->data->llb, dst->data->uub, dst->data->shape, data + size_out, dst->data->llb, dst->data->uub, dst->data->shape, data + size_out,
dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry); dst->data->llb, dst->data->uub, varls->data->SoA, Symmetry);
} }
if (dir == UNPACK) // from target data to corresponding grid if (dir == UNPACK) // from target data to corresponding grid
f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn], f_copy(DIM, dst->data->Bg->bbox, dst->data->Bg->bbox + dim, dst->data->Bg->shape, dst->data->Bg->fgfs[varld->data->sgfn],
@@ -3418,8 +3418,14 @@ int Parallel::data_packer(double *data, MyList<Parallel::gridseg> *src, MyList<P
} }
dst = dst->next; dst = dst->next;
src = src->next; src = src->next;
}
}
// double time2 = omp_get_wtime();
// xxx += time2 - time1;
// if(myrank == 0){
// printf("prolong3 time = %lf\n", time2 - time1);
// }
return size_out; return size_out;
} }
int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir, int Parallel::data_packermix(double *data, MyList<Parallel::gridseg> *src, MyList<Parallel::gridseg> *dst, int rank_in, int dir,
@@ -3514,7 +3520,7 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
int node; int node;
// double time1 = omp_get_wtime();
MPI_Request *reqs; MPI_Request *reqs;
MPI_Status *stats; MPI_Status *stats;
reqs = new MPI_Request[2 * cpusize]; reqs = new MPI_Request[2 * cpusize];
@@ -3583,7 +3589,9 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
if (rec_data[node]) if (rec_data[node])
delete[] rec_data[node]; delete[] rec_data[node];
} }
// double time2 = omp_get_wtime();
// if (myrank == 0)
// printf("transfer time = %lf\n", time2 - time1);
delete[] reqs; delete[] reqs;
delete[] stats; delete[] stats;
delete[] send_data; delete[] send_data;
@@ -3853,7 +3861,8 @@ void Parallel::Sync_merged(MyList<Patch> *PatL, MyList<var> *VarList, int Symmet
Parallel::SyncCache::SyncCache() Parallel::SyncCache::SyncCache()
: valid(false), cpusize(0), combined_src(0), combined_dst(0), : valid(false), cpusize(0), combined_src(0), combined_dst(0),
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0) send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0),
lengths_valid(false)
{ {
} }
// SyncCache invalidate: free grid segment lists but keep buffers // SyncCache invalidate: free grid segment lists but keep buffers
@@ -3871,6 +3880,7 @@ void Parallel::SyncCache::invalidate()
send_lengths[i] = recv_lengths[i] = 0; send_lengths[i] = recv_lengths[i] = 0;
} }
valid = false; valid = false;
lengths_valid = false;
} }
// SyncCache destroy: free everything // SyncCache destroy: free everything
void Parallel::SyncCache::destroy() void Parallel::SyncCache::destroy()
@@ -4172,8 +4182,13 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
{ {
if (node == myrank) if (node == myrank)
{ {
int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); int length;
cache.recv_lengths[node] = length; if (!cache.lengths_valid) {
length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
cache.recv_lengths[node] = length;
} else {
length = cache.recv_lengths[node];
}
if (length > 0) if (length > 0)
{ {
if (length > cache.recv_buf_caps[node]) if (length > cache.recv_buf_caps[node])
@@ -4187,8 +4202,13 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
} }
else else
{ {
int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); int slength;
cache.send_lengths[node] = slength; if (!cache.lengths_valid) {
slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
cache.send_lengths[node] = slength;
} else {
slength = cache.send_lengths[node];
}
if (slength > 0) if (slength > 0)
{ {
if (slength > cache.send_buf_caps[node]) if (slength > cache.send_buf_caps[node])
@@ -4200,8 +4220,13 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
} }
int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry); int rlength;
cache.recv_lengths[node] = rlength; if (!cache.lengths_valid) {
rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
cache.recv_lengths[node] = rlength;
} else {
rlength = cache.recv_lengths[node];
}
if (rlength > 0) if (rlength > 0)
{ {
if (rlength > cache.recv_buf_caps[node]) if (rlength > cache.recv_buf_caps[node])
@@ -4214,6 +4239,7 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
} }
} }
} }
cache.lengths_valid = true;
} }
// Sync_finish: wait for async MPI operations and unpack // Sync_finish: wait for async MPI operations and unpack
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state, void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,

View File

@@ -97,6 +97,7 @@ namespace Parallel
MPI_Request *reqs; MPI_Request *reqs;
MPI_Status *stats; MPI_Status *stats;
int max_reqs; int max_reqs;
bool lengths_valid;
SyncCache(); SyncCache();
void invalidate(); void invalidate();
void destroy(); void destroy();

View File

@@ -2,11 +2,7 @@
#ifndef SHELLPATCH_H #ifndef SHELLPATCH_H
#define SHELLPATCH_H #define SHELLPATCH_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "MyList.h" #include "MyList.h"
#include "Block.h" #include "Block.h"
#include "Parallel.h" #include "Parallel.h"

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "cgh.h" #include "cgh.h"
#include "ShellPatch.h" #include "ShellPatch.h"

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "cgh.h" #include "cgh.h"
#include "ShellPatch.h" #include "ShellPatch.h"

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "cgh.h" #include "cgh.h"
#include "ShellPatch.h" #include "ShellPatch.h"

View File

@@ -40,7 +40,7 @@ using namespace std;
#include "derivatives.h" #include "derivatives.h"
#include "ricci_gamma.h" #include "ricci_gamma.h"
#include "xh_bssn_rhs_compute.h"
//================================================================================================ //================================================================================================
// define bssn_class // define bssn_class
@@ -2029,6 +2029,7 @@ void bssn_class::Read_Ansorg()
void bssn_class::Evolve(int Steps) void bssn_class::Evolve(int Steps)
{ {
clock_t prev_clock, curr_clock; clock_t prev_clock, curr_clock;
double prev_time, curr_time;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0; LastAnas = 0;
#if 0 #if 0
@@ -2141,8 +2142,10 @@ void bssn_class::Evolve(int Steps)
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6) // if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; } // { GH->levels=GH->movls; }
if (myrank == 0) if (myrank == 0){
curr_clock = clock(); curr_clock = clock();
curr_time = omp_get_wtime();
}
#if (PSTR == 0) #if (PSTR == 0)
RecursiveStep(0); RecursiveStep(0);
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) #elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
@@ -2198,12 +2201,17 @@ void bssn_class::Evolve(int Steps)
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
prev_time = curr_time;
curr_clock = clock(); curr_clock = clock();
curr_time = omp_get_wtime();
cout << endl; cout << endl;
// cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
// << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
// << " seconds! " << endl;
// // cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " " cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " Computer used " << (curr_time - prev_time)
<< " seconds! " << endl; << " seconds! " << endl;
// cout << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)
@@ -3092,7 +3100,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif #endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -3292,7 +3300,7 @@ void bssn_class::Step(int lev, int YN)
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl; << cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
ERROR = 1; ERROR = 1;
} }
// cout<<"....................................."<<endl;
// rk4 substep and boundary // rk4 substep and boundary
{ {
MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList; MyList<var> *varl0 = StateList, *varl = SynchList_pre, *varlrhs = RHSList;
@@ -3457,7 +3465,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif #endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -3970,7 +3978,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif #endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -4312,7 +4320,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif #endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -4848,7 +4856,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif #endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -5048,7 +5056,7 @@ void bssn_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif #endif
if (f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -7343,7 +7351,7 @@ void bssn_class::Constraint_Out()
Block *cg = BP->data; Block *cg = BP->data;
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -7846,7 +7854,7 @@ void bssn_class::Interp_Constraint(bool infg)
Block *cg = BP->data; Block *cg = BP->data;
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -8104,7 +8112,7 @@ void bssn_class::Compute_Constraint()
Block *cg = BP->data; Block *cg = BP->data;
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
f_compute_rhs_bssn(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], f_compute_rhs_bssn_xh(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "macrodef.h" #include "macrodef.h"
#include "cgh.h" #include "cgh.h"

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "macrodef.h" #include "macrodef.h"
#include "cgh.h" #include "cgh.h"

View File

@@ -106,38 +106,6 @@
call getpbh(BHN,Porg,Mass) call getpbh(BHN,Porg,Mass)
#endif #endif
!!! sanity check (disabled in production builds for performance)
#ifdef DEBUG
dX = sum(chi)+sum(trK)+sum(dxx)+sum(gxy)+sum(gxz)+sum(dyy)+sum(gyz)+sum(dzz) &
+sum(Axx)+sum(Axy)+sum(Axz)+sum(Ayy)+sum(Ayz)+sum(Azz) &
+sum(Gamx)+sum(Gamy)+sum(Gamz) &
+sum(Lap)+sum(betax)+sum(betay)+sum(betaz)
if(dX.ne.dX) then
if(sum(chi).ne.sum(chi))write(*,*)"bssn.f90: find NaN in chi"
if(sum(trK).ne.sum(trK))write(*,*)"bssn.f90: find NaN in trk"
if(sum(dxx).ne.sum(dxx))write(*,*)"bssn.f90: find NaN in dxx"
if(sum(gxy).ne.sum(gxy))write(*,*)"bssn.f90: find NaN in gxy"
if(sum(gxz).ne.sum(gxz))write(*,*)"bssn.f90: find NaN in gxz"
if(sum(dyy).ne.sum(dyy))write(*,*)"bssn.f90: find NaN in dyy"
if(sum(gyz).ne.sum(gyz))write(*,*)"bssn.f90: find NaN in gyz"
if(sum(dzz).ne.sum(dzz))write(*,*)"bssn.f90: find NaN in dzz"
if(sum(Axx).ne.sum(Axx))write(*,*)"bssn.f90: find NaN in Axx"
if(sum(Axy).ne.sum(Axy))write(*,*)"bssn.f90: find NaN in Axy"
if(sum(Axz).ne.sum(Axz))write(*,*)"bssn.f90: find NaN in Axz"
if(sum(Ayy).ne.sum(Ayy))write(*,*)"bssn.f90: find NaN in Ayy"
if(sum(Ayz).ne.sum(Ayz))write(*,*)"bssn.f90: find NaN in Ayz"
if(sum(Azz).ne.sum(Azz))write(*,*)"bssn.f90: find NaN in Azz"
if(sum(Gamx).ne.sum(Gamx))write(*,*)"bssn.f90: find NaN in Gamx"
if(sum(Gamy).ne.sum(Gamy))write(*,*)"bssn.f90: find NaN in Gamy"
if(sum(Gamz).ne.sum(Gamz))write(*,*)"bssn.f90: find NaN in Gamz"
if(sum(Lap).ne.sum(Lap))write(*,*)"bssn.f90: find NaN in Lap"
if(sum(betax).ne.sum(betax))write(*,*)"bssn.f90: find NaN in betax"
if(sum(betay).ne.sum(betay))write(*,*)"bssn.f90: find NaN in betay"
if(sum(betaz).ne.sum(betaz))write(*,*)"bssn.f90: find NaN in betaz"
gont = 1
return
endif
#endif
PI = dacos(-ONE) PI = dacos(-ONE)
@@ -634,7 +602,7 @@
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1 gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1 gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1 gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
! now get physical second kind of connection
Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF

View File

@@ -20,11 +20,7 @@ using namespace std;
#include <map.h> #include <map.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "macrodef.h" #include "macrodef.h"
#include "misc.h" #include "misc.h"

View File

@@ -2,11 +2,7 @@
#ifndef CGH_H #ifndef CGH_H
#define CGH_H #define CGH_H
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "MyList.h" #include "MyList.h"
#include "MPatch.h" #include "MPatch.h"
#include "macrodef.h" #include "macrodef.h"

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <time.h> #include <time.h>
#include <stdlib.h> #include <stdlib.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "var.h" #include "var.h"
#include "MyList.h" #include "MyList.h"

View File

@@ -69,7 +69,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -152,7 +151,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -220,7 +218,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -285,7 +282,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -375,7 +371,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -474,7 +469,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -537,7 +531,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -601,7 +594,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -665,7 +657,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -728,7 +719,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -790,7 +780,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -877,7 +866,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1009,7 +997,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1164,7 +1151,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1241,7 +1227,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1312,7 +1297,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1417,7 +1401,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1593,7 +1576,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1661,7 +1643,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1731,7 +1712,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1801,7 +1781,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1872,7 +1851,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -1941,7 +1919,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2034,7 +2011,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2151,7 +2127,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2237,7 +2212,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2314,7 +2288,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2433,7 +2406,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2621,7 +2593,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2694,7 +2665,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2770,7 +2740,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2846,7 +2815,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -2927,7 +2895,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3006,7 +2973,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3114,7 +3080,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3251,7 +3216,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3347,7 +3311,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3432,7 +3395,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3568,7 +3530,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3841,7 +3802,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -3923,7 +3883,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -4008,7 +3967,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -4093,7 +4051,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -4196,7 +4153,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
@@ -4297,7 +4253,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3)-1 do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1

View File

@@ -81,7 +81,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -180,7 +179,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -264,7 +262,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -345,7 +342,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -447,7 +443,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -558,7 +553,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -633,7 +627,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -709,7 +702,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -785,7 +777,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -860,7 +851,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -934,7 +924,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1030,7 +1019,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1146,7 +1134,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1240,7 +1227,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1328,7 +1314,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1445,7 +1430,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1596,7 +1580,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1676,7 +1659,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1758,7 +1740,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1840,7 +1821,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1923,7 +1903,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2004,7 +1983,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2109,7 +2087,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2242,7 +2219,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2345,7 +2321,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2439,7 +2414,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2570,7 +2544,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2770,7 +2743,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2855,7 +2827,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -2943,7 +2914,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3031,7 +3001,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3124,7 +3093,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3215,7 +3183,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3335,7 +3302,6 @@
fy = ZEO fy = ZEO
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3488,7 +3454,6 @@
fx = ZEO fx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3601,7 +3566,6 @@
fy = ZEO fy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3703,7 +3667,6 @@
fz = ZEO fz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -3851,7 +3814,6 @@
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4136,7 +4098,6 @@
fxx = ZEO fxx = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4230,7 +4191,6 @@
fyy = ZEO fyy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4327,7 +4287,6 @@
fzz = ZEO fzz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4424,7 +4383,6 @@
fxy = ZEO fxy = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4539,7 +4497,6 @@
fxz = ZEO fxz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4652,7 +4609,6 @@
fyz = ZEO fyz = ZEO
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4723,7 +4679,6 @@ subroutine fderivs_shc(ex,f,fx,fy,fz,crho,sigma,R,SYM1,SYM2,SYM3,Symmetry,Lev,ss
#if 0 #if 0
integer :: i,j,k integer :: i,j,k
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -4774,7 +4729,6 @@ subroutine fdderivs_shc(ex,f,fxx,fxy,fxz,fyy,fyz,fzz,crho,sigma,R,SYM1,SYM2,SYM3
#if 0 #if 0
integer :: i,j,k integer :: i,j,k
!$omp parallel do collapse(2) private(i,j,k)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)

View File

@@ -27,7 +27,6 @@
!~~~~~~> !~~~~~~>
!$omp parallel do collapse(2) private(i,j,k,lgxx,lgyy,lgzz,ldetg,lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz,ltrA,lscale)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -105,7 +104,6 @@
!~~~~~~> !~~~~~~>
!$omp parallel do collapse(2) private(i,j,k,lgxx,lgyy,lgzz,lscale,lgxy,lgxz,lgyz,lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz,ltrA)
do k=1,ex(3) do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)

View File

@@ -6,11 +6,7 @@
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "util_Table.h" #include "util_Table.h"
#include "cctk.h" #include "cctk.h"

View File

@@ -0,0 +1,26 @@
#include "xh_macrodef.h"
#include "xh_tool.h"
int f_compute_rhs_bssn(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co
);

View File

@@ -0,0 +1,66 @@
/* tetrad notes
v:r; u: phi; w: theta
tetradtype 0
v^a = (x,y,z)
orthonormal order: v,u,w
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
tetradtype 1
orthonormal order: w,u,v
m = (theta + i phi)/sqrt(2) following Sperhake, Eq.(3.2) of PRD 85, 124062(2012)
tetradtype 2
v_a = (x,y,z)
orthonormal order: v,u,w
m = (phi - i theta)/sqrt(2) following Frans, Eq.(8) of PRD 75, 124018(2007)
*/
#define tetradtype 2
/* Cell center or Vertex center */
#define Cell
/* ghost_width meaning:
2nd order: 2
4th order: 3
6th order: 4
8th order: 5
*/
#define ghost_width 3
/* use shell or not */
#define WithShell
/* use constraint preserving boundary condition or not
only affect Z4c
*/
#define CPBC
/* Gauge condition type
0: B^i gauge
1: David's puncture gauge
2: MB B^i gauge
3: RIT B^i gauge
4: MB beta gauge (beta gauge not means Eq.(3) of PRD 84, 124006)
5: RIT beta gauge (beta gauge not means Eq.(3) of PRD 84, 124006)
6: MGB1 B^i gauge
7: MGB2 B^i gauge
*/
#define GAUGE 2
/* buffer points for CPBC boundary */
#define CPBC_ghost_width (ghost_width)
/* using BSSN variable for constraint violation and psi4 calculation: 0
using ADM variable for constraint violation and psi4 calculation: 1
*/
#define ABV 0
/* Type of Potential and Scalar Distribution in F(R) Scalar-Tensor Theory
1: Case C of 1112.3928, V=0
2: shell with a2^2*phi0/(1+a2^2), f(R) = R+a2*R^2 induced V
3: ground state of Schrodinger-Newton system, f(R) = R+a2*R^2 induced V
4: a2 = infinity and phi(r) = phi0 * 0.5 * ( tanh((r+r0)/sigma) - tanh((r-r0)/sigma) )
5: shell with phi(r) = phi0*Exp(-(r-r0)**2/sigma), V = 0
*/
#define EScalar_CC 2

View File

@@ -0,0 +1,338 @@
#ifndef SHARE_FUNC_H
#define SHARE_FUNC_H
#include <stdlib.h>
#include <stddef.h>
#include <math.h>
#include <stdio.h>
#include <omp.h>
/* 主网格0-based -> 1D */
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
const int ex1 = ex[0], ex2 = ex[1];
return (size_t)i0 + (size_t)j0 * (size_t)ex1 + (size_t)k0 * (size_t)ex1 * (size_t)ex2;
}
/*
* fh 对应 Fortran: fh(-1:ex1, -1:ex2, -1:ex3)
* ord=2 => shift=1
* iF/jF/kF 为 Fortran 索引(可为 -1,0,1..ex
*/
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
const int shift = 1;
const int nx = ex[0] + 2; // ex1 + ord
const int ny = ex[1] + 2;
const int ii = iF + shift; // 0..ex1+1
const int jj = jF + shift; // 0..ex2+1
const int kk = kF + shift; // 0..ex3+1
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* fh 对应 Fortran: fh(-2:ex1, -2:ex2, -2:ex3)
* ord=3 => shift=2
* iF/jF/kF 是 Fortran 索引(可为负)
*/
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
const int shift = 2; // ord=3 -> -2..ex
const int nx = ex[0] + 3; // ex1 + ord
const int ny = ex[1] + 3;
const int ii = iF + shift; // 0..ex1+2
const int jj = jF + shift; // 0..ex2+2
const int kk = kF + shift; // 0..ex3+2
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
*
* C 里我们把:
* func 视为 0-based: i0=0..extc1-1, j0=0..extc2-1, k0=0..extc3-1
* funcc 用“平移下标”存为一维数组:
* iF in [-ord+1..extc1] -> ii = iF + (ord-1) in [0..extc1+ord-1]
* 总长度 nx = extc1 + ord
* 同理 ny = extc2 + ord, nz = extc3 + ord
*/
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
const int nx = extc[0], ny = extc[1];
return (size_t)i0 + (size_t)j0 * (size_t)nx + (size_t)k0 * (size_t)nx * (size_t)ny;
}
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
const int shift = ord - 1; // iF = -shift .. extc1
const int nx = extc[0] + ord; // [-shift..extc1] 共 extc1+ord 个
const int ny = extc[1] + ord;
const int ii = iF + shift; // 0..extc1+shift
const int jj = jF + shift; // 0..extc2+shift
const int kk = kF + shift; // 0..extc3+shift
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* 等价于 Fortran:
* funcc(1:extc1,1:extc2,1:extc3)=func
* do i=0,ord-1
* funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
* enddo
* do i=0,ord-1
* funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
* enddo
* do i=0,ord-1
* funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
* enddo
*/
static inline void symmetry_bd(int ord,
const int extc[3],
const double *func,
double *funcc,
const double SoA[3])
{
const int extc1 = extc[0], extc2 = extc[1], extc3 = extc[2];
// 1) funcc(1:extc1,1:extc2,1:extc3) = func
// Fortran 的 (iF=1..extc1) 对应 C 的 func(i0=0..extc1-1)
for (int k0 = 0; k0 < extc3; ++k0) {
for (int j0 = 0; j0 < extc2; ++j0) {
for (int i0 = 0; i0 < extc1; ++i0) {
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
funcc[idx_funcc_F(iF, jF, kF, ord, extc)] = func[idx_func0(i0, j0, k0, extc)];
}
}
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
for (int ii = 0; ii <= ord - 1; ++ii) {
const int iF_dst = -ii; // 0, -1, -2, ...
const int iF_src = ii + 1; // 1, 2, 3, ...
for (int kF = 1; kF <= extc3; ++kF) {
for (int jF = 1; jF <= extc2; ++jF) {
funcc[idx_funcc_F(iF_dst, jF, kF, ord, extc)] =
funcc[idx_funcc_F(iF_src, jF, kF, ord, extc)] * SoA[0];
}
}
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
for (int jj = 0; jj <= ord - 1; ++jj) {
const int jF_dst = -jj;
const int jF_src = jj + 1;
for (int kF = 1; kF <= extc3; ++kF) {
for (int iF = -ord + 1; iF <= extc1; ++iF) {
funcc[idx_funcc_F(iF, jF_dst, kF, ord, extc)] =
funcc[idx_funcc_F(iF, jF_src, kF, ord, extc)] * SoA[1];
}
}
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
for (int kk = 0; kk <= ord - 1; ++kk) {
const int kF_dst = -kk;
const int kF_src = kk + 1;
for (int jF = -ord + 1; jF <= extc2; ++jF) {
for (int iF = -ord + 1; iF <= extc1; ++iF) {
funcc[idx_funcc_F(iF, jF, kF_dst, ord, extc)] =
funcc[idx_funcc_F(iF, jF, kF_src, ord, extc)] * SoA[2];
}
}
}
}
#endif
/* 你已有的函数idx_ex / idx_fh_F_ord2 以及 fh 的布局 */
static inline void fdderivs_xh(
int i0, int j0, int k0,
const int ex[3],
const double *fh,
int iminF, int jminF, int kminF,
int imaxF, int jmaxF, int kmaxF,
double Fdxdx, double Fdydy, double Fdzdz,
double Fdxdy, double Fdxdz, double Fdydz,
double Sdxdx, double Sdydy, double Sdzdz,
double Sdxdy, double Sdxdz, double Sdydz,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz
){
const double F8 = 8.0;
const double F16 = 16.0;
const double F30 = 30.0;
const double TWO = 2.0;
const int iF = i0 + 1;
const int jF = j0 + 1;
const int kF = k0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* 高阶分支i±2,j±2,k±2 都在范围内 */
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
{
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
/* fxy 高阶 */
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
/* fxz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
/* fyz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
/* 二阶分支i±1,j±1,k±1 在范围内 */
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
{
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
);
}
else {
fxx[p] = 0.0; fyy[p] = 0.0; fzz[p] = 0.0;
fxy[p] = 0.0; fxz[p] = 0.0; fyz[p] = 0.0;
}
}

View File

@@ -0,0 +1,27 @@
#include "xh_share_func.h"
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps);
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3]);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,311 @@
#include "../include/tool.h"
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff;
const int NO_SYMM = 0, EQ_SYMM = 1;
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
const double F1o4 = 2.5e-1; // 1/4
const double F8 = 8.0;
const double F16 = 16.0;
const double F30 = 30.0;
const double F1o12 = ONE / 12.0;
const double F1o144 = ONE / 144.0;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
/* 系数:按 Fortran 原式 */
const double Sdxdx = ONE / (dX * dX);
const double Sdydy = ONE / (dY * dY);
const double Sdzdz = ONE / (dZ * dZ);
const double Fdxdx = F1o12 / (dX * dX);
const double Fdydy = F1o12 / (dY * dY);
const double Fdzdz = F1o12 / (dZ * dZ);
const double Sdxdy = F1o4 / (dX * dY);
const double Sdxdz = F1o4 / (dX * dZ);
const double Sdydz = F1o4 / (dY * dZ);
const double Fdxdy = F1o144 / (dX * dY);
const double Fdxdz = F1o144 / (dX * dZ);
const double Fdydz = F1o144 / (dY * dZ);
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// symmetry_bd(2, ex, f, fh, SoA);
const double SoA[3] = { SYM1, SYM2, SYM3 };
for (int k0 = 0; k0 < ex[2]; ++k0) {
for (int j0 = 0; j0 < ex[1]; ++j0) {
for (int i0 = 0; i0 < ex[0]; ++i0) {
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = f[idx_func0(i0, j0, k0, ex)];
}
}
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
for (int ii = 0; ii <= 2 - 1; ++ii) {
const int iF_dst = -ii; // 0, -1, -2, ...
const int iF_src = ii + 1; // 1, 2, 3, ...
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int jF = 1; jF <= ex[1]; ++jF) {
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
}
}
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
for (int jj = 0; jj <= 2 - 1; ++jj) {
const int jF_dst = -jj;
const int jF_src = jj + 1;
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
}
}
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
for (int kk = 0; kk <= 2 - 1; ++kk) {
const int kF_dst = -kk;
const int kF_src = kk + 1;
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
}
}
}
/* 输出清零fxx,fyy,fzz,fxy,fxz,fyz = 0 */
// const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
// for (size_t p = 0; p < all; ++p) {
// fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
// fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
// }
/*
* Fortran:
* do k=1,ex3-1
* do j=1,ex2-1
* do i=1,ex1-1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* 高阶分支i±2,j±2,k±2 都在范围内 */
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
{
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
/* fxy 高阶:完全照搬 Fortran 的括号结构 */
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
/* fxz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
/* fyz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
/* 二阶分支i±1,j±1,k±1 在范围内 */
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
{
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
);
}else{
fxx[p] = 0.0;
fyy[p] = 0.0;
fzz[p] = 0.0;
fxy[p] = 0.0;
fxz[p] = 0.0;
fyz[p] = 0.0;
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,7 @@
#include "include/bssn_rhs_compute.h"
int main() {
// 这里可以写一些测试代码,调用 f_compute_rhs_bssn 来验证它的正确性
// 例如,定义一些小的网格和初始条件,调用函数,并检查输出是否合理。
return 0;
}

View File

@@ -0,0 +1,65 @@
SoA[0] = SYM, SoA[1] = SYM, SoA[2] = SYM;
#pragma omp for collapse(3)
for (int k0 = 0; k0 < ex[2]; ++k0) {
for (int j0 = 0; j0 < ex[1]; ++j0) {
for (int i0 = 0; i0 < ex[0]; ++i0) {
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = Lap[idx_func0(i0, j0, k0, ex)];
}
}
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
#pragma omp for collapse(3)
for (int ii = 0; ii <= 2 - 1; ++ii) {
const int iF_dst = -ii; // 0, -1, -2, ...
const int iF_src = ii + 1; // 1, 2, 3, ...
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int jF = 1; jF <= ex[1]; ++jF) {
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
}
}
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
#pragma omp for collapse(3)
for (int jj = 0; jj <= 2 - 1; ++jj) {
const int jF_dst = -jj;
const int jF_src = jj + 1;
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
}
}
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
#pragma omp for collapse(3)
for (int kk = 0; kk <= 2 - 1; ++kk) {
const int kF_dst = -kk;
const int kF_src = kk + 1;
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
}
}
}
#pragma omp for collapse(3)
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
fdderivs_xh(i0, j0, k0, ex, fh, iminF, jminF, kminF, ex1, ex2, ex3,
Fdxdx, Fdydy, Fdzdz, Fdxdy, Fdxdz, Fdydz,
Sdxdx, Sdydy, Sdzdz, Sdxdy, Sdxdz, Sdydz,
fxx,fxy,fxz,fyy,fyz,fzz
);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,311 @@
#include "xh_tool.h"
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff;
const int NO_SYMM = 0, EQ_SYMM = 1;
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
const double F1o4 = 2.5e-1; // 1/4
const double F8 = 8.0;
const double F16 = 16.0;
const double F30 = 30.0;
const double F1o12 = ONE / 12.0;
const double F1o144 = ONE / 144.0;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
/* 系数:按 Fortran 原式 */
const double Sdxdx = ONE / (dX * dX);
const double Sdydy = ONE / (dY * dY);
const double Sdzdz = ONE / (dZ * dZ);
const double Fdxdx = F1o12 / (dX * dX);
const double Fdydy = F1o12 / (dY * dY);
const double Fdzdz = F1o12 / (dZ * dZ);
const double Sdxdy = F1o4 / (dX * dY);
const double Sdxdz = F1o4 / (dX * dZ);
const double Sdydz = F1o4 / (dY * dZ);
const double Fdxdy = F1o144 / (dX * dY);
const double Fdxdz = F1o144 / (dX * dZ);
const double Fdydz = F1o144 / (dY * dZ);
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// symmetry_bd(2, ex, f, fh, SoA);
const double SoA[3] = { SYM1, SYM2, SYM3 };
for (int k0 = 0; k0 < ex[2]; ++k0) {
for (int j0 = 0; j0 < ex[1]; ++j0) {
for (int i0 = 0; i0 < ex[0]; ++i0) {
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = f[idx_func0(i0, j0, k0, ex)];
}
}
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
for (int ii = 0; ii <= 2 - 1; ++ii) {
const int iF_dst = -ii; // 0, -1, -2, ...
const int iF_src = ii + 1; // 1, 2, 3, ...
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int jF = 1; jF <= ex[1]; ++jF) {
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
}
}
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
for (int jj = 0; jj <= 2 - 1; ++jj) {
const int jF_dst = -jj;
const int jF_src = jj + 1;
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
}
}
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
for (int kk = 0; kk <= 2 - 1; ++kk) {
const int kF_dst = -kk;
const int kF_src = kk + 1;
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
}
}
}
/* 输出清零fxx,fyy,fzz,fxy,fxz,fyz = 0 */
// const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
// for (size_t p = 0; p < all; ++p) {
// fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
// fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
// }
/*
* Fortran:
* do k=1,ex3-1
* do j=1,ex2-1
* do i=1,ex1-1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* 高阶分支i±2,j±2,k±2 都在范围内 */
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
{
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
/* fxy 高阶:完全照搬 Fortran 的括号结构 */
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
/* fxz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
/* fyz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
/* 二阶分支i±1,j±1,k±1 在范围内 */
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
{
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
);
}else{
fxx[p] = 0.0;
fyy[p] = 0.0;
fzz[p] = 0.0;
fxy[p] = 0.0;
fxz[p] = 0.0;
fyz[p] = 0.0;
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,145 @@
#include "xh_tool.h"
/*
* C 版 fderivs
*
* Fortran:
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
*
* 约定:
* f, fx, fy, fz: ex1*ex2*ex3按 idx_ex 布局
* X: ex1, Y: ex2, Z: ex3
*/
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff; // Fortran 里没用到
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, EIT = 8.0;
const double F12 = 12.0;
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
// SoA(1:3) = SYM1,SYM2,SYM3
const double SoA[3] = { SYM1, SYM2, SYM3 };
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// call symmetry_bd(2,ex,f,fh,SoA)
symmetry_bd(2, ex, f, fh, SoA);
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
// fx = fy = fz = 0
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO;
fy[p] = ZEO;
fz[p] = ZEO;
}
/*
* Fortran loops:
* do k=1,ex3-1
* do j=1,ex2-1
* do i=1,ex1-1
*
* C: k0=0..ex3-2, j0=0..ex2-2, i0=0..ex1-2
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
if ((iF + 2) <= ex1 && (iF - 2) >= iminF &&
(jF + 2) <= ex2 && (jF - 2) >= jminF &&
(kF + 2) <= ex3 && (kF - 2) >= kminF)
{
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
}
// elseif(i+1 <= imax .and. i-1 >= imin ...)
else if ((iF + 1) <= ex1 && (iF - 1) >= iminF &&
(jF + 1) <= ex2 && (jF - 1) >= jminF &&
(kF + 1) <= ex3 && (kF - 1) >= kminF)
{
fx[p] = d2dx * (
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fy[p] = d2dy * (
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fz[p] = d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,116 @@
#include "xh_tool.h"
/*
* C 版 kodis
*
* Fortran signature:
* subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
*
* 约定:
* X: ex1, Y: ex2, Z: ex3
* f, f_rhs: ex1*ex2*ex3 按 idx_ex 布局
* SoA[3]
* eps: double
*/
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps)
{
const double ONE = 1.0, SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, OCTANT = 2;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// Fortran: dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
(void)ONE; // ONE 在原 Fortran 里只是参数,这里不一定用得上
// Fortran: imax=ex(1) 等是 1-based 上界
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran: imin=jmin=kmin=1某些对称情况变 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3),对应 ord=3
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return;
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran loops:
* do k=1,ex3
* do j=1,ex2
* do i=1,ex1
*
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
* 并定义 Fortran index: iF=i0+1, ...
*/
for (int k0 = 0; k0 < ex3; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 < ex2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 < ex1; ++i0) {
const int iF = i0 + 1;
// Fortran if 条件:
// i-3 >= imin .and. i+3 <= imax 等(都是 Fortran 索引)
if ((iF - 3) >= iminF && (iF + 3) <= imaxF &&
(jF - 3) >= jminF && (jF + 3) <= jmaxF &&
(kF - 3) >= kminF && (kF + 3) <= kmaxF)
{
const size_t p = idx_ex(i0, j0, k0, ex);
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
const double Dx_term =
( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF , jF, kF, ex)] ) / dX;
const double Dy_term =
( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF , kF, ex)] ) / dY;
const double Dz_term =
( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF , ex)] ) / dZ;
// Fortran:
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,262 @@
#include "xh_tool.h"
/*
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
*
* 约定:
* nghost = 3
* ex[3] = {ex1,ex2,ex3}
* f = 原始网格 (ex1*ex2*ex3)
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
* SoA[3] = 输入参数
*/
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
// free(fh);
}

View File

@@ -6,11 +6,7 @@
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "cctk.h" #include "cctk.h"

View File

@@ -65,8 +65,7 @@ real*8,intent(in) :: eps
! dx^4 ! dx^4
! note the sign (-1)^r-1, now r=2 ! note the sign (-1)^r-1, now r=2
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -160,8 +159,7 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
call symmetry_bd(3,ex,f,fh,SoA) call symmetry_bd(3,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -275,8 +273,7 @@ real*8,intent(in) :: eps
! dx^8 ! dx^8
! note the sign (-1)^r-1, now r=4 ! note the sign (-1)^r-1, now r=4
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -388,8 +385,7 @@ real*8,intent(in) :: eps
! dx^10 ! dx^10
! note the sign (-1)^r-1, now r=5 ! note the sign (-1)^r-1, now r=5
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)

View File

@@ -80,8 +80,7 @@ real*8,intent(in) :: eps
! dx^4 ! dx^4
! note the sign (-1)^r-1, now r=2 ! note the sign (-1)^r-1, now r=2
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -179,8 +178,7 @@ real*8,intent(in) :: eps
! dx^4 ! dx^4
! note the sign (-1)^r-1, now r=2 ! note the sign (-1)^r-1, now r=2
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -275,8 +273,7 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
call symmetry_stbd(2,ex,f,fh,SoA) call symmetry_stbd(2,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -372,8 +369,7 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
call symmetry_stbd(3,ex,f,fh,SoA) call symmetry_stbd(3,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -514,8 +510,7 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
call symmetry_stbd(3,ex,f,fh,SoA) call symmetry_stbd(3,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -603,8 +598,7 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
call symmetry_stbd(3,ex,f,fh,SoA) call symmetry_stbd(3,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -700,8 +694,7 @@ real*8,intent(in) :: eps
! dx^8 ! dx^8
! note the sign (-1)^r-1, now r=4 ! note the sign (-1)^r-1, now r=4
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -801,8 +794,7 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
call symmetry_stbd(4,ex,f,fh,SoA) call symmetry_stbd(4,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -911,8 +903,7 @@ real*8,intent(in) :: eps
! dx^10 ! dx^10
! note the sign (-1)^r-1, now r=5 ! note the sign (-1)^r-1, now r=5
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)
@@ -1015,8 +1006,7 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
call symmetry_stbd(5,ex,f,fh,SoA) call symmetry_stbd(5,ex,f,fh,SoA)
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)
do k=1,ex(3)
do j=1,ex(2) do j=1,ex(2)
do i=1,ex(1) do i=1,ex(1)

View File

@@ -68,8 +68,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
! upper bound set ex-1 only for efficiency, ! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also ! the loop body will set ex 0 also
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)-1
do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
! x direction ! x direction
@@ -234,8 +233,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
! upper bound set ex-1 only for efficiency, ! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also ! the loop body will set ex 0 also
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)-1
do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
#if 0 #if 0
@@ -560,8 +558,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
! upper bound set ex-1 only for efficiency, ! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also ! the loop body will set ex 0 also
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)-1
do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
! x direction ! x direction
@@ -777,8 +774,7 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
! upper bound set ex-1 only for efficiency, ! upper bound set ex-1 only for efficiency,
! the loop body will set ex 0 also ! the loop body will set ex 0 also
!$omp parallel do collapse(2) private(i,j,k) do k=1,ex(3)-1
do k=1,ex(3)-1
do j=1,ex(2)-1 do j=1,ex(2)-1
do i=1,ex(1)-1 do i=1,ex(1)-1
! x direction ! x direction

View File

@@ -8,7 +8,7 @@ include makefile.inc
$(f90) $(f90appflags) -c $< -o $@ $(f90) $(f90appflags) -c $< -o $@
.C.o: .C.o:
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@
.for.o: .for.o:
$(f77) -c $< -o $@ $(f77) -c $< -o $@
@@ -28,7 +28,8 @@ C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\ bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\ bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\ Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
NullShellPatch2_Evo.o writefile_f.o NullShellPatch2_Evo.o writefile_f.o xh_bssn_rhs.o xh_fdderivs.o xh_fderivs.o xh_kodiss.o xh_lopsided.o \
xh_global_interp.o xh_polint3.o
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
cgh.o surface_integral.o ShellPatch.o\ cgh.o surface_integral.o ShellPatch.o\
@@ -72,7 +73,7 @@ $(C++FILES): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\ fadmquantites_bssn.h cpbc.h getnp4.h initial_null.h NullEvol.h\
NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\ NullShellPatch.h initial_maxwell.h bssnEM_class.h getnpem2.h\
empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\ empart.h NullNews.h kodiss.h Parallel_bam.h ricci_gamma.h\
initial_null2.h NullShellPatch2.h initial_null2.h NullShellPatch2.h xh_bssn_rhs_compute.h xh_global_interp.h
$(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\ $(C++FILES_GPU): Block.h enforce_algebra.h fmisc.h initial_puncture.h macrodef.h\
misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\ misc.h monitor.h MyList.h Parallel.h MPatch.h prolongrestrict.h\
@@ -95,7 +96,7 @@ $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
misc.o : zbesh.o misc.o : zbesh.o
# projects # projects
ABE: $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) ABE: $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
$(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) $(CLINKER) $(CXXAPPFLAGS) -qopenmp -o $@ $(C++FILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) ABEGPU: $(C++FILES_GPU) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)

View File

@@ -1,30 +1,32 @@
## GCC version (commented out) ## GCC version (commented out)
## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## filein = -I/usr/include -I/usr/lib/x86_64-linux-gnu/mpich/include -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/ ## filein = -I/usr/include/ -I/usr/include/openmpi-x86_64/ -I/usr/lib/x86_64-linux-gnu/openmpi/include/ -I/usr/lib/x86_64-linux-gnu/openmpi/lib/ -I/usr/lib/gcc/x86_64-linux-gnu/11/ -I/usr/include/c++/11/
## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran ## LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -L/usr/lib/gcc/x86_64-linux-gnu/11 -lgfortran -lmpi -lgfortran
## Intel oneAPI version with oneMKL (Optimized for performance) ## Intel oneAPI version with oneMKL (Optimized for performance)
filein = -I/usr/include/ -I${MKLROOT}/include filein = -I/usr/include/ -I${MKLROOT}/include
## Using OpenMP-threaded MKL for parallel performance ## Using sequential MKL (OpenMP disabled for better single-threaded performance)
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library ## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lifcore -limf -lpthread -lm -ldl LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl
## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization) ## Aggressive optimization flags + PGO Phase 2 (profile-guided optimization)
## -fprofile-instr-use: use collected profile data to guide optimization decisions ## -fprofile-instr-use: use collected profile data to guide optimization decisions
## (branch prediction, basic block layout, inlining, loop unrolling) ## (branch prediction, basic block layout, inlining, loop unrolling)
PROFDATA = /home/amss/AMSS-NCKU/pgo_profile/default.profdata PROFDATA = /home/hxh/AMSS-NCKU/pgo_profile/default.profdata
CXXAPPFLAGS = -O3 -march=native -fp-model fast=2 -fma -ipo -qopenmp \ CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-DMPI_STUB -Dfortran3 -Dnewc -I${MKLROOT}/include -fprofile-instr-use=$(PROFDATA) \
f90appflags = -O3 -march=native -fp-model fast=2 -fma -ipo -qopenmp \ -Dfortran3 -Dnewc -I${MKLROOT}/include
-align array64byte -fpp -I${MKLROOT}/include f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
f90 = ifx -fprofile-instr-use=$(PROFDATA) \
f77 = ifx -align array64byte -fpp -I${MKLROOT}/include
CXX = icpx f90 = ifx
CC = icx f77 = ifx
CLINKER = icpx CXX = icpx
CC = icx
Cu = nvcc CLINKER = mpiicpx
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc Cu = nvcc
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc

View File

@@ -14,11 +14,7 @@ using namespace std;
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "misc.h" #include "misc.h"
#include "macrodef.h" #include "macrodef.h"

View File

@@ -24,11 +24,7 @@ using namespace std;
#include <complex.h> #include <complex.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
namespace misc namespace misc
{ {

View File

@@ -20,11 +20,7 @@ using namespace std;
#endif #endif
#include <time.h> #include <time.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
class monitor class monitor
{ {

View File

@@ -1,153 +0,0 @@
#ifndef MPI_STUB_H
#define MPI_STUB_H
/*
* MPI Stub Header — single-process shim for AMSS-NCKU ABE solver.
* Provides all MPI types, constants, and functions used in the codebase
* as no-ops or trivial implementations for nprocs=1, myrank=0.
*/
#include <cstring>
#include <cstdlib>
#include <cstdio>
#include <time.h>
/* ── Types ─────────────────────────────────────────────────────────── */
typedef int MPI_Comm;
typedef int MPI_Datatype;
typedef int MPI_Op;
typedef int MPI_Request;
typedef int MPI_Group;
typedef struct MPI_Status {
int MPI_SOURCE;
int MPI_TAG;
int MPI_ERROR;
} MPI_Status;
/* ── Constants ─────────────────────────────────────────────────────── */
#define MPI_COMM_WORLD 0
#define MPI_INT 1
#define MPI_DOUBLE 2
#define MPI_DOUBLE_PRECISION 2
#define MPI_DOUBLE_INT 3
#define MPI_SUM 1
#define MPI_MAX 2
#define MPI_MAXLOC 3
#define MPI_STATUS_IGNORE ((MPI_Status *)0)
#define MPI_STATUSES_IGNORE ((MPI_Status *)0)
#define MPI_MAX_PROCESSOR_NAME 256
/* ── Helper: sizeof for MPI_Datatype ──────────────────────────────── */
static inline size_t mpi_stub_sizeof(MPI_Datatype type) {
switch (type) {
case MPI_INT: return sizeof(int);
case MPI_DOUBLE: return sizeof(double);
case MPI_DOUBLE_INT: return sizeof(double) + sizeof(int);
default: return 0;
}
}
/* ── Init / Finalize ──────────────────────────────────────────────── */
static inline int MPI_Init(int *, char ***) { return 0; }
static inline int MPI_Finalize() { return 0; }
/* ── Communicator queries ─────────────────────────────────────────── */
static inline int MPI_Comm_rank(MPI_Comm, int *rank) { *rank = 0; return 0; }
static inline int MPI_Comm_size(MPI_Comm, int *size) { *size = 1; return 0; }
static inline int MPI_Comm_split(MPI_Comm comm, int, int, MPI_Comm *newcomm) {
*newcomm = comm;
return 0;
}
static inline int MPI_Comm_free(MPI_Comm *) { return 0; }
/* ── Group operations ─────────────────────────────────────────────── */
static inline int MPI_Comm_group(MPI_Comm, MPI_Group *group) {
*group = 0;
return 0;
}
static inline int MPI_Group_translate_ranks(MPI_Group, int n,
const int *ranks1, MPI_Group, int *ranks2) {
for (int i = 0; i < n; ++i) ranks2[i] = ranks1[i];
return 0;
}
static inline int MPI_Group_free(MPI_Group *) { return 0; }
/* ── Collective operations ────────────────────────────────────────── */
static inline int MPI_Allreduce(const void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype, MPI_Op, MPI_Comm) {
std::memcpy(recvbuf, sendbuf, count * mpi_stub_sizeof(datatype));
return 0;
}
static inline int MPI_Iallreduce(const void *sendbuf, void *recvbuf,
int count, MPI_Datatype datatype, MPI_Op, MPI_Comm,
MPI_Request *request) {
std::memcpy(recvbuf, sendbuf, count * mpi_stub_sizeof(datatype));
*request = 0;
return 0;
}
static inline int MPI_Bcast(void *, int, MPI_Datatype, int, MPI_Comm) {
return 0;
}
static inline int MPI_Barrier(MPI_Comm) { return 0; }
/* ── Point-to-point (never reached with nprocs=1) ─────────────────── */
static inline int MPI_Send(const void *, int, MPI_Datatype, int, int, MPI_Comm) {
return 0;
}
static inline int MPI_Recv(void *, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *) {
return 0;
}
static inline int MPI_Isend(const void *, int, MPI_Datatype, int, int, MPI_Comm,
MPI_Request *req) {
*req = 0;
return 0;
}
static inline int MPI_Irecv(void *, int, MPI_Datatype, int, int, MPI_Comm,
MPI_Request *req) {
*req = 0;
return 0;
}
/* ── Completion ───────────────────────────────────────────────────── */
static inline int MPI_Wait(MPI_Request *, MPI_Status *) { return 0; }
static inline int MPI_Waitall(int, MPI_Request *, MPI_Status *) { return 0; }
/* ── Utility ──────────────────────────────────────────────────────── */
static inline int MPI_Abort(MPI_Comm, int error_code) {
std::fprintf(stderr, "MPI_Abort called with error code %d\n", error_code);
std::exit(error_code);
return 0;
}
static inline double MPI_Wtime() {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.0e-9;
}
static inline int MPI_Get_processor_name(char *name, int *resultlen) {
const char *stub_name = "localhost";
std::strcpy(name, stub_name);
*resultlen = (int)std::strlen(stub_name);
return 0;
}
#endif /* MPI_STUB_H */

View File

@@ -24,11 +24,7 @@ using namespace std;
#include <map.h> #include <map.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
namespace parameters namespace parameters
{ {

View File

@@ -30,11 +30,7 @@ using namespace std;
#include <sys/time.h> #include <sys/time.h>
#include <sys/resource.h> #include <sys/resource.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
/* Real time */ /* Real time */
#define TimerSignal SIGALRM #define TimerSignal SIGALRM

View File

@@ -109,33 +109,23 @@
if( RK4 == 0 ) then if( RK4 == 0 ) then
!$omp parallel workshare
f1 = f0 + HLF * dT * f_rhs f1 = f0 + HLF * dT * f_rhs
!$omp end parallel workshare
elseif(RK4 == 1 ) then elseif(RK4 == 1 ) then
!$omp parallel workshare
f_rhs = f_rhs + TWO * f1 f_rhs = f_rhs + TWO * f1
!$omp end parallel workshare
!$omp parallel workshare
f1 = f0 + HLF * dT * f1 f1 = f0 + HLF * dT * f1
!$omp end parallel workshare
elseif(RK4 == 2 ) then elseif(RK4 == 2 ) then
!$omp parallel workshare
f_rhs = f_rhs + TWO * f1 f_rhs = f_rhs + TWO * f1
!$omp end parallel workshare
!$omp parallel workshare
f1 = f0 + dT * f1 f1 = f0 + dT * f1
!$omp end parallel workshare
elseif( RK4 == 3 ) then elseif( RK4 == 3 ) then
!$omp parallel workshare
f1 = f0 +F1o6 * dT *(f1 + f_rhs) f1 = f0 +F1o6 * dT *(f1 + f_rhs)
!$omp end parallel workshare
else else
@@ -144,7 +134,7 @@
endif endif
return return
end subroutine rungekutta4_rout end subroutine rungekutta4_rout
!----------------------------------------------------------------------------- !-----------------------------------------------------------------------------
@@ -225,19 +215,15 @@
if( RK4 == 0 ) then if( RK4 == 0 ) then
!$omp parallel workshare
f1 = f0 + dT * f_rhs f1 = f0 + dT * f_rhs
!$omp end parallel workshare
else else
!$omp parallel workshare
f1 = f0 + HLF * dT * (f1+f_rhs) f1 = f0 + HLF * dT * (f1+f_rhs)
!$omp end parallel workshare
endif endif
return return
end subroutine icn_rout end subroutine icn_rout
!~~~~~~~~~~~~~~~~~~ !~~~~~~~~~~~~~~~~~~
@@ -253,10 +239,8 @@
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f_rhs real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f_rhs
real*8, dimension(ex(1),ex(2),ex(3)),intent(out) ::f1 real*8, dimension(ex(1),ex(2),ex(3)),intent(out) ::f1
!$omp parallel workshare
f1 = f0 + dT * f_rhs f1 = f0 + dT * f_rhs
!$omp end parallel workshare
return return
end subroutine euler_rout end subroutine euler_rout

View File

@@ -19,11 +19,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "cgh.h" #include "cgh.h"
#include "ShellPatch.h" #include "ShellPatch.h"

View File

@@ -18,11 +18,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "misc.h" #include "misc.h"
#include "microdef.h" #include "microdef.h"

View File

@@ -3,11 +3,7 @@
#include <math.h> #include <math.h>
#include <string.h> #include <string.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "util_Table.h" #include "util_Table.h"
#include "cctk.h" #include "cctk.h"

View File

@@ -20,11 +20,7 @@ using namespace std;
#include <math.h> #include <math.h>
#include <map.h> #include <map.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "misc.h" #include "misc.h"
#include "cgh.h" #include "cgh.h"
@@ -2657,6 +2653,7 @@ void surface_integral::surf_MassPAng(double rex, int lev, cgh *GH, var *chi, var
// we have assumed there is only one box on this level, // we have assumed there is only one box on this level,
// so we do not need loop boxes // so we do not need loop boxes
GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here); GH->PatL[lev]->data->Interp_Points(DG_List, n_tot, pox, shellf, Symmetry, Comm_here);
double Mass_out = 0; double Mass_out = 0;

View File

@@ -18,11 +18,7 @@ using namespace std;
#include <math.h> #include <math.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "misc.h" #include "misc.h"
#include "macrodef.h" #include "macrodef.h"

View File

@@ -20,11 +20,7 @@ using namespace std;
#include <map.h> #include <map.h>
#endif #endif
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "misc.h" #include "misc.h"
#include "macrodef.h" #include "macrodef.h"

View File

@@ -9,11 +9,7 @@
using namespace std; using namespace std;
#include <time.h> #include <time.h>
#ifdef MPI_STUB
#include "mpi_stub.h"
#else
#include <mpi.h> #include <mpi.h>
#endif
#include "var.h" #include "var.h"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
#include "xh_tool.h"
extern "C"
{
int f_compute_rhs_bssn_xh(int *ex, double &T,
double *X, double *Y, double *Z,
double *chi, double *trK,
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
double *Gamx, double *Gamy, double *Gamz,
double *Lap, double *betax, double *betay, double *betaz,
double *dtSfx, double *dtSfy, double *dtSfz,
double *chi_rhs, double *trK_rhs,
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
double *rho, double *Sx, double *Sy, double *Sz,
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
int &Symmetry, int &Lev, double &eps, int &co
);
}

View File

@@ -0,0 +1,311 @@
#include "xh_tool.h"
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff;
const int NO_SYMM = 0, EQ_SYMM = 1;
const double ZEO = 0.0, ONE = 1.0, TWO = 2.0;
const double F1o4 = 2.5e-1; // 1/4
const double F8 = 8.0;
const double F16 = 16.0;
const double F30 = 30.0;
const double F1o12 = ONE / 12.0;
const double F1o144 = ONE / 144.0;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
/* fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2 */
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
/* 系数:按 Fortran 原式 */
const double Sdxdx = ONE / (dX * dX);
const double Sdydy = ONE / (dY * dY);
const double Sdzdz = ONE / (dZ * dZ);
const double Fdxdx = F1o12 / (dX * dX);
const double Fdydy = F1o12 / (dY * dY);
const double Fdzdz = F1o12 / (dZ * dZ);
const double Sdxdy = F1o4 / (dX * dY);
const double Sdxdz = F1o4 / (dX * dZ);
const double Sdydz = F1o4 / (dY * dZ);
const double Fdxdy = F1o144 / (dX * dY);
const double Fdxdz = F1o144 / (dX * dZ);
const double Fdydz = F1o144 / (dY * dZ);
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// symmetry_bd(2, ex, f, fh, SoA);
const double SoA[3] = { SYM1, SYM2, SYM3 };
for (int k0 = 0; k0 < ex[2]; ++k0) {
for (int j0 = 0; j0 < ex[1]; ++j0) {
for (int i0 = 0; i0 < ex[0]; ++i0) {
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
fh[idx_funcc_F(iF, jF, kF, 2, ex)] = f[idx_func0(i0, j0, k0, ex)];
}
}
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
for (int ii = 0; ii <= 2 - 1; ++ii) {
const int iF_dst = -ii; // 0, -1, -2, ...
const int iF_src = ii + 1; // 1, 2, 3, ...
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int jF = 1; jF <= ex[1]; ++jF) {
fh[idx_funcc_F(iF_dst, jF, kF, 2, ex)] =
fh[idx_funcc_F(iF_src, jF, kF, 2, ex)] * SoA[0];
}
}
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
for (int jj = 0; jj <= 2 - 1; ++jj) {
const int jF_dst = -jj;
const int jF_src = jj + 1;
for (int kF = 1; kF <= ex[2]; ++kF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF_dst, kF, 2, ex)] =
fh[idx_funcc_F(iF, jF_src, kF, 2, ex)] * SoA[1];
}
}
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
for (int kk = 0; kk <= 2 - 1; ++kk) {
const int kF_dst = -kk;
const int kF_src = kk + 1;
for (int jF = -2 + 1; jF <= ex[1]; ++jF) {
for (int iF = -2 + 1; iF <= ex[0]; ++iF) {
fh[idx_funcc_F(iF, jF, kF_dst, 2, ex)] =
fh[idx_funcc_F(iF, jF, kF_src, 2, ex)] * SoA[2];
}
}
}
/* 输出清零fxx,fyy,fzz,fxy,fxz,fyz = 0 */
// const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
// for (size_t p = 0; p < all; ++p) {
// fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
// fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
// }
/*
* Fortran:
* do k=1,ex3-1
* do j=1,ex2-1
* do i=1,ex1-1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* 高阶分支i±2,j±2,k±2 都在范围内 */
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
{
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
/* fxy 高阶:完全照搬 Fortran 的括号结构 */
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
/* fxz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
/* fyz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
/* 二阶分支i±1,j±1,k±1 在范围内 */
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
{
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
);
}else{
fxx[p] = 0.0;
fyy[p] = 0.0;
fzz[p] = 0.0;
fxy[p] = 0.0;
fxz[p] = 0.0;
fyz[p] = 0.0;
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,145 @@
#include "xh_tool.h"
/*
* C 版 fderivs
*
* Fortran:
* subroutine fderivs(ex,f,fx,fy,fz,X,Y,Z,SYM1,SYM2,SYM3,symmetry,onoff)
*
* 约定:
* f, fx, fy, fz: ex1*ex2*ex3按 idx_ex 布局
* X: ex1, Y: ex2, Z: ex3
*/
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff)
{
(void)onoff; // Fortran 里没用到
const double ZEO = 0.0, ONE = 1.0;
const double TWO = 2.0, EIT = 8.0;
const double F12 = 12.0;
const int NO_SYMM = 0, EQ_SYMM = 1; // OCTANT=2 在本子程序里不直接用
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -1;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -1;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -1;
// SoA(1:3) = SYM1,SYM2,SYM3
const double SoA[3] = { SYM1, SYM2, SYM3 };
// fh: (ex1+2)*(ex2+2)*(ex3+2) because ord=2
const size_t nx = (size_t)ex1 + 2;
const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
// double *fh = (double*)malloc(fh_size * sizeof(double));
if (!fh) return;
// call symmetry_bd(2,ex,f,fh,SoA)
symmetry_bd(2, ex, f, fh, SoA);
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
// fx = fy = fz = 0
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
for (size_t p = 0; p < all; ++p) {
fx[p] = ZEO;
fy[p] = ZEO;
fz[p] = ZEO;
}
/*
* Fortran loops:
* do k=1,ex3-1
* do j=1,ex2-1
* do i=1,ex1-1
*
* C: k0=0..ex3-2, j0=0..ex2-2, i0=0..ex1-2
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
if ((iF + 2) <= ex1 && (iF - 2) >= iminF &&
(jF + 2) <= ex2 && (jF - 2) >= jminF &&
(kF + 2) <= ex3 && (kF - 2) >= kminF)
{
fx[p] = d12dx * (
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)]
);
fy[p] = d12dy * (
fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)]
);
fz[p] = d12dz * (
fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] -
EIT * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
EIT * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
);
}
// elseif(i+1 <= imax .and. i-1 >= imin ...)
else if ((iF + 1) <= ex1 && (iF - 1) >= iminF &&
(jF + 1) <= ex2 && (jF - 1) >= jminF &&
(kF + 1) <= ex3 && (kF - 1) >= kminF)
{
fx[p] = d2dx * (
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fy[p] = d2dy * (
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fz[p] = d2dz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,143 @@
#include "xh_global_interp.h"
/* 你已有的 polin3由前面 Fortran->C 翻译得到) */
// void polin3(const double *x1a, const double *x2a, const double *x3a,
// const double *ya, double x1, double x2, double x3,
// double *y, double *dy, int ordn);
/*
你需要提供 decide3d 的实现(这里仅声明)。
Fortran: decide3d(ex,f,f,cxB,cxT,SoA,ya,ORDN,Symmetry)
- ex: [3]
- f: 三维场(列主序)
- cxB/cxT: 3 维窗口起止Fortran 1-based且可能 <=0
- SoA: [3]
- ya: 输出 ORDN^3 的采样块(列主序)
- return: 0 表示正常;非 0 表示错误(对应 Fortran logical = .true.
*/
// int xh_decide3d(const int ex[3],
// const double *f_in,
// const double *f_in2, /* Fortran 里传了 f,f按原样保留 */
// const int cxB[3],
// const int cxT[3],
// const double SoA[3],
// double *ya,
// int ordn,
// int symmetry);
/* 把 Fortran 1-based 下标 idxF (可为负/0) 映射到 C 的 X[idx] 访问(只用于 X(2-cxB) 这种表达式) */
static inline double X_at_FortranIndex(const double *X, int idxF) {
/* Fortran: X(1) 对应 C: X[0] */
return X[idxF - 1];
}
/* Fortran 整数截断idint 在这里可用 (int) 实现(对正数等价于 floor */
static inline int idint_like(double a) {
return (int)a; /* trunc toward zero */
}
/* global_interp 的 C 版 */
void xh_global_interp(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, /* f(ex1,ex2,ex3) column-major */
double &f_int,
double x1, double y1, double z1,
int ORDN,
const double SoA[3],
int symmetry)
{
// double time1, time2;
// time1 = omp_get_wtime();
enum { NO_SYMM = 0, EQUATORIAL = 1, OCTANT = 2 };
int j, m;
int imin, jmin, kmin;
int cxB[3], cxT[3], cxI[3], cmin[3], cmax[3];
double cx[3];
double dX, dY, dZ, ddy;
/* Fortran: imin=lbound(f,1) ... 通常是 1这里按 1 处理 */
imin = 1; jmin = 1; kmin = 1;
dX = X_at_FortranIndex(X, imin + 1) - X_at_FortranIndex(X, imin);
dY = X_at_FortranIndex(Y, jmin + 1) - X_at_FortranIndex(Y, jmin);
dZ = X_at_FortranIndex(Z, kmin + 1) - X_at_FortranIndex(Z, kmin);
/* x1a(j) = (j-1)*1.0 (j=1..ORDN) */
double *x1a = (double*)malloc((size_t)ORDN * sizeof(double));
double *ya = (double*)malloc((size_t)ORDN * (size_t)ORDN * (size_t)ORDN * sizeof(double));
if (!x1a || !ya) {
fprintf(stderr, "global_interp: malloc failed\n");
exit(1);
}
for (j = 0; j < ORDN; j++) x1a[j] = (double)j;
/* cxI(m) = idint((p - P(1))/dP + 0.4) + 1 (Fortran 1-based) */
cxI[0] = idint_like((x1 - X_at_FortranIndex(X, 1)) / dX + 0.4) + 1;
cxI[1] = idint_like((y1 - X_at_FortranIndex(Y, 1)) / dY + 0.4) + 1;
cxI[2] = idint_like((z1 - X_at_FortranIndex(Z, 1)) / dZ + 0.4) + 1;
/* cxB = cxI - ORDN/2 + 1 ; cxT = cxB + ORDN - 1 */
int half = ORDN / 2; /* Fortran 整数除法 */
for (m = 0; m < 3; m++) {
cxB[m] = cxI[m] - half + 1;
cxT[m] = cxB[m] + ORDN - 1;
}
/* cmin=1; cmax=ex */
cmin[0] = cmin[1] = cmin[2] = 1;
cmax[0] = ex[0];
cmax[1] = ex[1];
cmax[2] = ex[2];
/* 对称边界时允许 cxB 为负/0与 Fortran 一致) */
if (symmetry == OCTANT && fabs(X_at_FortranIndex(X, 1)) < dX) cmin[0] = -half + 2;
if (symmetry == OCTANT && fabs(X_at_FortranIndex(Y, 1)) < dY) cmin[1] = -half + 2;
if (symmetry != NO_SYMM && fabs(X_at_FortranIndex(Z, 1)) < dZ) cmin[2] = -half + 2;
/* 夹紧窗口 [cxB,cxT] 到 [cmin,cmax] */
for (m = 0; m < 3; m++) {
if (cxB[m] < cmin[m]) {
cxB[m] = cmin[m];
cxT[m] = cxB[m] + ORDN - 1;
}
if (cxT[m] > cmax[m]) {
cxT[m] = cmax[m];
cxB[m] = cxT[m] + 1 - ORDN;
}
}
/*
cx(m) 的计算:如果 cxB>0:
cx = (p - P(cxB))/dP
else:
cx = (p + P(2 - cxB))/dP
注意这里的 cxB 是 Fortran 1-based 语义下的整数,可能 <=0。
*/
if (cxB[0] > 0) cx[0] = (x1 - X_at_FortranIndex(X, cxB[0])) / dX;
else cx[0] = (x1 + X_at_FortranIndex(X, 2 - cxB[0])) / dX;
if (cxB[1] > 0) cx[1] = (y1 - X_at_FortranIndex(Y, cxB[1])) / dY;
else cx[1] = (y1 + X_at_FortranIndex(Y, 2 - cxB[1])) / dY;
if (cxB[2] > 0) cx[2] = (z1 - X_at_FortranIndex(Z, cxB[2])) / dZ;
else cx[2] = (z1 + X_at_FortranIndex(Z, 2 - cxB[2])) / dZ;
/* decide3d: 填充 ya(1:ORDN,1:ORDN,1:ORDN) */
if (xh_decide3d(ex, f, f, cxB, cxT, SoA, ya, ORDN, symmetry)) {
printf("global_interp position: %g %g %g\n", x1, y1, z1);
printf("data range: %g %g %g %g %g %g\n",
X_at_FortranIndex(X, 1), X_at_FortranIndex(X, ex[0]),
X_at_FortranIndex(Y, 1), X_at_FortranIndex(Y, ex[1]),
X_at_FortranIndex(Z, 1), X_at_FortranIndex(Z, ex[2]));
exit(1);
}
/* polin3(x1a,x1a,x1a,ya,cx(1),cx(2),cx(3),f_int,ddy,ORDN) */
xh_polin3(x1a, x1a, x1a, ya, cx[0], cx[1], cx[2], f_int, &ddy, ORDN);
free(x1a);
free(ya);
// time2 = omp_get_wtime();
// printf("Time for global_interp: %lf seconds\n", time2 - time1);
}

View File

@@ -0,0 +1,12 @@
#include "xh_po.h"
extern "C"{
void xh_global_interp(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, /* f(ex1,ex2,ex3) column-major */
double &f_int,
double x1, double y1, double z1,
int ORDN,
const double SoA[3],
int symmetry);
}

View File

@@ -0,0 +1,116 @@
#include "xh_tool.h"
/*
* C 版 kodis
*
* Fortran signature:
* subroutine kodis(ex,X,Y,Z,f,f_rhs,SoA,Symmetry,eps)
*
* 约定:
* X: ex1, Y: ex2, Z: ex3
* f, f_rhs: ex1*ex2*ex3 按 idx_ex 布局
* SoA[3]
* eps: double
*/
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps)
{
const double ONE = 1.0, SIX = 6.0, FIT = 15.0, TWT = 20.0;
const double cof = 64.0; // 2^6
const int NO_SYMM = 0, OCTANT = 2;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// Fortran: dX = X(2)-X(1) -> C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
(void)ONE; // ONE 在原 Fortran 里只是参数,这里不一定用得上
// Fortran: imax=ex(1) 等是 1-based 上界
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran: imin=jmin=kmin=1某些对称情况变 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry == OCTANT && fabs(X[0]) < dX) iminF = -2;
if (Symmetry == OCTANT && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3),对应 ord=3
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return;
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran loops:
* do k=1,ex3
* do j=1,ex2
* do i=1,ex1
*
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
* 并定义 Fortran index: iF=i0+1, ...
*/
for (int k0 = 0; k0 < ex3; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 < ex2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 < ex1; ++i0) {
const int iF = i0 + 1;
// Fortran if 条件:
// i-3 >= imin .and. i+3 <= imax 等(都是 Fortran 索引)
if ((iF - 3) >= iminF && (iF + 3) <= imaxF &&
(jF - 3) >= jminF && (jF + 3) <= jmaxF &&
(kF - 3) >= kminF && (kF + 3) <= kmaxF)
{
const size_t p = idx_ex(i0, j0, k0, ex);
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
const double Dx_term =
( (fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
TWT * fh[idx_fh_F(iF , jF, kF, ex)] ) / dX;
const double Dy_term =
( (fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
TWT * fh[idx_fh_F(iF, jF , kF, ex)] ) / dY;
const double Dz_term =
( (fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
TWT * fh[idx_fh_F(iF, jF, kF , ex)] ) / dZ;
// Fortran:
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
}
}
}
}
// free(fh);
}

View File

@@ -0,0 +1,262 @@
#include "xh_tool.h"
/*
* 你需要提供 symmetry_bd 的 C 版本(或 Fortran 绑到 C 的接口)。
* Fortran: call symmetry_bd(3,ex,f,fh,SoA)
*
* 约定:
* nghost = 3
* ex[3] = {ex1,ex2,ex3}
* f = 原始网格 (ex1*ex2*ex3)
* fh = 扩展网格 ((ex1+3)*(ex2+3)*(ex3+3)),对应 Fortran 的 (-2:ex1, ...)
* SoA[3] = 输入参数
*/
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3])
{
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
const double TWO = 2.0, F6 = 6.0, F18 = 18.0;
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
const int NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2;
(void)OCTANT; // 这里和 Fortran 一样只是定义了不用也没关系
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
// 对应 Fortran: dX = X(2)-X(1) Fortran 1-based
// C: X[1]-X[0]
const double dX = X[1] - X[0];
const double dY = Y[1] - Y[0];
const double dZ = Z[1] - Z[0];
const double d12dx = ONE / F12 / dX;
const double d12dy = ONE / F12 / dY;
const double d12dz = ONE / F12 / dZ;
// Fortran 里算了 d2dx/d2dy/d2dz 但本 subroutine 里没用到(保持一致也算出来)
const double d2dx = ONE / TWO / dX;
const double d2dy = ONE / TWO / dY;
const double d2dz = ONE / TWO / dZ;
(void)d2dx; (void)d2dy; (void)d2dz;
// Fortran:
// imax = ex(1); jmax = ex(2); kmax = ex(3)
const int imaxF = ex1;
const int jmaxF = ex2;
const int kmaxF = ex3;
// Fortran:
// imin=jmin=kmin=1; 若满足对称条件则设为 -2
int iminF = 1, jminF = 1, kminF = 1;
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
// 分配 fh大小 (ex1+3)*(ex2+3)*(ex3+3)
const size_t nx = (size_t)ex1 + 3;
const size_t ny = (size_t)ex2 + 3;
const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL;
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return; // 内存不足:直接返回(你也可以改成 abort/报错)
// Fortran: call symmetry_bd(3,ex,f,fh,SoA)
symmetry_bd(3, ex, f, fh, SoA);
/*
* Fortran 主循环:
* do k=1,ex(3)-1
* do j=1,ex(2)-1
* do i=1,ex(1)-1
*
* 转成 C 0-based
* k0 = 0..ex3-2, j0 = 0..ex2-2, i0 = 0..ex1-2
*
* 并且 Fortran 里的 i/j/k 在 fh 访问时,仍然是 Fortran 索引值:
* iF=i0+1, jF=j0+1, kF=k0+1
*/
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
const int kF = k0 + 1;
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
const int jF = j0 + 1;
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
// ---------------- x direction ----------------
const double sfx = Sfx[p];
if (sfx > ZEO) {
// Fortran: if(i+3 <= imax)
// iF+3 <= ex1 <=> i0+4 <= ex1 <=> i0 <= ex1-4
if (i0 <= ex1 - 4) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
// elseif(i+2 <= imax) <=> i0 <= ex1-3
else if (i0 <= ex1 - 3) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i+1 <= imax) <=> i0 <= ex1-2循环里总成立
else if (i0 <= ex1 - 2) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
} else if (sfx < ZEO) {
// Fortran: if(i-3 >= imin)
// (iF-3) >= iminF <=> (i0-2) >= iminF
if ((i0 - 2) >= iminF) {
f_rhs[p] -= sfx * d12dx *
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
}
// elseif(i-2 >= imin) <=> (i0-1) >= iminF
else if ((i0 - 1) >= iminF) {
f_rhs[p] += sfx * d12dx *
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
}
// elseif(i-1 >= imin) <=> i0 >= iminF
else if (i0 >= iminF) {
f_rhs[p] += sfx * d12dx *
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
}
}
// ---------------- y direction ----------------
const double sfy = Sfy[p];
if (sfy > ZEO) {
// jF+3 <= ex2 <=> j0+4 <= ex2 <=> j0 <= ex2-4
if (j0 <= ex2 - 4) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
} else if (j0 <= ex2 - 3) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 <= ex2 - 2) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
}
} else if (sfy < ZEO) {
if ((j0 - 2) >= jminF) {
f_rhs[p] -= sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
} else if ((j0 - 1) >= jminF) {
f_rhs[p] += sfy * d12dy *
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
} else if (j0 >= jminF) {
f_rhs[p] += sfy * d12dy *
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
}
}
// ---------------- z direction ----------------
const double sfz = Sfz[p];
if (sfz > ZEO) {
if (k0 <= ex3 - 4) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
} else if (k0 <= ex3 - 3) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 <= ex3 - 2) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
}
} else if (sfz < ZEO) {
if ((k0 - 2) >= kminF) {
f_rhs[p] -= sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
} else if ((k0 - 1) >= kminF) {
f_rhs[p] += sfz * d12dz *
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
} else if (k0 >= kminF) {
f_rhs[p] += sfz * d12dz *
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
}
}
}
}
}
// free(fh);
}

19
AMSS_NCKU_source/xh_po.h Normal file
View File

@@ -0,0 +1,19 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
int xh_decide3d(const int ex[3],
const double *f,
const double *fpi, /* 这里未用Fortran 也没用到 */
const int cxB[3],
const int cxT[3],
const double SoA[3],
double *ya,
int ordn,
int Symmetry);
void xh_polint(const double *xa, const double *ya, double x,
double *y, double *dy, int ordn);
void xh_polin3(const double *x1a, const double *x2a, const double *x3a,
const double *ya, double x1, double x2, double x3,
double &y, double *dy, int ordn);

View File

@@ -0,0 +1,258 @@
#include "xh_po.h"
/*
ex[0..2] == Fortran ex(1:3)
cxB/cxT == Fortran cxB(1:3), cxT(1:3) (可能 <=0)
SoA[0..2] == Fortran SoA(1:3)
f, fpi == Fortran f(ex1,ex2,ex3) column-major (1-based in formulas)
ya == 连续内存,尺寸为 ORDN^3对应 Fortran ya(cxB1:cxT1, cxB2:cxT2, cxB3:cxT3)
但注意:我们用 offset 映射把 Fortran 的 i/j/k 坐标写进去。
*/
static inline int imax(int a, int b) { return a > b ? a : b; }
static inline int imin(int a, int b) { return a < b ? a : b; }
/* f(i,j,k): Fortran column-major, i/j/k are Fortran 1-based in [1..ex] */
#define F(i,j,k) f[((i)-1) + ex1 * (((j)-1) + ex2 * ((k)-1))]
/*
ya(i,j,k): i in [cxB1..cxT1], j in [cxB2..cxT2], k in [cxB3..cxT3]
我们把它映射到 C 的 0..ORDN-1 立方体:
ii = i - cxB1
jj = j - cxB2
kk = k - cxB3
并按 column-major 存储(与 Fortran 一致,方便直接喂给你的 polin3
*/
#define YA(i,j,k) ya[((i)-cxB1) + ordn * (((j)-cxB2) + ordn * ((k)-cxB3))]
int xh_decide3d(const int ex[3],
const double *f,
const double *fpi, /* 这里未用Fortran 也没用到 */
const int cxB[3],
const int cxT[3],
const double SoA[3],
double *ya,
int ordn,
int Symmetry) /* Symmetry 在 decide3d 里也没直接用 */
{
(void)fpi;
(void)Symmetry;
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
int fmin1[3], fmin2[3], fmax1[3], fmax2[3];
int i, j, k, m;
int gont = 0;
/* 方便 YA 宏使用 */
const int cxB1 = cxB[0], cxB2 = cxB[1], cxB3 = cxB[2];
for (m = 0; m < 3; m++) {
/* Fortran 的 “NaN 检查” 在整数上基本无意义,这里不额外处理 */
fmin1[m] = imax(1, cxB[m]);
fmax1[m] = cxT[m];
fmin2[m] = cxB[m];
fmax2[m] = imin(0, cxT[m]);
/* if((fmin1<=fmax1) and (fmin1<1 or fmax1>ex)) gont=true */
if ((fmin1[m] <= fmax1[m]) && (fmin1[m] < 1 || fmax1[m] > ex[m])) gont = 1;
/* if((fmin2<=fmax2) and (2-fmax2<1 or 2-fmin2>ex)) gont=true */
if ((fmin2[m] <= fmax2[m]) && (2 - fmax2[m] < 1 || 2 - fmin2[m] > ex[m])) gont = 1;
}
if (gont) {
printf("error in decide3d\n");
printf("cxB: %d %d %d cxT: %d %d %d ex: %d %d %d\n",
cxB[0], cxB[1], cxB[2], cxT[0], cxT[1], cxT[2], ex[0], ex[1], ex[2]);
printf("fmin1: %d %d %d fmax1: %d %d %d\n",
fmin1[0], fmin1[1], fmin1[2], fmax1[0], fmax1[1], fmax1[2]);
printf("fmin2: %d %d %d fmax2: %d %d %d\n",
fmin2[0], fmin2[1], fmin2[2], fmax2[0], fmax2[1], fmax2[2]);
return 1;
}
/* ---- 填充 ya完全照 Fortran 两大块循环写 ---- */
/* k in [fmin1(3)..fmax1(3)] */
for (k = fmin1[2]; k <= fmax1[2]; k++) {
/* j in [fmin1(2)..fmax1(2)] */
for (j = fmin1[1]; j <= fmax1[1]; j++) {
/* i in [fmin1(1)..fmax1(1)] : ya(i,j,k)=f(i,j,k) */
for (i = fmin1[0]; i <= fmax1[0]; i++) {
YA(i, j, k) = F(i, j, k);
}
/* i in [fmin2(1)..fmax2(1)] : ya(i,j,k)=f(2-i,j,k)*SoA(1) */
for (i = fmin2[0]; i <= fmax2[0]; i++) {
YA(i, j, k) = F(2 - i, j, k) * SoA[0];
}
}
/* j in [fmin2(2)..fmax2(2)] */
for (j = fmin2[1]; j <= fmax2[1]; j++) {
/* i in [fmin1(1)..fmax1(1)] : ya(i,j,k)=f(i,2-j,k)*SoA(2) */
for (i = fmin1[0]; i <= fmax1[0]; i++) {
YA(i, j, k) = F(i, 2 - j, k) * SoA[1];
}
/* i in [fmin2(1)..fmax2(1)] : ya=f(2-i,2-j,k)*SoA(1)*SoA(2) */
for (i = fmin2[0]; i <= fmax2[0]; i++) {
YA(i, j, k) = F(2 - i, 2 - j, k) * SoA[0] * SoA[1];
}
}
}
/* k in [fmin2(3)..fmax2(3)] */
for (k = fmin2[2]; k <= fmax2[2]; k++) {
/* j in [fmin1(2)..fmax1(2)] */
for (j = fmin1[1]; j <= fmax1[1]; j++) {
/* i in [fmin1(1)..fmax1(1)] : ya=f(i,j,2-k)*SoA(3) */
for (i = fmin1[0]; i <= fmax1[0]; i++) {
YA(i, j, k) = F(i, j, 2 - k) * SoA[2];
}
/* i in [fmin2(1)..fmax2(1)] : ya=f(2-i,j,2-k)*SoA(1)*SoA(3) */
for (i = fmin2[0]; i <= fmax2[0]; i++) {
YA(i, j, k) = F(2 - i, j, 2 - k) * SoA[0] * SoA[2];
}
}
/* j in [fmin2(2)..fmax2(2)] */
for (j = fmin2[1]; j <= fmax2[1]; j++) {
/* i in [fmin1(1)..fmax1(1)] : ya=f(i,2-j,2-k)*SoA(2)*SoA(3) */
for (i = fmin1[0]; i <= fmax1[0]; i++) {
YA(i, j, k) = F(i, 2 - j, 2 - k) * SoA[1] * SoA[2];
}
/* i in [fmin2(1)..fmax2(1)] : ya=f(2-i,2-j,2-k)*SoA1*SoA2*SoA3 */
for (i = fmin2[0]; i <= fmax2[0]; i++) {
YA(i, j, k) = F(2 - i, 2 - j, 2 - k) * SoA[0] * SoA[1] * SoA[2];
}
}
}
return 0;
}
#undef F
#undef YA
void xh_polint(const double *xa, const double *ya, double x,
double *y, double *dy, int ordn)
{
int i, m, ns, n_m;
double dif, dift, hp, h, den_val;
double *c = (double*)malloc((size_t)ordn * sizeof(double));
double *d = (double*)malloc((size_t)ordn * sizeof(double));
double *ho = (double*)malloc((size_t)ordn * sizeof(double));
if (!c || !d || !ho) {
fprintf(stderr, "polint: malloc failed\n");
exit(1);
}
for (i = 0; i < ordn; i++) {
c[i] = ya[i];
d[i] = ya[i];
ho[i] = xa[i] - x;
}
ns = 0; // Fortran ns=1 -> C ns=0
dif = fabs(x - xa[0]);
for (i = 1; i < ordn; i++) {
dift = fabs(x - xa[i]);
if (dift < dif) {
ns = i;
dif = dift;
}
}
*y = ya[ns];
ns -= 1; // Fortran ns=ns-1
for (m = 1; m <= ordn - 1; m++) {
n_m = ordn - m; // number of active points this round
for (i = 0; i < n_m; i++) {
hp = ho[i];
h = ho[i + m];
den_val = hp - h;
if (den_val == 0.0) {
fprintf(stderr, "failure in polint for point %g\n", x);
fprintf(stderr, "with input points xa: ");
for (int t = 0; t < ordn; t++) fprintf(stderr, "%g ", xa[t]);
fprintf(stderr, "\n");
exit(1);
}
den_val = (c[i + 1] - d[i]) / den_val;
d[i] = h * den_val;
c[i] = hp * den_val;
}
// Fortran: if (2*ns < n_m) then dy=c(ns+1) else dy=d(ns); ns=ns-1
// Here ns is C-indexed and can be -1; logic still matches.
if (2 * ns < n_m) {
*dy = c[ns + 1];
} else {
*dy = d[ns];
ns -= 1;
}
*y += *dy;
}
free(c);
free(d);
free(ho);
}
void xh_polin3(const double *x1a, const double *x2a, const double *x3a,
const double *ya, double x1, double x2, double x3,
double &y, double *dy, int ordn)
{
// ya is ordn x ordn x ordn in Fortran layout (column-major)
#define YA3(i,j,k) ya[(i) + ordn*((j) + ordn*(k))] // i,j,k: 0..ordn-1
int j, k;
double dy_temp;
// yatmp(j,k) in Fortran code is ordn x ordn, treat column-major:
// yatmp(j,k) -> yatmp[j + ordn*k]
double *yatmp = (double*)malloc((size_t)ordn * (size_t)ordn * sizeof(double));
double *ymtmp = (double*)malloc((size_t)ordn * sizeof(double));
if (!yatmp || !ymtmp) {
fprintf(stderr, "polin3: malloc failed\n");
exit(1);
}
#define YAT(j,k) yatmp[(j) + ordn*(k)]
for (k = 0; k < ordn; k++) {
for (j = 0; j < ordn; j++) {
// call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp)
// ya(:,j,k) contiguous: base is &YA3(0,j,k)
xh_polint(x1a, &YA3(0, j, k), x1, &YAT(j, k), &dy_temp, ordn);
}
}
for (k = 0; k < ordn; k++) {
// call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp)
xh_polint(x2a, &YAT(0, k), x2, &ymtmp[k], &dy_temp, ordn);
}
xh_polint(x3a, ymtmp, x3, &y, dy, ordn);
#undef YAT
free(yatmp);
free(ymtmp);
#undef YA3
}

View File

@@ -0,0 +1,338 @@
#ifndef SHARE_FUNC_H
#define SHARE_FUNC_H
#include <stdlib.h>
#include <stddef.h>
#include <math.h>
#include <stdio.h>
#include <omp.h>
/* 主网格0-based -> 1D */
static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) {
const int ex1 = ex[0], ex2 = ex[1];
return (size_t)i0 + (size_t)j0 * (size_t)ex1 + (size_t)k0 * (size_t)ex1 * (size_t)ex2;
}
/*
* fh 对应 Fortran: fh(-1:ex1, -1:ex2, -1:ex3)
* ord=2 => shift=1
* iF/jF/kF 为 Fortran 索引(可为 -1,0,1..ex
*/
static inline size_t idx_fh_F_ord2(int iF, int jF, int kF, const int ex[3]) {
const int shift = 1;
const int nx = ex[0] + 2; // ex1 + ord
const int ny = ex[1] + 2;
const int ii = iF + shift; // 0..ex1+1
const int jj = jF + shift; // 0..ex2+1
const int kk = kF + shift; // 0..ex3+1
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* fh 对应 Fortran: fh(-2:ex1, -2:ex2, -2:ex3)
* ord=3 => shift=2
* iF/jF/kF 是 Fortran 索引(可为负)
*/
static inline size_t idx_fh_F(int iF, int jF, int kF, const int ex[3]) {
const int shift = 2; // ord=3 -> -2..ex
const int nx = ex[0] + 3; // ex1 + ord
const int ny = ex[1] + 3;
const int ii = iF + shift; // 0..ex1+2
const int jj = jF + shift; // 0..ex2+2
const int kk = kF + shift; // 0..ex3+2
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* func: (1..extc1, 1..extc2, 1..extc3) 1-based in Fortran
* funcc: (-ord+1..extc1, -ord+1..extc2, -ord+1..extc3) in Fortran
*
* C 里我们把:
* func 视为 0-based: i0=0..extc1-1, j0=0..extc2-1, k0=0..extc3-1
* funcc 用“平移下标”存为一维数组:
* iF in [-ord+1..extc1] -> ii = iF + (ord-1) in [0..extc1+ord-1]
* 总长度 nx = extc1 + ord
* 同理 ny = extc2 + ord, nz = extc3 + ord
*/
static inline size_t idx_func0(int i0, int j0, int k0, const int extc[3]) {
const int nx = extc[0], ny = extc[1];
return (size_t)i0 + (size_t)j0 * (size_t)nx + (size_t)k0 * (size_t)nx * (size_t)ny;
}
static inline size_t idx_funcc_F(int iF, int jF, int kF, int ord, const int extc[3]) {
const int shift = ord - 1; // iF = -shift .. extc1
const int nx = extc[0] + ord; // [-shift..extc1] 共 extc1+ord 个
const int ny = extc[1] + ord;
const int ii = iF + shift; // 0..extc1+shift
const int jj = jF + shift; // 0..extc2+shift
const int kk = kF + shift; // 0..extc3+shift
return (size_t)ii + (size_t)jj * (size_t)nx + (size_t)kk * (size_t)nx * (size_t)ny;
}
/*
* 等价于 Fortran:
* funcc(1:extc1,1:extc2,1:extc3)=func
* do i=0,ord-1
* funcc(-i,1:extc2,1:extc3) = funcc(i+1,1:extc2,1:extc3)*SoA(1)
* enddo
* do i=0,ord-1
* funcc(:,-i,1:extc3) = funcc(:,i+1,1:extc3)*SoA(2)
* enddo
* do i=0,ord-1
* funcc(:,:,-i) = funcc(:,:,i+1)*SoA(3)
* enddo
*/
static inline void symmetry_bd(int ord,
const int extc[3],
const double *func,
double *funcc,
const double SoA[3])
{
const int extc1 = extc[0], extc2 = extc[1], extc3 = extc[2];
// 1) funcc(1:extc1,1:extc2,1:extc3) = func
// Fortran 的 (iF=1..extc1) 对应 C 的 func(i0=0..extc1-1)
for (int k0 = 0; k0 < extc3; ++k0) {
for (int j0 = 0; j0 < extc2; ++j0) {
for (int i0 = 0; i0 < extc1; ++i0) {
const int iF = i0 + 1, jF = j0 + 1, kF = k0 + 1;
funcc[idx_funcc_F(iF, jF, kF, ord, extc)] = func[idx_func0(i0, j0, k0, extc)];
}
}
}
// 2) do i=0..ord-1: funcc(-i, 1:extc2, 1:extc3) = funcc(i+1, ...)*SoA(1)
for (int ii = 0; ii <= ord - 1; ++ii) {
const int iF_dst = -ii; // 0, -1, -2, ...
const int iF_src = ii + 1; // 1, 2, 3, ...
for (int kF = 1; kF <= extc3; ++kF) {
for (int jF = 1; jF <= extc2; ++jF) {
funcc[idx_funcc_F(iF_dst, jF, kF, ord, extc)] =
funcc[idx_funcc_F(iF_src, jF, kF, ord, extc)] * SoA[0];
}
}
}
// 3) do i=0..ord-1: funcc(:,-i, 1:extc3) = funcc(:, i+1, 1:extc3)*SoA(2)
// 注意 Fortran 这里的 ":" 表示 iF 从 (-ord+1..extc1) 全覆盖
for (int jj = 0; jj <= ord - 1; ++jj) {
const int jF_dst = -jj;
const int jF_src = jj + 1;
for (int kF = 1; kF <= extc3; ++kF) {
for (int iF = -ord + 1; iF <= extc1; ++iF) {
funcc[idx_funcc_F(iF, jF_dst, kF, ord, extc)] =
funcc[idx_funcc_F(iF, jF_src, kF, ord, extc)] * SoA[1];
}
}
}
// 4) do i=0..ord-1: funcc(:,:,-i) = funcc(:,:, i+1)*SoA(3)
for (int kk = 0; kk <= ord - 1; ++kk) {
const int kF_dst = -kk;
const int kF_src = kk + 1;
for (int jF = -ord + 1; jF <= extc2; ++jF) {
for (int iF = -ord + 1; iF <= extc1; ++iF) {
funcc[idx_funcc_F(iF, jF, kF_dst, ord, extc)] =
funcc[idx_funcc_F(iF, jF, kF_src, ord, extc)] * SoA[2];
}
}
}
}
#endif
/* 你已有的函数idx_ex / idx_fh_F_ord2 以及 fh 的布局 */
static inline void fdderivs_xh(
int i0, int j0, int k0,
const int ex[3],
const double *fh,
int iminF, int jminF, int kminF,
int imaxF, int jmaxF, int kmaxF,
double Fdxdx, double Fdydy, double Fdzdz,
double Fdxdy, double Fdxdz, double Fdydz,
double Sdxdx, double Sdydy, double Sdzdz,
double Sdxdy, double Sdxdz, double Sdydz,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz
){
const double F8 = 8.0;
const double F16 = 16.0;
const double F30 = 30.0;
const double TWO = 2.0;
const int iF = i0 + 1;
const int jF = j0 + 1;
const int kF = k0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex);
/* 高阶分支i±2,j±2,k±2 都在范围内 */
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
{
fxx[p] = Fdxdx * (
-fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF + 2, jF, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Fdydy * (
-fh[idx_fh_F_ord2(iF, jF - 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF + 2, kF, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Fdzdz * (
-fh[idx_fh_F_ord2(iF, jF, kF - 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
F30 * fh[idx_fh_F_ord2(iF, jF, kF, ex)] -
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)] +
F16 * fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
/* fxy 高阶 */
{
const double t_jm2 =
( fh[idx_fh_F_ord2(iF - 2, jF - 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 2, kF, ex)] );
const double t_jm1 =
( fh[idx_fh_F_ord2(iF - 2, jF - 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF - 1, kF, ex)] );
const double t_jp1 =
( fh[idx_fh_F_ord2(iF - 2, jF + 1, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 1, kF, ex)] );
const double t_jp2 =
( fh[idx_fh_F_ord2(iF - 2, jF + 2, kF, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF + 2, kF, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF + 2, kF, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF + 2, kF, ex)] );
fxy[p] = Fdxdy * ( t_jm2 - F8 * t_jm1 + F8 * t_jp1 - t_jp2 );
}
/* fxz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF - 2, jF, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF - 1, jF, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF + 1, jF, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF + 2, jF, kF + 2, ex)] );
fxz[p] = Fdxdz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
/* fyz 高阶 */
{
const double t_km2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 2, ex)] );
const double t_km1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF - 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF - 1, ex)] );
const double t_kp1 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 1, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 1, ex)] );
const double t_kp2 =
( fh[idx_fh_F_ord2(iF, jF - 2, kF + 2, ex)]
-F8*fh[idx_fh_F_ord2(iF, jF - 1, kF + 2, ex)]
+F8*fh[idx_fh_F_ord2(iF, jF + 1, kF + 2, ex)]
- fh[idx_fh_F_ord2(iF, jF + 2, kF + 2, ex)] );
fyz[p] = Fdydz * ( t_km2 - F8 * t_km1 + F8 * t_kp1 - t_kp2 );
}
}
/* 二阶分支i±1,j±1,k±1 在范围内 */
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
{
fxx[p] = Sdxdx * (
fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
);
fyy[p] = Sdydy * (
fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
);
fzz[p] = Sdzdz * (
fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] -
TWO * fh[idx_fh_F_ord2(iF, jF, kF, ex)] +
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
);
fxy[p] = Sdxdy * (
fh[idx_fh_F_ord2(iF - 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF - 1, kF, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF + 1, kF, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF + 1, kF, ex)]
);
fxz[p] = Sdxdz * (
fh[idx_fh_F_ord2(iF - 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF + 1, jF, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF - 1, jF, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF + 1, jF, kF + 1, ex)]
);
fyz[p] = Sdydz * (
fh[idx_fh_F_ord2(iF, jF - 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF + 1, kF - 1, ex)] -
fh[idx_fh_F_ord2(iF, jF - 1, kF + 1, ex)] +
fh[idx_fh_F_ord2(iF, jF + 1, kF + 1, ex)]
);
}
else {
fxx[p] = 0.0; fyy[p] = 0.0; fzz[p] = 0.0;
fxy[p] = 0.0; fxz[p] = 0.0; fyz[p] = 0.0;
}
}

View File

@@ -0,0 +1,27 @@
#include "xh_share_func.h"
void fdderivs(const int ex[3],
const double *f,
double *fxx, double *fxy, double *fxz,
double *fyy, double *fyz, double *fzz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
void fderivs(const int ex[3],
const double *f,
double *fx, double *fy, double *fz,
const double *X, const double *Y, const double *Z,
double SYM1, double SYM2, double SYM3,
int Symmetry, int onoff);
void kodis(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double SoA[3],
int Symmetry, double eps);
void lopsided(const int ex[3],
const double *X, const double *Y, const double *Z,
const double *f, double *f_rhs,
const double *Sfx, const double *Sfy, const double *Sfz,
int Symmetry, const double SoA[3]);

File diff suppressed because it is too large Load Diff

View File

@@ -1,195 +1,195 @@
################################################################## ##################################################################
## ##
## Generate input file for the AMSS-NCKU TwoPuncture routine ## Generate input file for the AMSS-NCKU TwoPuncture routine
## Author: Xiaoqu ## Author: Xiaoqu
## 2024/11/27 ## 2024/11/27
## Modified: 2025/01/21 ## Modified: 2025/01/21
## ##
################################################################## ##################################################################
import numpy import numpy
import os import os
import AMSS_NCKU_Input as input_data ## import program input file import AMSS_NCKU_Input as input_data ## import program input file
import math import math
################################################################## ##################################################################
## Import binary black hole coordinates ## Import binary black hole coordinates
## If puncture data are set to "Automatically-BBH", compute initial orbital ## If puncture data are set to "Automatically-BBH", compute initial orbital
## positions and momenta according to the settings and rescale the total ## positions and momenta according to the settings and rescale the total
## binary mass to M = 1 for TwoPuncture input. ## binary mass to M = 1 for TwoPuncture input.
if (input_data.puncture_data_set == "Automatically-BBH" ): if (input_data.puncture_data_set == "Automatically-BBH" ):
mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0] mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]
if ( mass_ratio_Q < 1.0 ): if ( mass_ratio_Q < 1.0 ):
print( " mass_ratio setting is wrong, please reset!!!" ) print( " mass_ratio setting is wrong, please reset!!!" )
print( " set the first black hole to be the larger mass!!!" ) print( " set the first black hole to be the larger mass!!!" )
BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q ) BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q ) BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )
## Load binary separation and eccentricity ## Load binary separation and eccentricity
distance = input_data.Distance distance = input_data.Distance
e0 = input_data.e0 e0 = input_data.e0
## Set binary component coordinates ## Set binary component coordinates
## Note: place the larger-mass black hole at positive y and the ## Note: place the larger-mass black hole at positive y and the
## smaller-mass black hole at negative y to follow Brugmann's convention ## smaller-mass black hole at negative y to follow Brugmann's convention
## Coordinate convention for TwoPuncture input (Brugmann): ## Coordinate convention for TwoPuncture input (Brugmann):
## -----0-----> y ## -----0-----> y
## - + ## - +
BBH_X1 = 0.0 BBH_X1 = 0.0
BBH_Y1 = distance * 1.0 / ( 1 + mass_ratio_Q ) BBH_Y1 = distance * 1.0 / ( 1 + mass_ratio_Q )
BBH_Z1 = 0.0 BBH_Z1 = 0.0
BBH_X2 = 0.0 BBH_X2 = 0.0
BBH_Y2 = - distance * mass_ratio_Q / ( 1 + mass_ratio_Q ) BBH_Y2 = - distance * mass_ratio_Q / ( 1 + mass_ratio_Q )
BBH_Z2 = 0.0 BBH_Z2 = 0.0
position_BH = numpy.zeros( (2,3) ) position_BH = numpy.zeros( (2,3) )
position_BH[0] = [BBH_X1, BBH_Y1, BBH_Z1] position_BH[0] = [BBH_X1, BBH_Y1, BBH_Z1]
position_BH[1] = [BBH_X2, BBH_Y2, BBH_Z2] position_BH[1] = [BBH_X2, BBH_Y2, BBH_Z2]
## Optionally load momentum from parameter file ## Optionally load momentum from parameter file
## momentum_BH = input_data.momentum_BH ## momentum_BH = input_data.momentum_BH
## Compute orbital momenta using the BBH_orbit_parameter module ## Compute orbital momenta using the BBH_orbit_parameter module
import BBH_orbit_parameter import BBH_orbit_parameter
## Use the dimensionless spins defined in BBH_orbit_parameter ## Use the dimensionless spins defined in BBH_orbit_parameter
BBH_S1 = BBH_orbit_parameter.S1 BBH_S1 = BBH_orbit_parameter.S1
BBH_S2 = BBH_orbit_parameter.S2 BBH_S2 = BBH_orbit_parameter.S2
momentum_BH = numpy.zeros( (2,3) ) momentum_BH = numpy.zeros( (2,3) )
## Compute initial orbital momenta from post-Newtonian-based routine ## Compute initial orbital momenta from post-Newtonian-based routine
momentum_BH[0], momentum_BH[1] = BBH_orbit_parameter.generate_BBH_orbit_parameters( BBH_M1, BBH_M2, BBH_S1, BBH_S2, distance, e0 ) momentum_BH[0], momentum_BH[1] = BBH_orbit_parameter.generate_BBH_orbit_parameters( BBH_M1, BBH_M2, BBH_S1, BBH_S2, distance, e0 )
## Set spin angular momentum input for TwoPuncture ## Set spin angular momentum input for TwoPuncture
## Note: these are dimensional angular momenta (not dimensionless); multiply ## Note: these are dimensional angular momenta (not dimensionless); multiply
## by the square of the mass scale. Here masses are scaled so total M=1. ## by the square of the mass scale. Here masses are scaled so total M=1.
## angular_momentum_BH = input_data.angular_momentum_BH ## angular_momentum_BH = input_data.angular_momentum_BH
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )
for i in range(input_data.puncture_number): for i in range(input_data.puncture_number):
if ( input_data.Symmetry == "equatorial-symmetry" ): if ( input_data.Symmetry == "equatorial-symmetry" ):
if i==0: if i==0:
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * input_data.parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * input_data.parameter_BH[i,2] ]
elif i==1: elif i==1:
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * input_data.parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * input_data.parameter_BH[i,2] ]
else: else:
angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]
elif ( input_data.Symmetry == "no-symmetry" ): elif ( input_data.Symmetry == "no-symmetry" ):
if i==0: if i==0:
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
elif i==1: elif i==1:
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
else: else:
angular_momentum_BH[i] = (input_data.parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (input_data.parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
####################################################### #######################################################
## If puncture data are set to "Manually", read initial positions and momenta ## If puncture data are set to "Manually", read initial positions and momenta
## directly from the parameter file. Rescale the total binary mass to M=1 ## directly from the parameter file. Rescale the total binary mass to M=1
## for TwoPuncture input. ## for TwoPuncture input.
elif (input_data.puncture_data_set == "Manually" ): elif (input_data.puncture_data_set == "Manually" ):
mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0] mass_ratio_Q = input_data.parameter_BH[0,0] / input_data.parameter_BH[1,0]
if ( mass_ratio_Q < 1.0 ): if ( mass_ratio_Q < 1.0 ):
print( " mass_ratio setting is wrong, please reset!!!" ) print( " mass_ratio setting is wrong, please reset!!!" )
print( " set the first black hole to be the larger mass!!!" ) print( " set the first black hole to be the larger mass!!!" )
BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q ) BBH_M1 = mass_ratio_Q / ( 1.0 + mass_ratio_Q )
BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q ) BBH_M2 = 1.0 / ( 1.0 + mass_ratio_Q )
parameter_BH = input_data.parameter_BH parameter_BH = input_data.parameter_BH
position_BH = input_data.position_BH position_BH = input_data.position_BH
momentum_BH = input_data.momentum_BH momentum_BH = input_data.momentum_BH
## Compute binary separation and load eccentricity ## Compute binary separation and load eccentricity
distance = math.sqrt( (position_BH[0,0]-position_BH[1,0])**2 + (position_BH[0,1]-position_BH[1,1])**2 + (position_BH[0,2]-position_BH[1,2])**2 ) distance = math.sqrt( (position_BH[0,0]-position_BH[1,0])**2 + (position_BH[0,1]-position_BH[1,1])**2 + (position_BH[0,2]-position_BH[1,2])**2 )
e0 = input_data.e0 e0 = input_data.e0
## Set spin angular momentum input for TwoPuncture ## Set spin angular momentum input for TwoPuncture
## Note: these are dimensional angular momenta (not dimensionless); multiply ## Note: these are dimensional angular momenta (not dimensionless); multiply
## by the square of the mass scale. Here masses are scaled so total M=1. ## by the square of the mass scale. Here masses are scaled so total M=1.
## angular_momentum_BH = input_data.angular_momentum_BH ## angular_momentum_BH = input_data.angular_momentum_BH
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) )
for i in range(input_data.puncture_number): for i in range(input_data.puncture_number):
if ( input_data.Symmetry == "equatorial-symmetry" ): if ( input_data.Symmetry == "equatorial-symmetry" ):
if i==0: if i==0:
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M1**2) * parameter_BH[i,2] ]
elif i==1: elif i==1:
angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (BBH_M2**2) * parameter_BH[i,2] ]
else: else:
angular_momentum_BH[i] = [ 0.0, 0.0, (parameter_BH[i,0]**2) * parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (parameter_BH[i,0]**2) * parameter_BH[i,2] ]
elif ( input_data.Symmetry == "no-symmetry" ): elif ( input_data.Symmetry == "no-symmetry" ):
if i==0: if i==0:
angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (BBH_M1**2) * input_data.dimensionless_spin_BH[i]
elif i==1: elif i==1:
angular_momentum_BH[i] = (BBH_M2**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (BBH_M2**2) * input_data.dimensionless_spin_BH[i]
else: else:
angular_momentum_BH[i] = (parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (parameter_BH[i,0]**2) * input_data.dimensionless_spin_BH[i]
################################################################## ##################################################################
## Write the above binary data into the AMSS-NCKU TwoPuncture input file ## Write the above binary data into the AMSS-NCKU TwoPuncture input file
def generate_AMSSNCKU_TwoPuncture_input(): def generate_AMSSNCKU_TwoPuncture_input():
file1 = open( os.path.join(input_data.File_directory, "AMSS-NCKU-TwoPuncture.input"), "w") file1 = open( os.path.join(input_data.File_directory, "AMSS-NCKU-TwoPuncture.input"), "w")
print( "# -----0-----> y", file=file1 ) print( "# -----0-----> y", file=file1 )
print( "# - + use Brugmann's convention", file=file1 ) print( "# - + use Brugmann's convention", file=file1 )
print( "ABE::mp = -1.0", file=file1 ) ## use negative values so the code solves for bare masses automatically print( "ABE::mp = -1.0", file=file1 ) ## use negative values so the code solves for bare masses automatically
print( "ABE::mm = -1.0", file=file1 ) print( "ABE::mm = -1.0", file=file1 )
print( "# b = D/2", file=file1 ) print( "# b = D/2", file=file1 )
print( "ABE::b = ", ( distance / 2.0 ), file=file1 ) print( "ABE::b = ", ( distance / 2.0 ), file=file1 )
print( "ABE::P_plusx = ", momentum_BH[0,0], file=file1 ) print( "ABE::P_plusx = ", momentum_BH[0,0], file=file1 )
print( "ABE::P_plusy = ", momentum_BH[0,1], file=file1 ) print( "ABE::P_plusy = ", momentum_BH[0,1], file=file1 )
print( "ABE::P_plusz = ", momentum_BH[0,2], file=file1 ) print( "ABE::P_plusz = ", momentum_BH[0,2], file=file1 )
print( "ABE::P_minusx = ", momentum_BH[1,0], file=file1 ) print( "ABE::P_minusx = ", momentum_BH[1,0], file=file1 )
print( "ABE::P_minusy = ", momentum_BH[1,1], file=file1 ) print( "ABE::P_minusy = ", momentum_BH[1,1], file=file1 )
print( "ABE::P_minusz = ", momentum_BH[1,2], file=file1 ) print( "ABE::P_minusz = ", momentum_BH[1,2], file=file1 )
print( "ABE::S_plusx = ", angular_momentum_BH[0,0], file=file1 ) print( "ABE::S_plusx = ", angular_momentum_BH[0,0], file=file1 )
print( "ABE::S_plusy = ", angular_momentum_BH[0,1], file=file1 ) print( "ABE::S_plusy = ", angular_momentum_BH[0,1], file=file1 )
print( "ABE::S_plusz = ", angular_momentum_BH[0,2], file=file1 ) print( "ABE::S_plusz = ", angular_momentum_BH[0,2], file=file1 )
print( "ABE::S_minusx = ", angular_momentum_BH[1,0], file=file1 ) print( "ABE::S_minusx = ", angular_momentum_BH[1,0], file=file1 )
print( "ABE::S_minusy = ", angular_momentum_BH[1,1], file=file1 ) print( "ABE::S_minusy = ", angular_momentum_BH[1,1], file=file1 )
print( "ABE::S_minusz = ", angular_momentum_BH[1,2], file=file1 ) print( "ABE::S_minusz = ", angular_momentum_BH[1,2], file=file1 )
print( "ABE::Mp = ", BBH_M1, file=file1 ) print( "ABE::Mp = ", BBH_M1, file=file1 )
print( "ABE::Mm = ", BBH_M2, file=file1 ) print( "ABE::Mm = ", BBH_M2, file=file1 )
print( "ABE::admtol = 1.e-8", file=file1 ) print( "ABE::admtol = 1.e-8", file=file1 )
print( "ABE::Newtontol = 5.e-12", file=file1 ) print( "ABE::Newtontol = 5.e-12", file=file1 )
print( "ABE::nA = 50", file=file1 ) print( "ABE::nA = 50", file=file1 )
print( "ABE::nB = 50", file=file1 ) print( "ABE::nB = 50", file=file1 )
print( "ABE::nphi = 26", file=file1 ) print( "ABE::nphi = 26", file=file1 )
print( "ABE::Newtonmaxit = 50", file=file1 ) print( "ABE::Newtonmaxit = 50", file=file1 )
file1.close() file1.close()
return file1 return file1
################################################################## ##################################################################

File diff suppressed because it is too large Load Diff

View File

@@ -1,203 +1,192 @@
################################################################## ##################################################################
## ##
## This file defines the commands used to build and run AMSS-NCKU ## This file defines the commands used to build and run AMSS-NCKU
## Author: Xiaoqu ## Author: Xiaoqu
## 2025/01/24 ## 2025/01/24
## ##
################################################################## ##################################################################
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import subprocess import subprocess
import time import time
import os ## CPU core binding configuration using taskset
## taskset ensures all child processes inherit the CPU affinity mask
## OpenMP configuration for threaded Fortran kernels ## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
## OMP_NUM_THREADS: set to number of physical cores (not hyperthreads) ## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
## OMP_PROC_BIND: bind threads to cores to avoid migration overhead #NUMACTL_CPU_BIND = "taskset -c 0-111"
## OMP_STACKSIZE: each thread needs stack space for fh arrays (~3.6MB) NUMACTL_CPU_BIND = "taskset -c 0-47"
if "OMP_NUM_THREADS" not in os.environ: NUMACTL_CPU_BIND2 = "OMP_NUM_THREADS=48 OMP_PROC_BIND=close OMP_PLACES={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47} taskset -c 0-47"
os.environ["OMP_NUM_THREADS"] = "96" #NUMACTL_CPU_BIND2 = "taskset -c 0-1"
os.environ["OMP_STACKSIZE"] = "16M" ## Build parallelism configuration
os.environ["OMP_PROC_BIND"] = "close" ## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
os.environ["OMP_PLACES"] = "cores" ## Set make -j to utilize available cores for faster builds
## CPU core binding configuration using taskset BUILD_JOBS = 32
## taskset ensures all child processes inherit the CPU affinity mask
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores ##################################################################
#NUMACTL_CPU_BIND = "taskset -c 0-111"
#NUMACTL_CPU_BIND = "taskset -c 16-47,64-95"
#NUMACTL_CPU_BIND = "taskset -c 8-15" ##################################################################
NUMACTL_CPU_BIND = ""
## Compile the AMSS-NCKU main program ABE
## Build parallelism configuration
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores def makefile_ABE():
## Set make -j to utilize available cores for faster builds
BUILD_JOBS = 16 print( )
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " )
print( )
##################################################################
## Build command with CPU binding to nohz_full cores
if (input_data.GPU_Calculation == "no"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
################################################################## elif (input_data.GPU_Calculation == "yes"):
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
## Compile the AMSS-NCKU main program ABE else:
print( " CPU/GPU numerical calculation setting is wrong " )
def makefile_ABE(): print( )
print( ) ## Execute the command with subprocess.Popen and stream output
print( " Compiling the AMSS-NCKU executable file ABE/ABEGPU " ) makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
print( )
## Read and print output lines as they arrive
## Build command with CPU binding to nohz_full cores for line in makefile_process.stdout:
if (input_data.GPU_Calculation == "no"): print(line, end='') # stream output in real time
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
elif (input_data.GPU_Calculation == "yes"): ## Wait for the process to finish
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU" makefile_return_code = makefile_process.wait()
else: if makefile_return_code != 0:
print( " CPU/GPU numerical calculation setting is wrong " ) raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
print( )
print( )
## Execute the command with subprocess.Popen and stream output print( " Compilation of the AMSS-NCKU executable file ABE is finished " )
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) print( )
## Read and print output lines as they arrive return
for line in makefile_process.stdout:
print(line, end='') # stream output in real time ##################################################################
## Wait for the process to finish
makefile_return_code = makefile_process.wait()
if makefile_return_code != 0: ##################################################################
raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
## Compile the AMSS-NCKU TwoPuncture program TwoPunctureABE
print( )
print( " Compilation of the AMSS-NCKU executable file ABE is finished " ) def makefile_TwoPunctureABE():
print( )
print( )
return print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " )
print( )
##################################################################
## Build command with CPU binding to nohz_full cores
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
################################################################## ## Execute the command with subprocess.Popen and stream output
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
## Compile the AMSS-NCKU TwoPuncture program TwoPunctureABE
## Read and print output lines as they arrive
def makefile_TwoPunctureABE(): for line in makefile_process.stdout:
print(line, end='') # stream output in real time
print( )
print( " Compiling the AMSS-NCKU executable file TwoPunctureABE " ) ## Wait for the process to finish
print( ) makefile_return_code = makefile_process.wait()
if makefile_return_code != 0:
## Build command with CPU binding to nohz_full cores raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} TwoPunctureABE"
print( )
## Execute the command with subprocess.Popen and stream output print( " Compilation of the AMSS-NCKU executable file TwoPunctureABE is finished " )
makefile_process = subprocess.Popen(makefile_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) print( )
## Read and print output lines as they arrive return
for line in makefile_process.stdout:
print(line, end='') # stream output in real time ##################################################################
## Wait for the process to finish
makefile_return_code = makefile_process.wait()
if makefile_return_code != 0: ##################################################################
raise subprocess.CalledProcessError(makefile_return_code, makefile_command)
## Run the AMSS-NCKU main program ABE
print( )
print( " Compilation of the AMSS-NCKU executable file TwoPunctureABE is finished " ) def run_ABE():
print( )
print( )
return print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
print( )
##################################################################
## Define the command to run; cast other values to strings as needed
if (input_data.GPU_Calculation == "no"):
################################################################## #mpi_command = NUMACTL_CPU_BIND2 + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
## Run the AMSS-NCKU main program ABE mpi_command = """ OMP_NUM_THREADS=48 OMP_PROC_BIND=close OMP_PLACES=cores mpirun -np 1 --cpu-bind=sockets ./ABE """
mpi_command_outfile = "ABE_out.log"
def run_ABE(): elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND2 + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
print( ) mpi_command_outfile = "ABEGPU_out.log"
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
print( ) ## Execute the MPI command and stream output
mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
## Define the command to run; cast other values to strings as needed
## Write ABE run output to file while printing to stdout
if (input_data.GPU_Calculation == "no"): with open(mpi_command_outfile, 'w') as file0:
run_command = NUMACTL_CPU_BIND + " ./ABE" ## Read and print output lines; also write each line to file
run_command_outfile = "ABE_out.log" for line in mpi_process.stdout:
elif (input_data.GPU_Calculation == "yes"): print(line, end='') # stream output in real time
run_command = NUMACTL_CPU_BIND + " ./ABEGPU" file0.write(line) # write the line to file
run_command_outfile = "ABEGPU_out.log" file0.flush() # flush to ensure each line is written immediately (optional)
file0.close()
## Execute the command and stream output
run_process = subprocess.Popen(run_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) ## Wait for the process to finish
mpi_return_code = mpi_process.wait()
## Write ABE run output to file while printing to stdout
with open(run_command_outfile, 'w') as file0: print( )
## Read and print output lines; also write each line to file print( " The ABE/ABEGPU simulation is finished " )
for line in run_process.stdout: print( )
print(line, end='') # stream output in real time
file0.write(line) # write the line to file return
file0.flush() # flush to ensure each line is written immediately (optional)
file0.close() ##################################################################
## Wait for the process to finish
run_return_code = run_process.wait()
##################################################################
print( )
print( " The ABE/ABEGPU simulation is finished " ) ## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE
print( )
def run_TwoPunctureABE():
return tp_time1=time.time()
print( )
################################################################## print( " Running the AMSS-NCKU executable file TwoPunctureABE " )
print( )
## Define the command to run
################################################################## #TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE"
TwoPuncture_command = " ./TwoPunctureABE"
## Run the AMSS-NCKU TwoPuncture program TwoPunctureABE TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
def run_TwoPunctureABE(): ## Execute the command with subprocess.Popen and stream output
tp_time1=time.time() TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
print( )
print( " Running the AMSS-NCKU executable file TwoPunctureABE " ) ## Write TwoPunctureABE run output to file while printing to stdout
print( ) with open(TwoPuncture_command_outfile, 'w') as file0:
## Read and print output lines; also write each line to file
## Define the command to run for line in TwoPuncture_process.stdout:
#TwoPuncture_command = NUMACTL_CPU_BIND + " ./TwoPunctureABE" print(line, end='') # stream output in real time
TwoPuncture_command = " ./TwoPunctureABE" file0.write(line) # write the line to file
TwoPuncture_command_outfile = "TwoPunctureABE_out.log" file0.flush() # flush to ensure each line is written immediately (optional)
file0.close()
## Execute the command with subprocess.Popen and stream output
TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) ## Wait for the process to finish
TwoPuncture_command_return_code = TwoPuncture_process.wait()
## Write TwoPunctureABE run output to file while printing to stdout
with open(TwoPuncture_command_outfile, 'w') as file0: print( )
## Read and print output lines; also write each line to file print( " The TwoPunctureABE simulation is finished " )
for line in TwoPuncture_process.stdout: print( )
print(line, end='') # stream output in real time tp_time2=time.time()
file0.write(line) # write the line to file et=tp_time2-tp_time1
file0.flush() # flush to ensure each line is written immediately (optional) print(f"Used time: {et}")
file0.close() return
## Wait for the process to finish ##################################################################
TwoPuncture_command_return_code = TwoPuncture_process.wait()
print( )
print( " The TwoPunctureABE simulation is finished " )
print( )
tp_time2=time.time()
et=tp_time2-tp_time1
print(f"Used time: {et}")
return
##################################################################

File diff suppressed because it is too large Load Diff

View File

@@ -1,133 +1,133 @@
################################################################## ##################################################################
## ##
## Update puncture parameters from TwoPuncture output ## Update puncture parameters from TwoPuncture output
## Author: Xiaoqu ## Author: Xiaoqu
## 2024/12/04 ## 2024/12/04
## ##
################################################################## ##################################################################
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import numpy import numpy
import os import os
################################################################## ##################################################################
################################################################## ##################################################################
def read_TwoPuncture_Output(Output_File_directory): def read_TwoPuncture_Output(Output_File_directory):
dimensionless_mass_BH = numpy.zeros( input_data.puncture_number ) dimensionless_mass_BH = numpy.zeros( input_data.puncture_number )
bare_mass_BH = numpy.zeros( input_data.puncture_number ) ## initialize bare mass for each black hole bare_mass_BH = numpy.zeros( input_data.puncture_number ) ## initialize bare mass for each black hole
position_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize initial position for each black hole position_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize initial position for each black hole
momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize momentum for each black hole momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize momentum for each black hole
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize spin angular momentum for each black hole angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize spin angular momentum for each black hole
# Read TwoPuncture output file # Read TwoPuncture output file
data = numpy.loadtxt( os.path.join(Output_File_directory, "puncture_parameters_new.txt") ) data = numpy.loadtxt( os.path.join(Output_File_directory, "puncture_parameters_new.txt") )
# Ensure data is parsed as a 1-D array # Ensure data is parsed as a 1-D array
data = data.reshape(-1) data = data.reshape(-1)
for i in range(input_data.puncture_number): for i in range(input_data.puncture_number):
## Read parameters for the first two punctures from TwoPuncture output ## Read parameters for the first two punctures from TwoPuncture output
## For additional punctures, read parameters from the input file ## For additional punctures, read parameters from the input file
if i<2: if i<2:
bare_mass_BH[i] = data[12*i] bare_mass_BH[i] = data[12*i]
dimensionless_mass_BH[i] = data[12*i+1] dimensionless_mass_BH[i] = data[12*i+1]
position_BH[i] = [ data[12*i+3], data[12*i+4], data[12*i+5] ] position_BH[i] = [ data[12*i+3], data[12*i+4], data[12*i+5] ]
momentum_BH[i] = [ data[12*i+6], data[12*i+7], data[12*i+8] ] momentum_BH[i] = [ data[12*i+6], data[12*i+7], data[12*i+8] ]
angular_momentum_BH[i] = [ data[12*i+9], data[12*i+10], data[12*i+11] ] angular_momentum_BH[i] = [ data[12*i+9], data[12*i+10], data[12*i+11] ]
else: else:
dimensionless_mass_BH[i] = input_data.parameter_BH[i,0] dimensionless_mass_BH[i] = input_data.parameter_BH[i,0]
bare_mass_BH[i] = input_data.parameter_BH[i,0] bare_mass_BH[i] = input_data.parameter_BH[i,0]
position_BH[i] = input_data.position_BH[i] position_BH[i] = input_data.position_BH[i]
momentum_BH[i] = input_data.momentum_BH[i] momentum_BH[i] = input_data.momentum_BH[i]
## Read angular momentum according to symmetry ## Read angular momentum according to symmetry
if ( input_data.Symmetry == "equatorial-symmetry" ): if ( input_data.Symmetry == "equatorial-symmetry" ):
angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (input_data.parameter_BH[i,0]**2) * input_data.parameter_BH[i,2] ]
elif ( input_data.Symmetry == "no-symmetry" ): elif ( input_data.Symmetry == "no-symmetry" ):
angular_momentum_BH[i] = (dimensionless_mass_BH[i]**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (dimensionless_mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
return bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH return bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH
################################################################## ##################################################################
################################################################## ##################################################################
## Append the computed puncture information into the AMSS-NCKU input file ## Append the computed puncture information into the AMSS-NCKU input file
def append_AMSSNCKU_BSSN_input(File_directory, TwoPuncture_File_directory): def append_AMSSNCKU_BSSN_input(File_directory, TwoPuncture_File_directory):
charge_Q_BH = numpy.zeros( input_data.puncture_number ) ## initialize charge for each black hole charge_Q_BH = numpy.zeros( input_data.puncture_number ) ## initialize charge for each black hole
## If using Ansorg-TwoPuncture to solve the initial-data problem, read ## If using Ansorg-TwoPuncture to solve the initial-data problem, read
## bare masses, positions and angular momenta from TwoPuncture output ## bare masses, positions and angular momenta from TwoPuncture output
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ): if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH = read_TwoPuncture_Output(TwoPuncture_File_directory) bare_mass_BH, dimensionless_mass_BH, position_BH, momentum_BH, angular_momentum_BH = read_TwoPuncture_Output(TwoPuncture_File_directory)
# set charge for each black hole # set charge for each black hole
for i in range(input_data.puncture_number): for i in range(input_data.puncture_number):
charge_Q_BH[i] = dimensionless_mass_BH[i] * input_data.parameter_BH[i,1] charge_Q_BH[i] = dimensionless_mass_BH[i] * input_data.parameter_BH[i,1]
## If using another method for initial data, read parameters directly from input ## If using another method for initial data, read parameters directly from input
else: else:
position_BH = input_data.position_BH position_BH = input_data.position_BH
momentum_BH = input_data.momentum_BH momentum_BH = input_data.momentum_BH
## angular_momentum_BH = input_data.angular_momentum_BH ## angular_momentum_BH = input_data.angular_momentum_BH
angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize spin angular momentum array angular_momentum_BH = numpy.zeros( (input_data.puncture_number, 3) ) ## initialize spin angular momentum array
mass_BH = numpy.zeros( input_data.puncture_number ) ## initialize mass array mass_BH = numpy.zeros( input_data.puncture_number ) ## initialize mass array
## Set charge and spin angular momentum for each puncture ## Set charge and spin angular momentum for each puncture
for i in range(input_data.puncture_number): for i in range(input_data.puncture_number):
if ( input_data.Symmetry == "octant-symmetry" ): if ( input_data.Symmetry == "octant-symmetry" ):
mass_BH[i] = input_data.parameter_BH[i,0] mass_BH[i] = input_data.parameter_BH[i,0]
charge_Q_BH[i] = mass_BH[i]* input_data.parameter_BH[i,1] charge_Q_BH[i] = mass_BH[i]* input_data.parameter_BH[i,1]
angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ]
elif ( input_data.Symmetry == "equatorial-symmetry" ): elif ( input_data.Symmetry == "equatorial-symmetry" ):
mass_BH[i] = input_data.parameter_BH[i,0] mass_BH[i] = input_data.parameter_BH[i,0]
charge_Q_BH[i] = mass_BH[i]* input_data.parameter_BH[i,1] charge_Q_BH[i] = mass_BH[i]* input_data.parameter_BH[i,1]
angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ] angular_momentum_BH[i] = [ 0.0, 0.0, (mass_BH[i]**2) * input_data.parameter_BH[i,2] ]
elif ( input_data.Symmetry == "no-symmetry" ): elif ( input_data.Symmetry == "no-symmetry" ):
mass_BH[i] = input_data.parameter_BH[i,0] mass_BH[i] = input_data.parameter_BH[i,0]
angular_momentum_BH[i] = (mass_BH[i]**2) * input_data.dimensionless_spin_BH[i] angular_momentum_BH[i] = (mass_BH[i]**2) * input_data.dimensionless_spin_BH[i]
charge_Q_BH[i] = mass_BH[i] * input_data.parameter_BH[i,1] charge_Q_BH[i] = mass_BH[i] * input_data.parameter_BH[i,1]
file1 = open( os.path.join(input_data.File_directory, "AMSS-NCKU.input"), "a") ## open file in append mode file1 = open( os.path.join(input_data.File_directory, "AMSS-NCKU.input"), "a") ## open file in append mode
## Output BSSN related settings ## Output BSSN related settings
print( file=file1 ) print( file=file1 )
print( "BSSN::chitiny = 1e-5", file=file1 ) print( "BSSN::chitiny = 1e-5", file=file1 )
print( "BSSN::time refinement start from level = ", input_data.refinement_level, file=file1 ) print( "BSSN::time refinement start from level = ", input_data.refinement_level, file=file1 )
print( "BSSN::BH_num = ", input_data.puncture_number, file=file1 ) print( "BSSN::BH_num = ", input_data.puncture_number, file=file1 )
for i in range(input_data.puncture_number): for i in range(input_data.puncture_number):
if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ): if (input_data.Initial_Data_Method == "Ansorg-TwoPuncture" ):
print( f"BSSN::Mass[{i}] = { bare_mass_BH[i] } ", file=file1 ) print( f"BSSN::Mass[{i}] = { bare_mass_BH[i] } ", file=file1 )
else: else:
print( f"BSSN::Mass[{i}] = { mass_BH[i] } ", file=file1 ) print( f"BSSN::Mass[{i}] = { mass_BH[i] } ", file=file1 )
print( f"BSSN::Qchar[{i}] = { charge_Q_BH[i] } ", file=file1 ) print( f"BSSN::Qchar[{i}] = { charge_Q_BH[i] } ", file=file1 )
print( f"BSSN::Porgx[{i}] = { position_BH[i,0] } ", file=file1 ) print( f"BSSN::Porgx[{i}] = { position_BH[i,0] } ", file=file1 )
print( f"BSSN::Porgy[{i}] = { position_BH[i,1] } ", file=file1 ) print( f"BSSN::Porgy[{i}] = { position_BH[i,1] } ", file=file1 )
print( f"BSSN::Porgz[{i}] = { position_BH[i,2] } ", file=file1 ) print( f"BSSN::Porgz[{i}] = { position_BH[i,2] } ", file=file1 )
print( f"BSSN::Pmomx[{i}] = { momentum_BH[i,0] } ", file=file1 ) print( f"BSSN::Pmomx[{i}] = { momentum_BH[i,0] } ", file=file1 )
print( f"BSSN::Pmomy[{i}] = { momentum_BH[i,1] } ", file=file1 ) print( f"BSSN::Pmomy[{i}] = { momentum_BH[i,1] } ", file=file1 )
print( f"BSSN::Pmomz[{i}] = { momentum_BH[i,2] } ", file=file1 ) print( f"BSSN::Pmomz[{i}] = { momentum_BH[i,2] } ", file=file1 )
print( f"BSSN::Spinx[{i}] = { angular_momentum_BH[i,0] } ", file=file1 ) print( f"BSSN::Spinx[{i}] = { angular_momentum_BH[i,0] } ", file=file1 )
print( f"BSSN::Spiny[{i}] = { angular_momentum_BH[i,1] } ", file=file1 ) print( f"BSSN::Spiny[{i}] = { angular_momentum_BH[i,1] } ", file=file1 )
print( f"BSSN::Spinz[{i}] = { angular_momentum_BH[i,2] } ", file=file1 ) print( f"BSSN::Spinz[{i}] = { angular_momentum_BH[i,2] } ", file=file1 )
print( file=file1 ) print( file=file1 )
file1.close() file1.close()
return return
################################################# #################################################