Compare commits

..

4 Commits

Author SHA1 Message Date
12e1f63d50 prolong3: 减少Z-pass 冗余计算 2026-03-02 21:20:49 +08:00
47f91ff46f prolong3:提升cache命中率 2026-03-02 10:31:46 +08:00
jaunatisblue
672b7ebee2 修改prolong 2026-03-02 02:01:07 +08:00
jaunatisblue
63bf180159 对prolong3做访存优化 2026-03-02 01:16:10 +08:00
19 changed files with 987 additions and 1556 deletions

View File

@@ -13,17 +13,14 @@ import numpy
## Setting MPI processes and the output file directory ## Setting MPI processes and the output file directory
File_directory = "GW150914" ## output file directory File_directory = "GW150914" ## output file directory
Output_directory = "binary_output" ## binary data file directory Output_directory = "binary_output" ## binary data file directory
## The file directory name should not be too long ## The file directory name should not be too long
MPI_processes = 64 ## number of mpi processes used in the simulation MPI_processes = 64 ## number of mpi processes used in the simulation
OMP_Threads = 3 ## number of OpenMP threads used by each MPI process
MPI_hosts = ["localhost", "192.168.20.102"] ## MPI hosts for multi-node runs GPU_Calculation = "no" ## Use GPU or not
MPI_processes_per_node = 32 ## MPI ranks launched on each node in MPI_hosts ## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0
GPU_Calculation = "no" ## Use GPU or not
## (prefer "no" in the current version, because the GPU part may have bugs when integrated in this Python interface)
CPU_Part = 1.0
GPU_Part = 0.0 GPU_Part = 0.0
################################################# #################################################
@@ -53,7 +50,7 @@ Check_Time = 100.0
Dump_Time = 100.0 ## time inteval dT for dumping binary data Dump_Time = 100.0 ## time inteval dT for dumping binary data
D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT' D2_Dump_Time = 100.0 ## dump the ascii data for 2d surface after dT'
Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT" Analysis_Time = 0.1 ## dump the puncture position and GW psi4 after dT"
Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number Evolution_Step_Number = 10000000 ## stop the calculation after the maximal step number
Courant_Factor = 0.5 ## Courant Factor Courant_Factor = 0.5 ## Courant Factor
Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength Dissipation = 0.15 ## Kreiss-Oliger Dissipation Strength

View File

@@ -7,7 +7,6 @@
#include <string> #include <string>
#include <cmath> #include <cmath>
#include <new> #include <new>
#include <vector>
using namespace std; using namespace std;
#include "misc.h" #include "misc.h"
@@ -18,168 +17,6 @@ using namespace std;
#include "interp_lb_profile.h" #include "interp_lb_profile.h"
#endif #endif
namespace
{
struct InterpBlockView
{
Block *bp;
double llb[dim];
double uub[dim];
};
struct BlockBinIndex
{
int bins[dim];
double lo[dim];
double inv[dim];
vector<InterpBlockView> views;
vector<vector<int>> bin_to_blocks;
bool valid;
BlockBinIndex() : valid(false)
{
for (int i = 0; i < dim; i++)
{
bins[i] = 1;
lo[i] = 0.0;
inv[i] = 0.0;
}
}
};
inline int clamp_int(int v, int lo, int hi)
{
return (v < lo) ? lo : ((v > hi) ? hi : v);
}
inline int coord_to_bin(double x, double lo, double inv, int nb)
{
if (nb <= 1 || inv <= 0.0)
return 0;
int b = int(floor((x - lo) * inv));
return clamp_int(b, 0, nb - 1);
}
inline int bin_loc(const BlockBinIndex &index, int b0, int b1, int b2)
{
return b0 + index.bins[0] * (b1 + index.bins[1] * b2);
}
inline bool point_in_block_view(const InterpBlockView &view, const double *pox, const double *DH)
{
for (int i = 0; i < dim; i++)
{
if (pox[i] - view.llb[i] < -DH[i] / 2 || pox[i] - view.uub[i] > DH[i] / 2)
return false;
}
return true;
}
void build_block_bin_index(Patch *patch, const double *DH, BlockBinIndex &index)
{
index = BlockBinIndex();
MyList<Block> *Bp = patch->blb;
while (Bp)
{
Block *BP = Bp->data;
InterpBlockView view;
view.bp = BP;
for (int i = 0; i < dim; i++)
{
#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
view.llb[i] = (feq(BP->bbox[i], patch->bbox[i], DH[i] / 2)) ? BP->bbox[i] + patch->lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
view.uub[i] = (feq(BP->bbox[dim + i], patch->bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - patch->uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
}
index.views.push_back(view);
if (Bp == patch->ble)
break;
Bp = Bp->next;
}
const int nblocks = int(index.views.size());
if (nblocks <= 0)
return;
int bins_1d = int(ceil(pow(double(nblocks), 1.0 / 3.0)));
bins_1d = clamp_int(bins_1d, 1, 32);
for (int i = 0; i < dim; i++)
{
index.bins[i] = bins_1d;
index.lo[i] = patch->bbox[i] + patch->lli[i] * DH[i];
const double hi = patch->bbox[dim + i] - patch->uui[i] * DH[i];
if (hi > index.lo[i] && bins_1d > 1)
index.inv[i] = bins_1d / (hi - index.lo[i]);
else
index.inv[i] = 0.0;
}
index.bin_to_blocks.resize(index.bins[0] * index.bins[1] * index.bins[2]);
for (int bi = 0; bi < nblocks; bi++)
{
const InterpBlockView &view = index.views[bi];
int bmin[dim], bmax[dim];
for (int d = 0; d < dim; d++)
{
const double low = view.llb[d] - DH[d] / 2;
const double up = view.uub[d] + DH[d] / 2;
bmin[d] = coord_to_bin(low, index.lo[d], index.inv[d], index.bins[d]);
bmax[d] = coord_to_bin(up, index.lo[d], index.inv[d], index.bins[d]);
if (bmax[d] < bmin[d])
{
int t = bmin[d];
bmin[d] = bmax[d];
bmax[d] = t;
}
}
for (int bz = bmin[2]; bz <= bmax[2]; bz++)
for (int by = bmin[1]; by <= bmax[1]; by++)
for (int bx = bmin[0]; bx <= bmax[0]; bx++)
index.bin_to_blocks[bin_loc(index, bx, by, bz)].push_back(bi);
}
index.valid = true;
}
int find_block_index_for_point(const BlockBinIndex &index, const double *pox, const double *DH)
{
if (!index.valid)
return -1;
const int bx = coord_to_bin(pox[0], index.lo[0], index.inv[0], index.bins[0]);
const int by = coord_to_bin(pox[1], index.lo[1], index.inv[1], index.bins[1]);
const int bz = coord_to_bin(pox[2], index.lo[2], index.inv[2], index.bins[2]);
const vector<int> &cand = index.bin_to_blocks[bin_loc(index, bx, by, bz)];
for (size_t ci = 0; ci < cand.size(); ci++)
{
const int bi = cand[ci];
if (point_in_block_view(index.views[bi], pox, DH))
return bi;
}
// Fallback to full scan for numerical edge cases around bin boundaries.
for (size_t bi = 0; bi < index.views.size(); bi++)
if (point_in_block_view(index.views[bi], pox, DH))
return int(bi);
return -1;
}
} // namespace
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi) Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
{ {
@@ -530,11 +367,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
owner_rank[j] = -1; owner_rank[j] = -1;
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points for (int j = 0; j < NN; j++) // run along points
{ {
@@ -557,24 +392,57 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp) // run along Blocks
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
//---> interpolation #ifdef Vertex
varl = VarList; #ifdef Cell
int k = 0; #error Both Cell and Vertex are defined
while (varl) // run along variables #endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
//---> interpolation
varl = VarList;
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
@@ -667,11 +535,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
owner_rank[j] = -1; owner_rank[j] = -1;
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
// --- Interpolation phase (identical to original) --- // --- Interpolation phase (identical to original) ---
for (int j = 0; j < NN; j++) for (int j = 0; j < NN; j++)
@@ -695,23 +561,56 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp)
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
varl = VarList; #ifdef Vertex
int k = 0; #ifdef Cell
while (varl) #error Both Cell and Vertex are defined
#endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
varl = VarList;
int k = 0;
while (varl)
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }
@@ -934,11 +833,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
MPI_Comm_group(MPI_COMM_WORLD, &world_group); MPI_Comm_group(MPI_COMM_WORLD, &world_group);
MPI_Comm_group(Comm_here, &local_group); MPI_Comm_group(Comm_here, &local_group);
double DH[dim]; double DH[dim], llb[dim], uub[dim];
for (int i = 0; i < dim; i++) for (int i = 0; i < dim; i++)
DH[i] = getdX(i); DH[i] = getdX(i);
BlockBinIndex block_index;
build_block_bin_index(this, DH, block_index);
for (int j = 0; j < NN; j++) // run along points for (int j = 0; j < NN; j++) // run along points
{ {
@@ -961,24 +858,57 @@ void Patch::Interp_Points(MyList<var> *VarList,
} }
} }
const int block_i = find_block_index_for_point(block_index, pox, DH); MyList<Block> *Bp = blb;
if (block_i >= 0) bool notfind = true;
while (notfind && Bp) // run along Blocks
{ {
Block *BP = block_index.views[block_i].bp; Block *BP = Bp->data;
owner_rank[j] = BP->rank;
if (myrank == BP->rank) bool flag = true;
for (int i = 0; i < dim; i++)
{ {
//---> interpolation #ifdef Vertex
varl = VarList; #ifdef Cell
int k = 0; #error Both Cell and Vertex are defined
while (varl) // run along variables #endif
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + (ghost_width - 0.5) * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - (ghost_width - 0.5) * DH[i];
#else
#ifdef Cell
llb[i] = (feq(BP->bbox[i], bbox[i], DH[i] / 2)) ? BP->bbox[i] + lli[i] * DH[i] : BP->bbox[i] + ghost_width * DH[i];
uub[i] = (feq(BP->bbox[dim + i], bbox[dim + i], DH[i] / 2)) ? BP->bbox[dim + i] - uui[i] * DH[i] : BP->bbox[dim + i] - ghost_width * DH[i];
#else
#error Not define Vertex nor Cell
#endif
#endif
if (XX[i][j] - llb[i] < -DH[i] / 2 || XX[i][j] - uub[i] > DH[i] / 2)
{ {
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k], flag = false;
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry); break;
varl = varl->next;
k++;
} }
} }
if (flag)
{
notfind = false;
owner_rank[j] = BP->rank;
if (myrank == BP->rank)
{
//---> interpolation
varl = VarList;
int k = 0;
while (varl) // run along variables
{
f_global_interp(BP->shape, BP->X[0], BP->X[1], BP->X[2], BP->fgfs[varl->data->sgfn], Shellf[j * num_var + k],
pox[0], pox[1], pox[2], ordn, varl->data->SoA, Symmetry);
varl = varl->next;
k++;
}
}
}
if (Bp == ble)
break;
Bp = Bp->next;
} }
} }

View File

@@ -3893,105 +3893,66 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
int node; int node;
MPI_Request *reqs = new MPI_Request[2 * cpusize]; MPI_Request *reqs;
MPI_Status *stats = new MPI_Status[2 * cpusize]; MPI_Status *stats;
int *req_node = new int[2 * cpusize]; reqs = new MPI_Request[2 * cpusize];
int *req_is_recv = new int[2 * cpusize]; stats = new MPI_Status[2 * cpusize];
int *completed = new int[2 * cpusize];
int req_no = 0; int req_no = 0;
int pending_recv = 0;
double **send_data = new double *[cpusize]; double **send_data, **rec_data;
double **rec_data = new double *[cpusize]; send_data = new double *[cpusize];
int *send_lengths = new int[cpusize]; rec_data = new double *[cpusize];
int *recv_lengths = new int[cpusize]; int length;
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ {
send_data[node] = rec_data[node] = 0; send_data[node] = rec_data[node] = 0;
send_lengths[node] = recv_lengths[node] = 0; if (node == myrank)
{
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 1" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
}
}
else
{
// send from this cpu to cpu#node
if (length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
send_data[node] = new double[length];
if (!send_data[node])
{
cout << "out of memory when new in short transfer, place 2" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
// receive from cpu#node to this cpu
if (length = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
{
rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 3" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
}
} }
// wait for all requests to complete
MPI_Waitall(req_no, reqs, stats);
// Post receives first so peers can progress rendezvous early.
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ if (rec_data[node])
if (node == myrank) continue; data_packer(rec_data[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
recv_lengths[node] = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
if (recv_lengths[node] > 0)
{
rec_data[node] = new double[recv_lengths[node]];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 1" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Irecv((void *)rec_data[node], recv_lengths[node], MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 1;
req_no++;
pending_recv++;
}
}
// Local transfer on this rank.
recv_lengths[myrank] = data_packer(0, src[myrank], dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
if (recv_lengths[myrank] > 0)
{
rec_data[myrank] = new double[recv_lengths[myrank]];
if (!rec_data[myrank])
{
cout << "out of memory when new in short transfer, place 2" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(rec_data[myrank], src[myrank], dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
}
// Pack and post sends.
for (node = 0; node < cpusize; node++)
{
if (node == myrank) continue;
send_lengths[node] = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
if (send_lengths[node] > 0)
{
send_data[node] = new double[send_lengths[node]];
if (!send_data[node])
{
cout << "out of memory when new in short transfer, place 3" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packer(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], send_lengths[node], MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 0;
req_no++;
}
}
// Unpack as soon as receive completes to reduce pure wait time.
while (pending_recv > 0)
{
int outcount = 0;
MPI_Waitsome(req_no, reqs, &outcount, completed, stats);
if (outcount == MPI_UNDEFINED) break;
for (int i = 0; i < outcount; i++)
{
int idx = completed[i];
if (idx >= 0 && req_is_recv[idx])
{
int recv_node = req_node[idx];
data_packer(rec_data[recv_node], src[recv_node], dst[recv_node], recv_node, UNPACK, VarList1, VarList2, Symmetry);
pending_recv--;
}
}
}
if (req_no > 0) MPI_Waitall(req_no, reqs, stats);
if (rec_data[myrank])
data_packer(rec_data[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry);
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ {
@@ -4003,13 +3964,8 @@ void Parallel::transfer(MyList<Parallel::gridseg> **src, MyList<Parallel::gridse
delete[] reqs; delete[] reqs;
delete[] stats; delete[] stats;
delete[] req_node;
delete[] req_is_recv;
delete[] completed;
delete[] send_data; delete[] send_data;
delete[] rec_data; delete[] rec_data;
delete[] send_lengths;
delete[] recv_lengths;
} }
// //
void Parallel::transfermix(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst, void Parallel::transfermix(MyList<Parallel::gridseg> **src, MyList<Parallel::gridseg> **dst,
@@ -4022,105 +3978,66 @@ void Parallel::transfermix(MyList<Parallel::gridseg> **src, MyList<Parallel::gri
int node; int node;
MPI_Request *reqs = new MPI_Request[2 * cpusize]; MPI_Request *reqs;
MPI_Status *stats = new MPI_Status[2 * cpusize]; MPI_Status *stats;
int *req_node = new int[2 * cpusize]; reqs = new MPI_Request[2 * cpusize];
int *req_is_recv = new int[2 * cpusize]; stats = new MPI_Status[2 * cpusize];
int *completed = new int[2 * cpusize];
int req_no = 0; int req_no = 0;
int pending_recv = 0;
double **send_data = new double *[cpusize]; double **send_data, **rec_data;
double **rec_data = new double *[cpusize]; send_data = new double *[cpusize];
int *send_lengths = new int[cpusize]; rec_data = new double *[cpusize];
int *recv_lengths = new int[cpusize]; int length;
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ {
send_data[node] = rec_data[node] = 0; send_data[node] = rec_data[node] = 0;
send_lengths[node] = recv_lengths[node] = 0; if (node == myrank)
{
if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 1" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packermix(rec_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
}
}
else
{
// send from this cpu to cpu#node
if (length = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry))
{
send_data[node] = new double[length];
if (!send_data[node])
{
cout << "out of memory when new in short transfer, place 2" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packermix(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
// receive from cpu#node to this cpu
if (length = data_packermix(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry))
{
rec_data[node] = new double[length];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 3" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Irecv((void *)rec_data[node], length, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no++);
}
}
} }
// wait for all requests to complete
MPI_Waitall(req_no, reqs, stats);
// Post receives first so peers can progress rendezvous early.
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ if (rec_data[node])
if (node == myrank) continue; data_packermix(rec_data[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
recv_lengths[node] = data_packermix(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
if (recv_lengths[node] > 0)
{
rec_data[node] = new double[recv_lengths[node]];
if (!rec_data[node])
{
cout << "out of memory when new in short transfer, place 1" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Irecv((void *)rec_data[node], recv_lengths[node], MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 1;
req_no++;
pending_recv++;
}
}
// Local transfer on this rank.
recv_lengths[myrank] = data_packermix(0, src[myrank], dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
if (recv_lengths[myrank] > 0)
{
rec_data[myrank] = new double[recv_lengths[myrank]];
if (!rec_data[myrank])
{
cout << "out of memory when new in short transfer, place 2" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packermix(rec_data[myrank], src[myrank], dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
}
// Pack and post sends.
for (node = 0; node < cpusize; node++)
{
if (node == myrank) continue;
send_lengths[node] = data_packermix(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
if (send_lengths[node] > 0)
{
send_data[node] = new double[send_lengths[node]];
if (!send_data[node])
{
cout << "out of memory when new in short transfer, place 3" << endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
data_packermix(send_data[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)send_data[node], send_lengths[node], MPI_DOUBLE, node, 1, MPI_COMM_WORLD, reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 0;
req_no++;
}
}
// Unpack as soon as receive completes to reduce pure wait time.
while (pending_recv > 0)
{
int outcount = 0;
MPI_Waitsome(req_no, reqs, &outcount, completed, stats);
if (outcount == MPI_UNDEFINED) break;
for (int i = 0; i < outcount; i++)
{
int idx = completed[i];
if (idx >= 0 && req_is_recv[idx])
{
int recv_node = req_node[idx];
data_packermix(rec_data[recv_node], src[recv_node], dst[recv_node], recv_node, UNPACK, VarList1, VarList2, Symmetry);
pending_recv--;
}
}
}
if (req_no > 0) MPI_Waitall(req_no, reqs, stats);
if (rec_data[myrank])
data_packermix(rec_data[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry);
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ {
@@ -4132,13 +4049,8 @@ void Parallel::transfermix(MyList<Parallel::gridseg> **src, MyList<Parallel::gri
delete[] reqs; delete[] reqs;
delete[] stats; delete[] stats;
delete[] req_node;
delete[] req_is_recv;
delete[] completed;
delete[] send_data; delete[] send_data;
delete[] rec_data; delete[] rec_data;
delete[] send_lengths;
delete[] recv_lengths;
} }
void Parallel::Sync(Patch *Pat, MyList<var> *VarList, int Symmetry) void Parallel::Sync(Patch *Pat, MyList<var> *VarList, int Symmetry)
{ {
@@ -4320,7 +4232,7 @@ Parallel::SyncCache::SyncCache()
: valid(false), cpusize(0), combined_src(0), combined_dst(0), : valid(false), cpusize(0), combined_src(0), combined_dst(0),
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0), send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0), send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0),
lengths_valid(false), tc_req_node(0), tc_req_is_recv(0), tc_completed(0) lengths_valid(false)
{ {
} }
// SyncCache invalidate: free grid segment lists but keep buffers // SyncCache invalidate: free grid segment lists but keep buffers
@@ -4359,15 +4271,11 @@ void Parallel::SyncCache::destroy()
if (recv_bufs) delete[] recv_bufs; if (recv_bufs) delete[] recv_bufs;
if (reqs) delete[] reqs; if (reqs) delete[] reqs;
if (stats) delete[] stats; if (stats) delete[] stats;
if (tc_req_node) delete[] tc_req_node;
if (tc_req_is_recv) delete[] tc_req_is_recv;
if (tc_completed) delete[] tc_completed;
combined_src = combined_dst = 0; combined_src = combined_dst = 0;
send_lengths = recv_lengths = 0; send_lengths = recv_lengths = 0;
send_buf_caps = recv_buf_caps = 0; send_buf_caps = recv_buf_caps = 0;
send_bufs = recv_bufs = 0; send_bufs = recv_bufs = 0;
reqs = 0; stats = 0; reqs = 0; stats = 0;
tc_req_node = 0; tc_req_is_recv = 0; tc_completed = 0;
cpusize = 0; max_reqs = 0; cpusize = 0; max_reqs = 0;
} }
// transfer_cached: reuse pre-allocated buffers from SyncCache // transfer_cached: reuse pre-allocated buffers from SyncCache
@@ -4381,96 +4289,64 @@ void Parallel::transfer_cached(MyList<Parallel::gridseg> **src, MyList<Parallel:
int cpusize = cache.cpusize; int cpusize = cache.cpusize;
int req_no = 0; int req_no = 0;
int pending_recv = 0;
int node; int node;
int *req_node = cache.tc_req_node;
int *req_is_recv = cache.tc_req_is_recv;
int *completed = cache.tc_completed;
// Post receives first so peers can progress rendezvous early.
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ {
if (node == myrank) continue; if (node == myrank)
int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[node] = rlength;
if (rlength > 0)
{ {
if (rlength > cache.recv_buf_caps[node]) int length = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[node] = length;
if (length > 0)
{ {
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; if (length > cache.recv_buf_caps[node])
cache.recv_bufs[node] = new double[rlength]; {
cache.recv_buf_caps[node] = rlength; if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
cache.recv_bufs[node] = new double[length];
cache.recv_buf_caps[node] = length;
}
data_packer(cache.recv_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
} }
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 1;
req_no++;
pending_recv++;
} }
} else
// Local transfer on this rank.
int self_len = data_packer(0, src[myrank], dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[myrank] = self_len;
if (self_len > 0)
{
if (self_len > cache.recv_buf_caps[myrank])
{ {
if (cache.recv_bufs[myrank]) delete[] cache.recv_bufs[myrank]; // send
cache.recv_bufs[myrank] = new double[self_len]; int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
cache.recv_buf_caps[myrank] = self_len; cache.send_lengths[node] = slength;
if (slength > 0)
{
if (slength > cache.send_buf_caps[node])
{
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
cache.send_bufs[node] = new double[slength];
cache.send_buf_caps[node] = slength;
}
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
}
// recv
int rlength = data_packer(0, src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[node] = rlength;
if (rlength > 0)
{
if (rlength > cache.recv_buf_caps[node])
{
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
cache.recv_bufs[node] = new double[rlength];
cache.recv_buf_caps[node] = rlength;
}
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
}
} }
data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
} }
// Pack and post sends. MPI_Waitall(req_no, cache.reqs, cache.stats);
for (node = 0; node < cpusize; node++) for (node = 0; node < cpusize; node++)
{ if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
if (node == myrank) continue; data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
int slength = data_packer(0, src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
cache.send_lengths[node] = slength;
if (slength > 0)
{
if (slength > cache.send_buf_caps[node])
{
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
cache.send_bufs[node] = new double[slength];
cache.send_buf_caps[node] = slength;
}
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 0;
req_no++;
}
}
// Unpack as soon as receive completes to reduce pure wait time.
while (pending_recv > 0)
{
int outcount = 0;
MPI_Waitsome(req_no, cache.reqs, &outcount, completed, cache.stats);
if (outcount == MPI_UNDEFINED) break;
for (int i = 0; i < outcount; i++)
{
int idx = completed[i];
if (idx >= 0 && req_is_recv[idx])
{
int recv_node_i = req_node[idx];
data_packer(cache.recv_bufs[recv_node_i], src[recv_node_i], dst[recv_node_i], recv_node_i, UNPACK, VarList1, VarList2, Symmetry);
pending_recv--;
}
}
}
if (req_no > 0) MPI_Waitall(req_no, cache.reqs, cache.stats);
if (self_len > 0)
data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry);
} }
// Sync_cached: build grid segment lists on first call, reuse on subsequent calls
void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache) void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache)
{ {
if (!cache.valid) if (!cache.valid)
@@ -4498,9 +4374,6 @@ void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmet
cache.max_reqs = 2 * cpusize; cache.max_reqs = 2 * cpusize;
cache.reqs = new MPI_Request[cache.max_reqs]; cache.reqs = new MPI_Request[cache.max_reqs];
cache.stats = new MPI_Status[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs];
cache.tc_req_node = new int[cache.max_reqs];
cache.tc_req_is_recv = new int[cache.max_reqs];
cache.tc_completed = new int[cache.max_reqs];
} }
for (int node = 0; node < cpusize; node++) for (int node = 0; node < cpusize; node++)
@@ -4601,9 +4474,6 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
cache.max_reqs = 2 * cpusize; cache.max_reqs = 2 * cpusize;
cache.reqs = new MPI_Request[cache.max_reqs]; cache.reqs = new MPI_Request[cache.max_reqs];
cache.stats = new MPI_Status[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs];
cache.tc_req_node = new int[cache.max_reqs];
cache.tc_req_is_recv = new int[cache.max_reqs];
cache.tc_completed = new int[cache.max_reqs];
} }
for (int node = 0; node < cpusize; node++) for (int node = 0; node < cpusize; node++)
@@ -4674,11 +4544,6 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
int cpusize = cache.cpusize; int cpusize = cache.cpusize;
state.req_no = 0; state.req_no = 0;
state.active = true; state.active = true;
state.pending_recv = 0;
// Allocate tracking arrays
delete[] state.req_node; delete[] state.req_is_recv;
state.req_node = new int[cache.max_reqs];
state.req_is_recv = new int[cache.max_reqs];
MyList<Parallel::gridseg> **src = cache.combined_src; MyList<Parallel::gridseg> **src = cache.combined_src;
MyList<Parallel::gridseg> **dst = cache.combined_dst; MyList<Parallel::gridseg> **dst = cache.combined_dst;
@@ -4723,8 +4588,6 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
cache.send_buf_caps[node] = slength; cache.send_buf_caps[node] = slength;
} }
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry); data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
state.req_node[state.req_no] = node;
state.req_is_recv[state.req_no] = 0;
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
} }
int rlength; int rlength;
@@ -4742,60 +4605,29 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
cache.recv_bufs[node] = new double[rlength]; cache.recv_bufs[node] = new double[rlength];
cache.recv_buf_caps[node] = rlength; cache.recv_buf_caps[node] = rlength;
} }
state.req_node[state.req_no] = node;
state.req_is_recv[state.req_no] = 1;
state.pending_recv++;
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++); MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
} }
} }
} }
cache.lengths_valid = true; cache.lengths_valid = true;
} }
// Sync_finish: progressive unpack as receives complete, then wait for sends // Sync_finish: wait for async MPI operations and unpack
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state, void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,
MyList<var> *VarList, int Symmetry) MyList<var> *VarList, int Symmetry)
{ {
if (!state.active) if (!state.active)
return; return;
int myrank; MPI_Waitall(state.req_no, cache.reqs, cache.stats);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
int cpusize = cache.cpusize;
MyList<Parallel::gridseg> **src = cache.combined_src; MyList<Parallel::gridseg> **src = cache.combined_src;
MyList<Parallel::gridseg> **dst = cache.combined_dst; MyList<Parallel::gridseg> **dst = cache.combined_dst;
// Unpack local data first (no MPI needed) for (int node = 0; node < cpusize; node++)
if (cache.recv_bufs[myrank] && cache.recv_lengths[myrank] > 0) if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList, VarList, Symmetry); data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
// Progressive unpack of remote receives
if (state.pending_recv > 0 && state.req_no > 0)
{
int pending = state.pending_recv;
int *completed = new int[cache.max_reqs];
while (pending > 0)
{
int outcount = 0;
MPI_Waitsome(state.req_no, cache.reqs, &outcount, completed, cache.stats);
if (outcount == MPI_UNDEFINED) break;
for (int i = 0; i < outcount; i++)
{
int idx = completed[i];
if (idx >= 0 && state.req_is_recv[idx])
{
int recv_node = state.req_node[idx];
data_packer(cache.recv_bufs[recv_node], src[recv_node], dst[recv_node], recv_node, UNPACK, VarList, VarList, Symmetry);
pending--;
}
}
}
delete[] completed;
}
// Wait for remaining sends
if (state.req_no > 0) MPI_Waitall(state.req_no, cache.reqs, cache.stats);
delete[] state.req_node; state.req_node = 0;
delete[] state.req_is_recv; state.req_is_recv = 0;
state.active = false; state.active = false;
} }
// collect buffer grid segments or blocks for the periodic boundary condition of given patch // collect buffer grid segments or blocks for the periodic boundary condition of given patch
@@ -5862,9 +5694,6 @@ void Parallel::Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
cache.max_reqs = 2 * cpusize; cache.max_reqs = 2 * cpusize;
cache.reqs = new MPI_Request[cache.max_reqs]; cache.reqs = new MPI_Request[cache.max_reqs];
cache.stats = new MPI_Status[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs];
cache.tc_req_node = new int[cache.max_reqs];
cache.tc_req_is_recv = new int[cache.max_reqs];
cache.tc_completed = new int[cache.max_reqs];
} }
MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL); MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL);
@@ -5911,9 +5740,6 @@ void Parallel::OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
cache.max_reqs = 2 * cpusize; cache.max_reqs = 2 * cpusize;
cache.reqs = new MPI_Request[cache.max_reqs]; cache.reqs = new MPI_Request[cache.max_reqs];
cache.stats = new MPI_Status[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs];
cache.tc_req_node = new int[cache.max_reqs];
cache.tc_req_is_recv = new int[cache.max_reqs];
cache.tc_completed = new int[cache.max_reqs];
} }
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL); MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
@@ -5960,9 +5786,6 @@ void Parallel::OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
cache.max_reqs = 2 * cpusize; cache.max_reqs = 2 * cpusize;
cache.reqs = new MPI_Request[cache.max_reqs]; cache.reqs = new MPI_Request[cache.max_reqs];
cache.stats = new MPI_Status[cache.max_reqs]; cache.stats = new MPI_Status[cache.max_reqs];
cache.tc_req_node = new int[cache.max_reqs];
cache.tc_req_is_recv = new int[cache.max_reqs];
cache.tc_completed = new int[cache.max_reqs];
} }
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL); MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
@@ -5984,98 +5807,58 @@ void Parallel::OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
int cpusize = cache.cpusize; int cpusize = cache.cpusize;
int req_no = 0; int req_no = 0;
int pending_recv = 0;
int *req_node = new int[cache.max_reqs];
int *req_is_recv = new int[cache.max_reqs];
int *completed = new int[cache.max_reqs];
// Post receives first so peers can progress rendezvous early.
for (int node = 0; node < cpusize; node++) for (int node = 0; node < cpusize; node++)
{ {
if (node == myrank) continue; if (node == myrank)
int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[node] = rlength;
if (rlength > 0)
{ {
if (rlength > cache.recv_buf_caps[node]) int length = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[node] = length;
if (length > 0)
{ {
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node]; if (length > cache.recv_buf_caps[node])
cache.recv_bufs[node] = new double[rlength]; {
cache.recv_buf_caps[node] = rlength; if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
cache.recv_bufs[node] = new double[length];
cache.recv_buf_caps[node] = length;
}
data_packermix(cache.recv_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
} }
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 1;
req_no++;
pending_recv++;
} }
} else
// Local transfer on this rank.
int self_len = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[myrank] = self_len;
if (self_len > 0)
{
if (self_len > cache.recv_buf_caps[myrank])
{ {
if (cache.recv_bufs[myrank]) delete[] cache.recv_bufs[myrank]; int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
cache.recv_bufs[myrank] = new double[self_len]; cache.send_lengths[node] = slength;
cache.recv_buf_caps[myrank] = self_len; if (slength > 0)
{
if (slength > cache.send_buf_caps[node])
{
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
cache.send_bufs[node] = new double[slength];
cache.send_buf_caps[node] = slength;
}
data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
}
int rlength = data_packermix(0, cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
cache.recv_lengths[node] = rlength;
if (rlength > 0)
{
if (rlength > cache.recv_buf_caps[node])
{
if (cache.recv_bufs[node]) delete[] cache.recv_bufs[node];
cache.recv_bufs[node] = new double[rlength];
cache.recv_buf_caps[node] = rlength;
}
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no++);
}
} }
data_packermix(cache.recv_bufs[myrank], cache.combined_src[myrank], cache.combined_dst[myrank], myrank, PACK, VarList1, VarList2, Symmetry);
} }
// Pack and post sends. MPI_Waitall(req_no, cache.reqs, cache.stats);
for (int node = 0; node < cpusize; node++) for (int node = 0; node < cpusize; node++)
{ if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
if (node == myrank) continue; data_packermix(cache.recv_bufs[node], cache.combined_src[node], cache.combined_dst[node], node, UNPACK, VarList1, VarList2, Symmetry);
int slength = data_packermix(0, cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
cache.send_lengths[node] = slength;
if (slength > 0)
{
if (slength > cache.send_buf_caps[node])
{
if (cache.send_bufs[node]) delete[] cache.send_bufs[node];
cache.send_bufs[node] = new double[slength];
cache.send_buf_caps[node] = slength;
}
data_packermix(cache.send_bufs[node], cache.combined_src[myrank], cache.combined_dst[myrank], node, PACK, VarList1, VarList2, Symmetry);
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 1, MPI_COMM_WORLD, cache.reqs + req_no);
req_node[req_no] = node;
req_is_recv[req_no] = 0;
req_no++;
}
}
// Unpack as soon as receive completes to reduce pure wait time.
while (pending_recv > 0)
{
int outcount = 0;
MPI_Waitsome(req_no, cache.reqs, &outcount, completed, cache.stats);
if (outcount == MPI_UNDEFINED) break;
for (int i = 0; i < outcount; i++)
{
int idx = completed[i];
if (idx >= 0 && req_is_recv[idx])
{
int recv_node_i = req_node[idx];
data_packermix(cache.recv_bufs[recv_node_i], cache.combined_src[recv_node_i], cache.combined_dst[recv_node_i], recv_node_i, UNPACK, VarList1, VarList2, Symmetry);
pending_recv--;
}
}
}
if (req_no > 0) MPI_Waitall(req_no, cache.reqs, cache.stats);
if (self_len > 0)
data_packermix(cache.recv_bufs[myrank], cache.combined_src[myrank], cache.combined_dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry);
delete[] req_node;
delete[] req_is_recv;
delete[] completed;
} }
// collect all buffer grid segments or blocks for given patch // collect all buffer grid segments or blocks for given patch

View File

@@ -108,9 +108,6 @@ namespace Parallel
MPI_Status *stats; MPI_Status *stats;
int max_reqs; int max_reqs;
bool lengths_valid; bool lengths_valid;
int *tc_req_node;
int *tc_req_is_recv;
int *tc_completed;
SyncCache(); SyncCache();
void invalidate(); void invalidate();
void destroy(); void destroy();
@@ -124,10 +121,7 @@ namespace Parallel
struct AsyncSyncState { struct AsyncSyncState {
int req_no; int req_no;
bool active; bool active;
int *req_node; AsyncSyncState() : req_no(0), active(false) {}
int *req_is_recv;
int pending_recv;
AsyncSyncState() : req_no(0), active(false), req_node(0), req_is_recv(0), pending_recv(0) {}
}; };
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,

View File

@@ -36,18 +36,12 @@ using namespace std;
#include "myglobal.h" #include "myglobal.h"
#endif #endif
#include "perf.h" #include "perf.h"
#include "derivatives.h" #include "derivatives.h"
#include "ricci_gamma.h" #include "ricci_gamma.h"
// Compile-time switch for per-timestep memory usage collection/printing. //================================================================================================
// Default is OFF to reduce overhead in production runs.
#ifndef BSSN_ENABLE_MEM_USAGE_LOG
#define BSSN_ENABLE_MEM_USAGE_LOG 0
#endif
//================================================================================================
// define bssn_class // define bssn_class
@@ -742,8 +736,6 @@ void bssn_class::Initialize()
sync_cache_cor = new Parallel::SyncCache[GH->levels]; sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels]; sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels]; sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
} }
//================================================================================================ //================================================================================================
@@ -2034,9 +2026,9 @@ void bssn_class::Read_Ansorg()
//================================================================================================ //================================================================================================
void bssn_class::Evolve(int Steps) void bssn_class::Evolve(int Steps)
{ {
double prev_clock = 0.0, curr_clock = 0.0; clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0; LastAnas = 0;
#if 0 #if 0
@@ -2135,24 +2127,22 @@ void bssn_class::Evolve(int Steps)
#endif #endif
*/ */
#if BSSN_ENABLE_MEM_USAGE_LOG perf bssn_perf;
perf bssn_perf; size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
#endif
for (int lev = 0; lev < GH->levels; lev++) for (int lev = 0; lev < GH->levels; lev++)
GH->Lt[lev] = PhysTime; GH->Lt[lev] = PhysTime;
GH->settrfls(trfls); GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++) for (int ncount = 1; ncount < Steps + 1; ncount++)
{ {
// special for large mass ratio consideration // special for large mass ratio consideration
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6) // if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
// { GH->levels=GH->movls; } // { GH->levels=GH->movls; }
if (myrank == 0) if (myrank == 0)
curr_clock = MPI_Wtime(); curr_clock = clock();
#if (PSTR == 0) #if (PSTR == 0)
RecursiveStep(0); RecursiveStep(0);
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3) #elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
@@ -2205,16 +2195,16 @@ void bssn_class::Evolve(int Steps)
} }
} }
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = MPI_Wtime(); curr_clock = clock();
cout << endl; cout << endl;
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " " cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime << " "
<< " Computer used " << (curr_clock - prev_clock) << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
// cout << endl; // cout << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)
break; break;
@@ -2223,7 +2213,7 @@ void bssn_class::Evolve(int Steps)
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
#endif #endif
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2)) #if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
@@ -2232,23 +2222,21 @@ void bssn_class::Evolve(int Steps)
// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor); // fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor);
#endif #endif
#if BSSN_ENABLE_MEM_USAGE_LOG // Retrieve memory usage information used during computation; master process prints it
// Retrieve memory usage information used during computation; master process prints it bssn_perf.MemoryUsage(&current_min, &current_avg, &current_max,
bssn_perf.MemoryUsage(&current_min, &current_avg, &current_max, &peak_min, &peak_avg, &peak_max, nprocs);
&peak_min, &peak_avg, &peak_max, nprocs); if (myrank == 0)
if (myrank == 0) {
{ printf(" Memory usage: current %0.4lg/%0.4lg/%0.4lgMB, "
printf(" Memory usage: current %0.4lg/%0.4lg/%0.4lgMB, " "peak %0.4lg/%0.4lg/%0.4lgMB\n",
"peak %0.4lg/%0.4lg/%0.4lgMB\n", (double)current_min / (1024.0 * 1024.0),
(double)current_min / (1024.0 * 1024.0), (double)current_avg / (1024.0 * 1024.0),
(double)current_avg / (1024.0 * 1024.0), (double)current_max / (1024.0 * 1024.0),
(double)current_max / (1024.0 * 1024.0), (double)peak_min / (1024.0 * 1024.0),
(double)peak_min / (1024.0 * 1024.0), (double)peak_avg / (1024.0 * 1024.0),
(double)peak_avg / (1024.0 * 1024.0), (double)peak_max / (1024.0 * 1024.0));
(double)peak_max / (1024.0 * 1024.0)); cout << endl;
cout << endl; }
}
#endif
// Output puncture positions at each step // Output puncture positions at each step
if (myrank == 0) if (myrank == 0)
@@ -2441,7 +2429,7 @@ void bssn_class::RecursiveStep(int lev)
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
#endif #endif
} }
@@ -2620,7 +2608,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
#endif #endif
} }
@@ -2787,7 +2775,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor)) fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -2802,7 +2790,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -2821,7 +2809,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -2837,7 +2825,7 @@ void bssn_class::ParallelStep()
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); } for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -5808,7 +5796,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif #endif
#if (RPB == 0) #if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
@@ -5832,7 +5820,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif #endif
@@ -5859,7 +5847,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif #endif
#if (RPB == 0) #if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
@@ -5883,7 +5871,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif #endif
@@ -5952,7 +5940,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
} }
#if (RPB == 0) #if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]); Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
@@ -5962,7 +5950,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif #endif
@@ -5974,7 +5962,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
else // no time refinement levels and for all same time levels else // no time refinement levels and for all same time levels
{ {
#if (RPB == 0) #if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]); Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
@@ -5984,7 +5972,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif #endif
@@ -6039,7 +6027,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
} }
#if (RPB == 0) #if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]); Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
@@ -6049,7 +6037,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif #endif
@@ -6063,7 +6051,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
if (myrank == 0) if (myrank == 0)
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
#if (RPB == 0) #if (RPB == 0)
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]); Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
@@ -6073,7 +6061,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif #endif
@@ -6114,7 +6102,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif #endif
@@ -6127,7 +6115,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
{ {
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif #endif

View File

@@ -130,8 +130,6 @@ public:
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1] Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev] Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor; monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
monitor *ConVMonitor; monitor *ConVMonitor;

View File

@@ -1891,7 +1891,7 @@ void bssn_class::Read_Ansorg()
void bssn_class::Evolve(int Steps) void bssn_class::Evolve(int Steps)
{ {
double prev_clock = 0.0, curr_clock = 0.0; clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0; double LastDump = 0.0, LastCheck = 0.0, Last2dDump = 0.0;
LastAnas = 0; LastAnas = 0;
#if 0 #if 0
@@ -2035,12 +2035,10 @@ void bssn_class::Evolve(int Steps)
GH->settrfls(trfls); GH->settrfls(trfls);
for (int ncount = 1; ncount < Steps + 1; ncount++) for (int ncount = 1; ncount < Steps + 1; ncount++)
{ {
if (myrank == 0) cout << "Before Step: " << ncount << " My Rank: " << myrank
curr_clock = MPI_Wtime(); << " takes " << MPI_Wtime() - beg_time << " seconds!" << endl;
cout << "Before Step: " << ncount << " My Rank: " << myrank
<< " takes " << MPI_Wtime() - beg_time << " seconds!" << endl;
beg_time = MPI_Wtime(); beg_time = MPI_Wtime();
#if (PSTR == 0) #if (PSTR == 0)
RecursiveStep(0); RecursiveStep(0);
@@ -2097,10 +2095,10 @@ void bssn_class::Evolve(int Steps)
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = MPI_Wtime(); curr_clock = clock();
cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl; cout << "Timestep # " << ncount << ": integrating to time: " << PhysTime << endl;
cout << "used " << (curr_clock - prev_clock) << " seconds!" << endl; cout << "used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds!" << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)

View File

@@ -59,10 +59,9 @@
real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz real*8, dimension(ex(1),ex(2),ex(3)),intent(out) :: Rxx,Rxy,Rxz,Ryy,Ryz,Rzz
real*8,intent(in) :: eps real*8,intent(in) :: eps
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: ham_Res, movx_Res, movy_Res, movz_Res real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: ham_Res, movx_Res, movy_Res, movz_Res
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
! gont = 0: success; gont = 1: something wrong ! gont = 0: success; gont = 1: something wrong
integer::gont integer::gont
integer :: i,j,k
!~~~~~~> Other variables: !~~~~~~> Other variables:
@@ -84,18 +83,11 @@
real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz real*8, dimension(ex(1),ex(2),ex(3)) :: gupxx,gupxy,gupxz
real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz real*8, dimension(ex(1),ex(2),ex(3)) :: gupyy,gupyz,gupzz
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
real*8 :: dX, dY, dZ, PI real*8 :: dX, dY, dZ, PI
real*8 :: divb_loc,det_loc real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
real*8 :: gupxx_loc,gupxy_loc,gupxz_loc,gupyy_loc,gupyz_loc,gupzz_loc real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
real*8 :: Rxx_loc,Rxy_loc,Rxz_loc,Ryy_loc,Ryz_loc,Rzz_loc real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
real*8 :: fxx_loc,fxy_loc,fxz_loc
real*8 :: Gamxa_loc,Gamya_loc,Gamza_loc
real*8 :: f_loc,chin_loc
real*8 :: l_fxx,l_fxy,l_fxz,l_fyy,l_fyz,l_fzz,S_loc
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
double precision,parameter::FF = 0.75d0,eta=2.d0 double precision,parameter::FF = 0.75d0,eta=2.d0
real*8, parameter :: F1o3 = 1.D0/3.D0, F2o3 = 2.D0/3.D0,F3o2=1.5d0, F1o6 = 1.D0/6.D0 real*8, parameter :: F1o3 = 1.D0/3.D0, F2o3 = 2.D0/3.D0,F3o2=1.5d0, F1o6 = 1.D0/6.D0
real*8, parameter :: F16=1.6d1,F8=8.d0 real*8, parameter :: F16=1.6d1,F8=8.d0
@@ -104,11 +96,11 @@
real*8, dimension(ex(1),ex(2),ex(3)) :: reta real*8, dimension(ex(1),ex(2),ex(3)) :: reta
#endif #endif
#if (GAUGE == 6 || GAUGE == 7) #if (GAUGE == 6 || GAUGE == 7)
integer :: BHN integer :: BHN,i,j,k
real*8, dimension(9) :: Porg real*8, dimension(9) :: Porg
real*8, dimension(3) :: Mass real*8, dimension(3) :: Mass
real*8 :: r1,r2,M,A,w1,w2,C1,C2 real*8 :: r1,r2,M,A,w1,w2,C1,C2
real*8, dimension(ex(1),ex(2),ex(3)) :: reta real*8, dimension(ex(1),ex(2),ex(3)) :: reta
call getpbh(BHN,Porg,Mass) call getpbh(BHN,Porg,Mass)
@@ -153,204 +145,174 @@
dY = Y(2) - Y(1) dY = Y(2) - Y(1)
dZ = Z(2) - Z(1) dZ = Z(2) - Z(1)
do k=1,ex(3) alpn1 = Lap + ONE
do j=1,ex(2) chin1 = chi + ONE
do i=1,ex(1) gxx = dxx + ONE
alpn1(i,j,k) = Lap(i,j,k) + ONE gyy = dyy + ONE
chin1(i,j,k) = chi(i,j,k) + ONE gzz = dzz + ONE
gxx(i,j,k) = dxx(i,j,k) + ONE
gyy(i,j,k) = dyy(i,j,k) + ONE
gzz(i,j,k) = dzz(i,j,k) + ONE
enddo
enddo
enddo
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev) call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev) call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev) call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) div_beta = betaxx + betayy + betazz
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev) call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev) call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev) call fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
do k=1,ex(3) call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
do j=1,ex(2)
do i=1,ex(1) gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
divb_loc = betaxx(i,j,k) + betayy(i,j,k) + betazz(i,j,k) TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
div_beta(i,j,k) = divb_loc
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
chi_rhs(i,j,k) = F2o3 * chin1(i,j,k) * (alpn1(i,j,k) * trK(i,j,k) - divb_loc) TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
gxx_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axx(i,j,k) - F2o3 * gxx(i,j,k) * divb_loc + & gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
TWO * ( gxx(i,j,k) * betaxx(i,j,k) + gxy(i,j,k) * betayx(i,j,k) + gxz(i,j,k) * betazx(i,j,k) ) TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
gyy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayy(i,j,k) - F2o3 * gyy(i,j,k) * divb_loc + & gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
TWO * ( gxy(i,j,k) * betaxy(i,j,k) + gyy(i,j,k) * betayy(i,j,k) + gyz(i,j,k) * betazy(i,j,k) ) gxx * betaxy + gxz * betazy + &
gyy * betayx + gyz * betazx &
gzz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Azz(i,j,k) - F2o3 * gzz(i,j,k) * divb_loc + & - gxy * betazz
TWO * ( gxz(i,j,k) * betaxz(i,j,k) + gyz(i,j,k) * betayz(i,j,k) + gzz(i,j,k) * betazz(i,j,k) )
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
gxy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axy(i,j,k) + F1o3 * gxy(i,j,k) * divb_loc + & gxy * betaxz + gyy * betayz + &
gxx(i,j,k) * betaxy(i,j,k) + gxz(i,j,k) * betazy(i,j,k) + gyy(i,j,k) * betayx(i,j,k) + & gxz * betaxy + gzz * betazy &
gyz(i,j,k) * betazx(i,j,k) - gxy(i,j,k) * betazz(i,j,k) - gyz * betaxx
gyz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayz(i,j,k) + F1o3 * gyz(i,j,k) * divb_loc + & gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
gxy(i,j,k) * betaxz(i,j,k) + gyy(i,j,k) * betayz(i,j,k) + gxz(i,j,k) * betaxy(i,j,k) + & gxx * betaxz + gxy * betayz + &
gzz(i,j,k) * betazy(i,j,k) - gyz(i,j,k) * betaxx(i,j,k) gyz * betayx + gzz * betazx &
- gxz * betayy !rhs for gij
gxz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axz(i,j,k) + F1o3 * gxz(i,j,k) * divb_loc + &
gxx(i,j,k) * betaxz(i,j,k) + gxy(i,j,k) * betayz(i,j,k) + gyz(i,j,k) * betayx(i,j,k) + & ! invert tilted metric
gzz(i,j,k) * betazx(i,j,k) - gxz(i,j,k) * betayy(i,j,k) gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
det_loc = gxx(i,j,k) * gyy(i,j,k) * gzz(i,j,k) + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) + & gupxx = ( gyy * gzz - gyz * gyz ) / gupzz
gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) - gxz(i,j,k) * gyy(i,j,k) * gxz(i,j,k) - & gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
gxy(i,j,k) * gxy(i,j,k) * gzz(i,j,k) - gxx(i,j,k) * gyz(i,j,k) * gyz(i,j,k) gupxz = ( gxy * gyz - gyy * gxz ) / gupzz
gupxx_loc = ( gyy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gyz(i,j,k) ) / det_loc gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
gupxy_loc = - ( gxy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gxz(i,j,k) ) / det_loc gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
gupxz_loc = ( gxy(i,j,k) * gyz(i,j,k) - gyy(i,j,k) * gxz(i,j,k) ) / det_loc gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
gupyy_loc = ( gxx(i,j,k) * gzz(i,j,k) - gxz(i,j,k) * gxz(i,j,k) ) / det_loc
gupyz_loc = - ( gxx(i,j,k) * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / det_loc if(co == 0)then
gupzz_loc = ( gxx(i,j,k) * gyy(i,j,k) - gxy(i,j,k) * gxy(i,j,k) ) / det_loc ! Gam^i_Res = Gam^i + gup^ij_,j
gupxx(i,j,k) = gupxx_loc Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
gupxy(i,j,k) = gupxy_loc +gupxy*(gupxx*gxyx+gupxy*gyyx+gupxz*gyzx)&
gupxz(i,j,k) = gupxz_loc +gupxz*(gupxx*gxzx+gupxy*gyzx+gupxz*gzzx)&
gupyy(i,j,k) = gupyy_loc +gupxx*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
gupyz(i,j,k) = gupyz_loc +gupxy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
gupzz(i,j,k) = gupzz_loc +gupxz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
+gupxx*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
if(co == 0)then +gupxy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
Gmx_Res(i,j,k) = Gamx(i,j,k) - ( & +gupxz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
gupxx_loc*(gupxx_loc*gxxx(i,j,k)+gupxy_loc*gxyx(i,j,k)+gupxz_loc*gxzx(i,j,k)) + & Gmy_Res = Gamy - (gupxx*(gupxy*gxxx+gupyy*gxyx+gupyz*gxzx)&
gupxy_loc*(gupxx_loc*gxyx(i,j,k)+gupxy_loc*gyyx(i,j,k)+gupxz_loc*gyzx(i,j,k)) + & +gupxy*(gupxy*gxyx+gupyy*gyyx+gupyz*gyzx)&
gupxz_loc*(gupxx_loc*gxzx(i,j,k)+gupxy_loc*gyzx(i,j,k)+gupxz_loc*gzzx(i,j,k)) + & +gupxz*(gupxy*gxzx+gupyy*gyzx+gupyz*gzzx)&
gupxx_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + & +gupxy*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
gupxy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + & +gupyy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
gupxz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + & +gupyz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
gupxx_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + & +gupxy*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
gupxy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + & +gupyy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
gupxz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k))) +gupyz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
Gmy_Res(i,j,k) = Gamy(i,j,k) - ( & Gmz_Res = Gamz - (gupxx*(gupxz*gxxx+gupyz*gxyx+gupzz*gxzx)&
gupxx_loc*(gupxy_loc*gxxx(i,j,k)+gupyy_loc*gxyx(i,j,k)+gupyz_loc*gxzx(i,j,k)) + & +gupxy*(gupxz*gxyx+gupyz*gyyx+gupzz*gyzx)&
gupxy_loc*(gupxy_loc*gxyx(i,j,k)+gupyy_loc*gyyx(i,j,k)+gupyz_loc*gyzx(i,j,k)) + & +gupxz*(gupxz*gxzx+gupyz*gyzx+gupzz*gzzx)&
gupxz_loc*(gupxy_loc*gxzx(i,j,k)+gupyy_loc*gyzx(i,j,k)+gupyz_loc*gzzx(i,j,k)) + & +gupxy*(gupxz*gxxy+gupyz*gxyy+gupzz*gxzy)&
gupxy_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + & +gupyy*(gupxz*gxyy+gupyz*gyyy+gupzz*gyzy)&
gupyy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + & +gupyz*(gupxz*gxzy+gupyz*gyzy+gupzz*gzzy)&
gupyz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + & +gupxz*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
gupxy_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + & +gupyz*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
gupyy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + & +gupzz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
gupyz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k))) endif
Gmz_Res(i,j,k) = Gamz(i,j,k) - ( &
gupxx_loc*(gupxz_loc*gxxx(i,j,k)+gupyz_loc*gxyx(i,j,k)+gupzz_loc*gxzx(i,j,k)) + & ! second kind of connection
gupxy_loc*(gupxz_loc*gxyx(i,j,k)+gupyz_loc*gyyx(i,j,k)+gupzz_loc*gyzx(i,j,k)) + & Gamxxx =HALF*( gupxx*gxxx + gupxy*(TWO*gxyx - gxxy ) + gupxz*(TWO*gxzx - gxxz ))
gupxz_loc*(gupxz_loc*gxzx(i,j,k)+gupyz_loc*gyzx(i,j,k)+gupzz_loc*gzzx(i,j,k)) + & Gamyxx =HALF*( gupxy*gxxx + gupyy*(TWO*gxyx - gxxy ) + gupyz*(TWO*gxzx - gxxz ))
gupxy_loc*(gupxz_loc*gxxy(i,j,k)+gupyz_loc*gxyy(i,j,k)+gupzz_loc*gxzy(i,j,k)) + & Gamzxx =HALF*( gupxz*gxxx + gupyz*(TWO*gxyx - gxxy ) + gupzz*(TWO*gxzx - gxxz ))
gupyy_loc*(gupxz_loc*gxyy(i,j,k)+gupyz_loc*gyyy(i,j,k)+gupzz_loc*gyzy(i,j,k)) + &
gupyz_loc*(gupxz_loc*gxzy(i,j,k)+gupyz_loc*gyzy(i,j,k)+gupzz_loc*gzzy(i,j,k)) + & Gamxyy =HALF*( gupxx*(TWO*gxyy - gyyx ) + gupxy*gyyy + gupxz*(TWO*gyzy - gyyz ))
gupxz_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + & Gamyyy =HALF*( gupxy*(TWO*gxyy - gyyx ) + gupyy*gyyy + gupyz*(TWO*gyzy - gyyz ))
gupyz_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + & Gamzyy =HALF*( gupxz*(TWO*gxyy - gyyx ) + gupyz*gyyy + gupzz*(TWO*gyzy - gyyz ))
gupzz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
endif Gamxzz =HALF*( gupxx*(TWO*gxzz - gzzx ) + gupxy*(TWO*gyzz - gzzy ) + gupxz*gzzz)
Gamyzz =HALF*( gupxy*(TWO*gxzz - gzzx ) + gupyy*(TWO*gyzz - gzzy ) + gupyz*gzzz)
Gamxxx(i,j,k)=HALF*( gupxx_loc*gxxx(i,j,k) + gupxy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupxz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamzzz =HALF*( gupxz*(TWO*gxzz - gzzx ) + gupyz*(TWO*gyzz - gzzy ) + gupzz*gzzz)
Gamyxx(i,j,k)=HALF*( gupxy_loc*gxxx(i,j,k) + gupyy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupyz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
Gamzxx(i,j,k)=HALF*( gupxz_loc*gxxx(i,j,k) + gupyz_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupzz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k))) Gamxxy =HALF*( gupxx*gxxy + gupxy*gyyx + gupxz*( gxzy + gyzx - gxyz ) )
Gamyxy =HALF*( gupxy*gxxy + gupyy*gyyx + gupyz*( gxzy + gyzx - gxyz ) )
Gamxyy(i,j,k)=HALF*( gupxx_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupxy_loc*gyyy(i,j,k) + gupxz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamzxy =HALF*( gupxz*gxxy + gupyz*gyyx + gupzz*( gxzy + gyzx - gxyz ) )
Gamyyy(i,j,k)=HALF*( gupxy_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyy_loc*gyyy(i,j,k) + gupyz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
Gamzyy(i,j,k)=HALF*( gupxz_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyz_loc*gyyy(i,j,k) + gupzz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k))) Gamxxz =HALF*( gupxx*gxxz + gupxy*( gxyz + gyzx - gxzy ) + gupxz*gzzx )
Gamyxz =HALF*( gupxy*gxxz + gupyy*( gxyz + gyzx - gxzy ) + gupyz*gzzx )
Gamxzz(i,j,k)=HALF*( gupxx_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupxy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupxz_loc*gzzz(i,j,k)) Gamzxz =HALF*( gupxz*gxxz + gupyz*( gxyz + gyzx - gxzy ) + gupzz*gzzx )
Gamyzz(i,j,k)=HALF*( gupxy_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupyz_loc*gzzz(i,j,k))
Gamzzz(i,j,k)=HALF*( gupxz_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyz_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupzz_loc*gzzz(i,j,k)) Gamxyz =HALF*( gupxx*( gxyz + gxzy - gyzx ) + gupxy*gyyz + gupxz*gzzy )
Gamyyz =HALF*( gupxy*( gxyz + gxzy - gyzx ) + gupyy*gyyz + gupyz*gzzy )
Gamxxy(i,j,k)=HALF*( gupxx_loc*gxxy(i,j,k) + gupxy_loc*gyyx(i,j,k) + gupxz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) Gamzyz =HALF*( gupxz*( gxyz + gxzy - gyzx ) + gupyz*gyyz + gupzz*gzzy )
Gamyxy(i,j,k)=HALF*( gupxy_loc*gxxy(i,j,k) + gupyy_loc*gyyx(i,j,k) + gupyz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) ) ! Raise indices of \tilde A_{ij} and store in R_ij
Gamzxy(i,j,k)=HALF*( gupxz_loc*gxxy(i,j,k) + gupyz_loc*gyyx(i,j,k) + gupzz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
Rxx = gupxx * gupxx * Axx + gupxy * gupxy * Ayy + gupxz * gupxz * Azz + &
Gamxxz(i,j,k)=HALF*( gupxx_loc*gxxz(i,j,k) + gupxy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupxz_loc*gzzx(i,j,k) ) TWO*(gupxx * gupxy * Axy + gupxx * gupxz * Axz + gupxy * gupxz * Ayz)
Gamyxz(i,j,k)=HALF*( gupxy_loc*gxxz(i,j,k) + gupyy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupyz_loc*gzzx(i,j,k) )
Gamzxz(i,j,k)=HALF*( gupxz_loc*gxxz(i,j,k) + gupyz_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupzz_loc*gzzx(i,j,k) ) Ryy = gupxy * gupxy * Axx + gupyy * gupyy * Ayy + gupyz * gupyz * Azz + &
TWO*(gupxy * gupyy * Axy + gupxy * gupyz * Axz + gupyy * gupyz * Ayz)
Gamxyz(i,j,k)=HALF*( gupxx_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupxy_loc*gyyz(i,j,k) + gupxz_loc*gzzy(i,j,k) )
Gamyyz(i,j,k)=HALF*( gupxy_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyy_loc*gyyz(i,j,k) + gupyz_loc*gzzy(i,j,k) ) Rzz = gupxz * gupxz * Axx + gupyz * gupyz * Ayy + gupzz * gupzz * Azz + &
Gamzyz(i,j,k)=HALF*( gupxz_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyz_loc*gyyz(i,j,k) + gupzz_loc*gzzy(i,j,k) ) TWO*(gupxz * gupyz * Axy + gupxz * gupzz * Axz + gupyz * gupzz * Ayz)
enddo
enddo Rxy = gupxx * gupxy * Axx + gupxy * gupyy * Ayy + gupxz * gupyz * Azz + &
enddo (gupxx * gupyy + gupxy * gupxy)* Axy + &
! Raise indices of \tilde A_{ij} and store in R_ij (gupxx * gupyz + gupxz * gupxy)* Axz + &
(gupxy * gupyz + gupxz * gupyy)* Ayz
! Right hand side for Gam^i without shift terms...
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) Rxz = gupxx * gupxz * Axx + gupxy * gupyz * Ayy + gupxz * gupzz * Azz + &
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev) (gupxx * gupyz + gupxy * gupxz)* Axy + &
do k=1,ex(3) (gupxx * gupzz + gupxz * gupxz)* Axz + &
do j=1,ex(2) (gupxy * gupzz + gupxz * gupyz)* Ayz
do i=1,ex(1)
gupxx_loc = gupxx(i,j,k) Ryz = gupxy * gupxz * Axx + gupyy * gupyz * Ayy + gupyz * gupzz * Azz + &
gupxy_loc = gupxy(i,j,k) (gupxy * gupyz + gupyy * gupxz)* Axy + &
gupxz_loc = gupxz(i,j,k) (gupxy * gupzz + gupyz * gupxz)* Axz + &
gupyy_loc = gupyy(i,j,k) (gupyy * gupzz + gupyz * gupyz)* Ayz
gupyz_loc = gupyz(i,j,k)
gupzz_loc = gupzz(i,j,k) ! Right hand side for Gam^i without shift terms...
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
Rxx_loc = gupxx_loc * gupxx_loc * Axx(i,j,k) + gupxy_loc * gupxy_loc * Ayy(i,j,k) + gupxz_loc * gupxz_loc * Azz(i,j,k) + & call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
TWO * (gupxx_loc * gupxy_loc * Axy(i,j,k) + gupxx_loc * gupxz_loc * Axz(i,j,k) + gupxy_loc * gupxz_loc * Ayz(i,j,k))
Ryy_loc = gupxy_loc * gupxy_loc * Axx(i,j,k) + gupyy_loc * gupyy_loc * Ayy(i,j,k) + gupyz_loc * gupyz_loc * Azz(i,j,k) + & Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
TWO * (gupxy_loc * gupyy_loc * Axy(i,j,k) + gupxy_loc * gupyz_loc * Axz(i,j,k) + gupyy_loc * gupyz_loc * Ayz(i,j,k)) TWO * alpn1 * ( &
Rzz_loc = gupxz_loc * gupxz_loc * Axx(i,j,k) + gupyz_loc * gupyz_loc * Ayy(i,j,k) + gupzz_loc * gupzz_loc * Azz(i,j,k) + & -F3o2/chin1 * ( chix * Rxx + chiy * Rxy + chiz * Rxz ) - &
TWO * (gupxz_loc * gupyz_loc * Axy(i,j,k) + gupxz_loc * gupzz_loc * Axz(i,j,k) + gupyz_loc * gupzz_loc * Ayz(i,j,k)) gupxx * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
Rxy_loc = gupxx_loc * gupxy_loc * Axx(i,j,k) + gupxy_loc * gupyy_loc * Ayy(i,j,k) + gupxz_loc * gupyz_loc * Azz(i,j,k) + & gupxy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
(gupxx_loc * gupyy_loc + gupxy_loc * gupxy_loc) * Axy(i,j,k) + & gupxz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
(gupxx_loc * gupyz_loc + gupxz_loc * gupxy_loc) * Axz(i,j,k) + & Gamxxx * Rxx + Gamxyy * Ryy + Gamxzz * Rzz + &
(gupxy_loc * gupyz_loc + gupxz_loc * gupyy_loc) * Ayz(i,j,k) TWO * ( Gamxxy * Rxy + Gamxxz * Rxz + Gamxyz * Ryz ) )
Rxz_loc = gupxx_loc * gupxz_loc * Axx(i,j,k) + gupxy_loc * gupyz_loc * Ayy(i,j,k) + gupxz_loc * gupzz_loc * Azz(i,j,k) + &
(gupxx_loc * gupyz_loc + gupxy_loc * gupxz_loc) * Axy(i,j,k) + & Gamy_rhs = - TWO * ( Lapx * Rxy + Lapy * Ryy + Lapz * Ryz ) + &
(gupxx_loc * gupzz_loc + gupxz_loc * gupxz_loc) * Axz(i,j,k) + & TWO * alpn1 * ( &
(gupxy_loc * gupzz_loc + gupxz_loc * gupyz_loc) * Ayz(i,j,k) -F3o2/chin1 * ( chix * Rxy + chiy * Ryy + chiz * Ryz ) - &
Ryz_loc = gupxy_loc * gupxz_loc * Axx(i,j,k) + gupyy_loc * gupyz_loc * Ayy(i,j,k) + gupyz_loc * gupzz_loc * Azz(i,j,k) + & gupxy * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
(gupxy_loc * gupyz_loc + gupyy_loc * gupxz_loc) * Axy(i,j,k) + & gupyy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
(gupxy_loc * gupzz_loc + gupyz_loc * gupxz_loc) * Axz(i,j,k) + & gupyz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
(gupyy_loc * gupzz_loc + gupyz_loc * gupyz_loc) * Ayz(i,j,k) Gamyxx * Rxx + Gamyyy * Ryy + Gamyzz * Rzz + &
Rxx(i,j,k) = Rxx_loc TWO * ( Gamyxy * Rxy + Gamyxz * Rxz + Gamyyz * Ryz ) )
Ryy(i,j,k) = Ryy_loc
Rzz(i,j,k) = Rzz_loc Gamz_rhs = - TWO * ( Lapx * Rxz + Lapy * Ryz + Lapz * Rzz ) + &
Rxy(i,j,k) = Rxy_loc TWO * alpn1 * ( &
Rxz(i,j,k) = Rxz_loc -F3o2/chin1 * ( chix * Rxz + chiy * Ryz + chiz * Rzz ) - &
Ryz(i,j,k) = Ryz_loc gupxz * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
gupyz * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
Gamx_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxx_loc + Lapy(i,j,k) * Rxy_loc + Lapz(i,j,k) * Rxz_loc) + & gupzz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
TWO * alpn1(i,j,k) * ( & Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxx_loc + chiy(i,j,k) * Rxy_loc + chiz(i,j,k) * Rxz_loc) - & TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
gupxx_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupxy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupxz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamxxx(i,j,k) * Rxx_loc + Gamxyy(i,j,k) * Ryy_loc + Gamxzz(i,j,k) * Rzz_loc + &
TWO * (Gamxxy(i,j,k) * Rxy_loc + Gamxxz(i,j,k) * Rxz_loc + Gamxyz(i,j,k) * Ryz_loc))
Gamy_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxy_loc + Lapy(i,j,k) * Ryy_loc + Lapz(i,j,k) * Ryz_loc) + &
TWO * alpn1(i,j,k) * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxy_loc + chiy(i,j,k) * Ryy_loc + chiz(i,j,k) * Ryz_loc) - &
gupxy_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupyy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupyz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamyxx(i,j,k) * Rxx_loc + Gamyyy(i,j,k) * Ryy_loc + Gamyzz(i,j,k) * Rzz_loc + &
TWO * (Gamyxy(i,j,k) * Rxy_loc + Gamyxz(i,j,k) * Rxz_loc + Gamyyz(i,j,k) * Ryz_loc))
Gamz_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxz_loc + Lapy(i,j,k) * Ryz_loc + Lapz(i,j,k) * Rzz_loc) + &
TWO * alpn1(i,j,k) * ( &
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxz_loc + chiy(i,j,k) * Ryz_loc + chiz(i,j,k) * Rzz_loc) - &
gupxz_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
gupyz_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
gupzz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
Gamzxx(i,j,k) * Rxx_loc + Gamzyy(i,j,k) * Ryy_loc + Gamzzz(i,j,k) * Rzz_loc + &
TWO * (Gamzxy(i,j,k) * Rxy_loc + Gamzxz(i,j,k) * Rxz_loc + Gamzyz(i,j,k) * Ryz_loc))
enddo
enddo
enddo
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,& call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev) X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
@@ -359,54 +321,38 @@
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,& call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev) X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev) fxx = gxxx + gxyy + gxzz
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev) fxy = gxyx + gyyy + gyzz
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev) fxz = gxzx + gyzy + gzzz
do k=1,ex(3)
do j=1,ex(2) Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz + &
do i=1,ex(1) TWO*( gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz )
divb_loc = div_beta(i,j,k) Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz + &
fxx_loc = gxxx(i,j,k) + gxyy(i,j,k) + gxzz(i,j,k) TWO*( gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz )
fxy_loc = gxyx(i,j,k) + gyyy(i,j,k) + gyzz(i,j,k) Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
fxz_loc = gxzx(i,j,k) + gyzy(i,j,k) + gzzz(i,j,k) TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
gupxx_loc = gupxx(i,j,k) call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
gupxy_loc = gupxy(i,j,k) call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
gupxz_loc = gupxz(i,j,k) call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
gupyy_loc = gupyy(i,j,k)
gupyz_loc = gupyz(i,j,k) Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
gupzz_loc = gupzz(i,j,k) Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
F1o3 * (gupxx * fxx + gupxy * fxy + gupxz * fxz ) + &
Gamxa_loc = gupxx_loc * Gamxxx(i,j,k) + gupyy_loc * Gamxyy(i,j,k) + gupzz_loc * Gamxzz(i,j,k) + & gupxx * gxxx + gupyy * gyyx + gupzz * gzzx + &
TWO * (gupxy_loc * Gamxxy(i,j,k) + gupxz_loc * Gamxxz(i,j,k) + gupyz_loc * Gamxyz(i,j,k)) TWO * (gupxy * gxyx + gupxz * gxzx + gupyz * gyzx )
Gamya_loc = gupxx_loc * Gamyxx(i,j,k) + gupyy_loc * Gamyyy(i,j,k) + gupzz_loc * Gamyzz(i,j,k) + &
TWO * (gupxy_loc * Gamyxy(i,j,k) + gupxz_loc * Gamyxz(i,j,k) + gupyz_loc * Gamyyz(i,j,k)) Gamy_rhs = Gamy_rhs + F2o3 * Gamya * div_beta - &
Gamza_loc = gupxx_loc * Gamzxx(i,j,k) + gupyy_loc * Gamzyy(i,j,k) + gupzz_loc * Gamzzz(i,j,k) + & Gamxa * betayx - Gamya * betayy - Gamza * betayz + &
TWO * (gupxy_loc * Gamzxy(i,j,k) + gupxz_loc * Gamzxz(i,j,k) + gupyz_loc * Gamzyz(i,j,k)) F1o3 * (gupxy * fxx + gupyy * fxy + gupyz * fxz ) + &
Gamxa(i,j,k) = Gamxa_loc gupxx * gxxy + gupyy * gyyy + gupzz * gzzy + &
Gamya(i,j,k) = Gamya_loc TWO * (gupxy * gxyy + gupxz * gxzy + gupyz * gyzy )
Gamza(i,j,k) = Gamza_loc
Gamz_rhs = Gamz_rhs + F2o3 * Gamza * div_beta - &
Gamx_rhs(i,j,k) = Gamx_rhs(i,j,k) + F2o3 * Gamxa_loc * divb_loc - & Gamxa * betazx - Gamya * betazy - Gamza * betazz + &
Gamxa_loc * betaxx(i,j,k) - Gamya_loc * betaxy(i,j,k) - Gamza_loc * betaxz(i,j,k) + & F1o3 * (gupxz * fxx + gupyz * fxy + gupzz * fxz ) + &
F1o3 * (gupxx_loc * fxx_loc + gupxy_loc * fxy_loc + gupxz_loc * fxz_loc) + & gupxx * gxxz + gupyy * gyyz + gupzz * gzzz + &
gupxx_loc * gxxx(i,j,k) + gupyy_loc * gyyx(i,j,k) + gupzz_loc * gzzx(i,j,k) + & TWO * (gupxy * gxyz + gupxz * gxzz + gupyz * gyzz ) !rhs for Gam^i
TWO * (gupxy_loc * gxyx(i,j,k) + gupxz_loc * gxzx(i,j,k) + gupyz_loc * gyzx(i,j,k))
Gamy_rhs(i,j,k) = Gamy_rhs(i,j,k) + F2o3 * Gamya_loc * divb_loc - &
Gamxa_loc * betayx(i,j,k) - Gamya_loc * betayy(i,j,k) - Gamza_loc * betayz(i,j,k) + &
F1o3 * (gupxy_loc * fxx_loc + gupyy_loc * fxy_loc + gupyz_loc * fxz_loc) + &
gupxx_loc * gxxy(i,j,k) + gupyy_loc * gyyy(i,j,k) + gupzz_loc * gzzy(i,j,k) + &
TWO * (gupxy_loc * gxyy(i,j,k) + gupxz_loc * gxzy(i,j,k) + gupyz_loc * gyzy(i,j,k))
Gamz_rhs(i,j,k) = Gamz_rhs(i,j,k) + F2o3 * Gamza_loc * divb_loc - &
Gamxa_loc * betazx(i,j,k) - Gamya_loc * betazy(i,j,k) - Gamza_loc * betazz(i,j,k) + &
F1o3 * (gupxz_loc * fxx_loc + gupyz_loc * fxy_loc + gupzz_loc * fxz_loc) + &
gupxx_loc * gxxz(i,j,k) + gupyy_loc * gyyz(i,j,k) + gupzz_loc * gzzz(i,j,k) + &
TWO * (gupxy_loc * gxyz(i,j,k) + gupxz_loc * gxzz(i,j,k) + gupyz_loc * gyzz(i,j,k))
enddo
enddo
enddo
!first kind of connection stored in gij,k !first kind of connection stored in gij,k
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
@@ -655,190 +601,192 @@
Gamxyz * gxzz + Gamyyz * gyzz + Gamzyz * gzzz + & Gamxyz * gxzz + Gamyyz * gyzz + Gamzyz * gzzz + &
Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + & Gamxzz * gxzy + Gamyzz * gyzy + Gamzzz * gzzy + &
Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz ) Gamxyz * gzzx + Gamyyz * gzzy + Gamzyz * gzzz )
!covariant second derivative of chi respect to tilted metric !covariant second derivative of chi respect to tilted metric
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev) call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
do k=1,ex(3) fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
do j=1,ex(2) fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
do i=1,ex(1) fxz = fxz - Gamxxz * chix - Gamyxz * chiy - Gamzxz * chiz
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k) * chix(i,j,k) - Gamyxx(i,j,k) * chiy(i,j,k) - Gamzxx(i,j,k) * chiz(i,j,k) fyy = fyy - Gamxyy * chix - Gamyyy * chiy - Gamzyy * chiz
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k) * chix(i,j,k) - Gamyxy(i,j,k) * chiy(i,j,k) - Gamzxy(i,j,k) * chiz(i,j,k) fyz = fyz - Gamxyz * chix - Gamyyz * chiy - Gamzyz * chiz
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k) * chix(i,j,k) - Gamyxz(i,j,k) * chiy(i,j,k) - Gamzxz(i,j,k) * chiz(i,j,k) fzz = fzz - Gamxzz * chix - Gamyzz * chiy - Gamzzz * chiz
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k) * chix(i,j,k) - Gamyyy(i,j,k) * chiy(i,j,k) - Gamzyy(i,j,k) * chiz(i,j,k) ! Store D^l D_l chi - 3/(2*chi) D^l chi D_l chi in f
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k) * chix(i,j,k) - Gamyyz(i,j,k) * chiy(i,j,k) - Gamzyz(i,j,k) * chiz(i,j,k)
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k) * chix(i,j,k) - Gamyzz(i,j,k) * chiy(i,j,k) - Gamzzz(i,j,k) * chiz(i,j,k) f = gupxx * ( fxx - F3o2/chin1 * chix * chix ) + &
gupyy * ( fyy - F3o2/chin1 * chiy * chiy ) + &
chin_loc = chin1(i,j,k) gupzz * ( fzz - F3o2/chin1 * chiz * chiz ) + &
f_loc = gupxx(i,j,k) * (fxx(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chix(i,j,k)) + & TWO * gupxy * ( fxy - F3o2/chin1 * chix * chiy ) + &
gupyy(i,j,k) * (fyy(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiy(i,j,k)) + & TWO * gupxz * ( fxz - F3o2/chin1 * chix * chiz ) + &
gupzz(i,j,k) * (fzz(i,j,k) - F3o2/chin_loc * chiz(i,j,k) * chiz(i,j,k)) + & TWO * gupyz * ( fyz - F3o2/chin1 * chiy * chiz )
TWO * gupxy(i,j,k) * (fxy(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiy(i,j,k)) + & ! Add chi part to Ricci tensor:
TWO * gupxz(i,j,k) * (fxz(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiz(i,j,k)) + &
TWO * gupyz(i,j,k) * (fyz(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiz(i,j,k)) Rxx = Rxx + (fxx - chix*chix/chin1/TWO + gxx * f)/chin1/TWO
f(i,j,k) = f_loc Ryy = Ryy + (fyy - chiy*chiy/chin1/TWO + gyy * f)/chin1/TWO
Rzz = Rzz + (fzz - chiz*chiz/chin1/TWO + gzz * f)/chin1/TWO
Rxx(i,j,k) = Rxx(i,j,k) + (fxx(i,j,k) - chix(i,j,k)*chix(i,j,k)/chin_loc/TWO + gxx(i,j,k) * f_loc)/chin_loc/TWO Rxy = Rxy + (fxy - chix*chiy/chin1/TWO + gxy * f)/chin1/TWO
Ryy(i,j,k) = Ryy(i,j,k) + (fyy(i,j,k) - chiy(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gyy(i,j,k) * f_loc)/chin_loc/TWO Rxz = Rxz + (fxz - chix*chiz/chin1/TWO + gxz * f)/chin1/TWO
Rzz(i,j,k) = Rzz(i,j,k) + (fzz(i,j,k) - chiz(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gzz(i,j,k) * f_loc)/chin_loc/TWO Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
Rxy(i,j,k) = Rxy(i,j,k) + (fxy(i,j,k) - chix(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gxy(i,j,k) * f_loc)/chin_loc/TWO
Rxz(i,j,k) = Rxz(i,j,k) + (fxz(i,j,k) - chix(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gxz(i,j,k) * f_loc)/chin_loc/TWO ! covariant second derivatives of the lapse respect to physical metric
Ryz(i,j,k) = Ryz(i,j,k) + (fyz(i,j,k) - chiy(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gyz(i,j,k) * f_loc)/chin_loc/TWO call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
enddo SYM,SYM,SYM,symmetry,Lev)
enddo
enddo gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
! covariant second derivatives of the lapse respect to physical metric gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, & ! now get physical second kind of connection
SYM,SYM,SYM,symmetry,Lev) Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
do k=1,ex(3) Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
do j=1,ex(2) Gamxyy = Gamxyy - ( - gyy * gxxx )*HALF
do i=1,ex(1) Gamyyy = Gamyyy - ( (chiy + chiy)/chin1 - gyy * gxxy )*HALF
chin_loc = chin1(i,j,k) Gamzyy = Gamzyy - ( - gyy * gxxz )*HALF
gxxx(i,j,k) = (gupxx(i,j,k) * chix(i,j,k) + gupxy(i,j,k) * chiy(i,j,k) + gupxz(i,j,k) * chiz(i,j,k)) / chin_loc Gamxzz = Gamxzz - ( - gzz * gxxx )*HALF
gxxy(i,j,k) = (gupxy(i,j,k) * chix(i,j,k) + gupyy(i,j,k) * chiy(i,j,k) + gupyz(i,j,k) * chiz(i,j,k)) / chin_loc Gamyzz = Gamyzz - ( - gzz * gxxy )*HALF
gxxz(i,j,k) = (gupxz(i,j,k) * chix(i,j,k) + gupyz(i,j,k) * chiy(i,j,k) + gupzz(i,j,k) * chiz(i,j,k)) / chin_loc Gamzzz = Gamzzz - ( (chiz + chiz)/chin1 - gzz * gxxz )*HALF
Gamxxy = Gamxxy - ( chiy /chin1 - gxy * gxxx )*HALF
Gamxxx(i,j,k) = Gamxxx(i,j,k) - ( (chix(i,j,k) + chix(i,j,k))/chin_loc - gxx(i,j,k) * gxxx(i,j,k) )*HALF Gamyxy = Gamyxy - ( chix /chin1 - gxy * gxxy )*HALF
Gamyxx(i,j,k) = Gamyxx(i,j,k) - ( - gxx(i,j,k) * gxxy(i,j,k) )*HALF Gamzxy = Gamzxy - ( - gxy * gxxz )*HALF
Gamzxx(i,j,k) = Gamzxx(i,j,k) - ( - gxx(i,j,k) * gxxz(i,j,k) )*HALF Gamxxz = Gamxxz - ( chiz /chin1 - gxz * gxxx )*HALF
Gamxyy(i,j,k) = Gamxyy(i,j,k) - ( - gyy(i,j,k) * gxxx(i,j,k) )*HALF Gamyxz = Gamyxz - ( - gxz * gxxy )*HALF
Gamyyy(i,j,k) = Gamyyy(i,j,k) - ( (chiy(i,j,k) + chiy(i,j,k))/chin_loc - gyy(i,j,k) * gxxy(i,j,k) )*HALF Gamzxz = Gamzxz - ( chix /chin1 - gxz * gxxz )*HALF
Gamzyy(i,j,k) = Gamzyy(i,j,k) - ( - gyy(i,j,k) * gxxz(i,j,k) )*HALF Gamxyz = Gamxyz - ( - gyz * gxxx )*HALF
Gamxzz(i,j,k) = Gamxzz(i,j,k) - ( - gzz(i,j,k) * gxxx(i,j,k) )*HALF Gamyyz = Gamyyz - ( chiz /chin1 - gyz * gxxy )*HALF
Gamyzz(i,j,k) = Gamyzz(i,j,k) - ( - gzz(i,j,k) * gxxy(i,j,k) )*HALF Gamzyz = Gamzyz - ( chiy /chin1 - gyz * gxxz )*HALF
Gamzzz(i,j,k) = Gamzzz(i,j,k) - ( (chiz(i,j,k) + chiz(i,j,k))/chin_loc - gzz(i,j,k) * gxxz(i,j,k) )*HALF
Gamxxy(i,j,k) = Gamxxy(i,j,k) - ( chiy(i,j,k) /chin_loc - gxy(i,j,k) * gxxx(i,j,k) )*HALF fxx = fxx - Gamxxx*Lapx - Gamyxx*Lapy - Gamzxx*Lapz
Gamyxy(i,j,k) = Gamyxy(i,j,k) - ( chix(i,j,k) /chin_loc - gxy(i,j,k) * gxxy(i,j,k) )*HALF fyy = fyy - Gamxyy*Lapx - Gamyyy*Lapy - Gamzyy*Lapz
Gamzxy(i,j,k) = Gamzxy(i,j,k) - ( - gxy(i,j,k) * gxxz(i,j,k) )*HALF fzz = fzz - Gamxzz*Lapx - Gamyzz*Lapy - Gamzzz*Lapz
Gamxxz(i,j,k) = Gamxxz(i,j,k) - ( chiz(i,j,k) /chin_loc - gxz(i,j,k) * gxxx(i,j,k) )*HALF fxy = fxy - Gamxxy*Lapx - Gamyxy*Lapy - Gamzxy*Lapz
Gamyxz(i,j,k) = Gamyxz(i,j,k) - ( - gxz(i,j,k) * gxxy(i,j,k) )*HALF fxz = fxz - Gamxxz*Lapx - Gamyxz*Lapy - Gamzxz*Lapz
Gamzxz(i,j,k) = Gamzxz(i,j,k) - ( chix(i,j,k) /chin_loc - gxz(i,j,k) * gxxz(i,j,k) )*HALF fyz = fyz - Gamxyz*Lapx - Gamyyz*Lapy - Gamzyz*Lapz
Gamxyz(i,j,k) = Gamxyz(i,j,k) - ( - gyz(i,j,k) * gxxx(i,j,k) )*HALF
Gamyyz(i,j,k) = Gamyyz(i,j,k) - ( chiz(i,j,k) /chin_loc - gyz(i,j,k) * gxxy(i,j,k) )*HALF ! store D^i D_i Lap in trK_rhs upto chi
Gamzyz(i,j,k) = Gamzyz(i,j,k) - ( chiy(i,j,k) /chin_loc - gyz(i,j,k) * gxxz(i,j,k) )*HALF trK_rhs = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz )
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k)*Lapx(i,j,k) - Gamyxx(i,j,k)*Lapy(i,j,k) - Gamzxx(i,j,k)*Lapz(i,j,k) #if 1
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k)*Lapx(i,j,k) - Gamyyy(i,j,k)*Lapy(i,j,k) - Gamzyy(i,j,k)*Lapz(i,j,k) !! follow bam code
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k)*Lapx(i,j,k) - Gamyzz(i,j,k)*Lapy(i,j,k) - Gamzzz(i,j,k)*Lapz(i,j,k) S = chin1 * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k)*Lapx(i,j,k) - Gamyxy(i,j,k)*Lapy(i,j,k) - Gamzxy(i,j,k)*Lapz(i,j,k) TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k)*Lapx(i,j,k) - Gamyxz(i,j,k)*Lapy(i,j,k) - Gamzxz(i,j,k)*Lapz(i,j,k) f = F2o3 * trK * trK -(&
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k)*Lapx(i,j,k) - Gamyyz(i,j,k)*Lapy(i,j,k) - Gamzyz(i,j,k)*Lapz(i,j,k) gupxx * ( &
gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
trK_rhs(i,j,k) = gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) gupyy * ( &
enddo gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
enddo TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
enddo gupzz * ( &
do k=1,ex(3) gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
do j=1,ex(2) TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
do i=1,ex(1) TWO * ( &
divb_loc = div_beta(i,j,k) gupxy * ( &
chin_loc = chin1(i,j,k) gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
gupxy * (Axx * Ayy + Axy * Axy) + &
S_loc = chin_loc * ( gupxx(i,j,k) * Sxx(i,j,k) + gupyy(i,j,k) * Syy(i,j,k) + gupzz(i,j,k) * Szz(i,j,k) + & gupxz * (Axx * Ayz + Axz * Axy) + &
TWO * (gupxy(i,j,k) * Sxy(i,j,k) + gupxz(i,j,k) * Sxz(i,j,k) + gupyz(i,j,k) * Syz(i,j,k)) ) gupyz * (Axy * Ayz + Axz * Ayy) ) + &
S(i,j,k) = S_loc gupxz * ( &
gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
f_loc = F2o3 * trK(i,j,k) * trK(i,j,k) - ( & gupxy * (Axx * Ayz + Axy * Axz) + &
gupxx(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & gupxz * (Axx * Azz + Axz * Axz) + &
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + & gupyz * (Axy * Azz + Axz * Ayz) ) + &
TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + & gupyz * ( &
gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) ) + & gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupyy(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + & gupxy * (Axy * Ayz + Ayy * Axz) + &
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & gupxz * (Axy * Azz + Ayz * Axz) + &
TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & gupyz * (Ayy * Azz + Ayz * Ayz) ) )) -1.6d1*PI*rho + EIGHT * PI * S
gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) ) + & f = - F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
gupzz(i,j,k) * ( gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + alpn1/chin1*f)
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + &
TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) ) + & fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
TWO * ( gupxy(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + & fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + & fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + & fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) ) + & #else
gupxz(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & ! Add lapse and S_ij parts to Ricci tensor:
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + & fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + & fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) ) + & fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
gupyz(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + & fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + & fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + & fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) ) ) ) - & ! Compute trace-free part (note: chi^-1 and chi cancel!):
F16 * PI * rho(i,j,k) + EIGHT * PI * S_loc
f = F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
f_loc = -F1o3 * ( gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + & TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) )
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & #endif
alpn1(i,j,k)/chin_loc * f_loc )
f(i,j,k) = f_loc Axx_rhs = fxx - gxx * f
Ayy_rhs = fyy - gyy * f
l_fxx = alpn1(i,j,k) * (Rxx(i,j,k) - EIGHT * PI * Sxx(i,j,k)) - fxx(i,j,k) Azz_rhs = fzz - gzz * f
l_fxy = alpn1(i,j,k) * (Rxy(i,j,k) - EIGHT * PI * Sxy(i,j,k)) - fxy(i,j,k) Axy_rhs = fxy - gxy * f
l_fxz = alpn1(i,j,k) * (Rxz(i,j,k) - EIGHT * PI * Sxz(i,j,k)) - fxz(i,j,k) Axz_rhs = fxz - gxz * f
l_fyy = alpn1(i,j,k) * (Ryy(i,j,k) - EIGHT * PI * Syy(i,j,k)) - fyy(i,j,k) Ayz_rhs = fyz - gyz * f
l_fyz = alpn1(i,j,k) * (Ryz(i,j,k) - EIGHT * PI * Syz(i,j,k)) - fyz(i,j,k)
l_fzz = alpn1(i,j,k) * (Rzz(i,j,k) - EIGHT * PI * Szz(i,j,k)) - fzz(i,j,k) ! Now: store A_il A^l_j into fij:
Axx_rhs(i,j,k) = l_fxx - gxx(i,j,k) * f_loc fxx = gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
Ayy_rhs(i,j,k) = l_fyy - gyy(i,j,k) * f_loc TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz)
Azz_rhs(i,j,k) = l_fzz - gzz(i,j,k) * f_loc fyy = gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
Axy_rhs(i,j,k) = l_fxy - gxy(i,j,k) * f_loc TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz)
Axz_rhs(i,j,k) = l_fxz - gxz(i,j,k) * f_loc fzz = gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
Ayz_rhs(i,j,k) = l_fyz - gyz(i,j,k) * f_loc TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz)
fxy = gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
fxx(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + & gupxy *(Axx * Ayy + Axy * Axy) + &
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + & gupxz *(Axx * Ayz + Axz * Axy) + &
gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) gupyz *(Axy * Ayz + Axz * Ayy)
fyy(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + & fxz = gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & gupxy *(Axx * Ayz + Axy * Axz) + &
gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) gupxz *(Axx * Azz + Axz * Axz) + &
fzz(i,j,k) = gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + & gupyz *(Axy * Azz + Axz * Ayz)
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + & fyz = gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) gupxy *(Axy * Ayz + Ayy * Axz) + &
fxy(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + & gupxz *(Axy * Azz + Ayz * Axz) + &
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + & gupyz *(Ayy * Azz + Ayz * Ayz)
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) f = chin1
fxz(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + & ! store D^i D_i Lap in trK_rhs
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + & trK_rhs = f*trK_rhs
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) Axx_rhs = f * Axx_rhs+ alpn1 * (trK * Axx - TWO * fxx) + &
fyz(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + & TWO * ( Axx * betaxx + Axy * betayx + Axz * betazx )- &
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + & F2o3 * Axx * div_beta
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) Ayy_rhs = f * Ayy_rhs+ alpn1 * (trK * Ayy - TWO * fyy) + &
TWO * ( Axy * betaxy + Ayy * betayy + Ayz * betazy )- &
trK_rhs(i,j,k) = chin_loc * trK_rhs(i,j,k) F2o3 * Ayy * div_beta
Axx_rhs(i,j,k) = chin_loc * Axx_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axx(i,j,k) - TWO * fxx(i,j,k)) + & Azz_rhs = f * Azz_rhs+ alpn1 * (trK * Azz - TWO * fzz) + &
TWO * (Axx(i,j,k) * betaxx(i,j,k) + Axy(i,j,k) * betayx(i,j,k) + Axz(i,j,k) * betazx(i,j,k)) - & TWO * ( Axz * betaxz + Ayz * betayz + Azz * betazz )- &
F2o3 * Axx(i,j,k) * divb_loc F2o3 * Azz * div_beta
Ayy_rhs(i,j,k) = chin_loc * Ayy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayy(i,j,k) - TWO * fyy(i,j,k)) + &
TWO * (Axy(i,j,k) * betaxy(i,j,k) + Ayy(i,j,k) * betayy(i,j,k) + Ayz(i,j,k) * betazy(i,j,k)) - & Axy_rhs = f * Axy_rhs+ alpn1 *( trK * Axy - TWO * fxy )+ &
F2o3 * Ayy(i,j,k) * divb_loc Axx * betaxy + Axz * betazy + &
Azz_rhs(i,j,k) = chin_loc * Azz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Azz(i,j,k) - TWO * fzz(i,j,k)) + & Ayy * betayx + Ayz * betazx + &
TWO * (Axz(i,j,k) * betaxz(i,j,k) + Ayz(i,j,k) * betayz(i,j,k) + Azz(i,j,k) * betazz(i,j,k)) - & F1o3 * Axy * div_beta - Axy * betazz
F2o3 * Azz(i,j,k) * divb_loc
Axy_rhs(i,j,k) = chin_loc * Axy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axy(i,j,k) - TWO * fxy(i,j,k)) + & Ayz_rhs = f * Ayz_rhs+ alpn1 *( trK * Ayz - TWO * fyz )+ &
Axx(i,j,k) * betaxy(i,j,k) + Axz(i,j,k) * betazy(i,j,k) + Ayy(i,j,k) * betayx(i,j,k) + & Axy * betaxz + Ayy * betayz + &
Ayz(i,j,k) * betazx(i,j,k) + F1o3 * Axy(i,j,k) * divb_loc - Axy(i,j,k) * betazz(i,j,k) Axz * betaxy + Azz * betazy + &
Ayz_rhs(i,j,k) = chin_loc * Ayz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayz(i,j,k) - TWO * fyz(i,j,k)) + & F1o3 * Ayz * div_beta - Ayz * betaxx
Axy(i,j,k) * betaxz(i,j,k) + Ayy(i,j,k) * betayz(i,j,k) + Axz(i,j,k) * betaxy(i,j,k) + &
Azz(i,j,k) * betazy(i,j,k) + F1o3 * Ayz(i,j,k) * divb_loc - Ayz(i,j,k) * betaxx(i,j,k) Axz_rhs = f * Axz_rhs+ alpn1 *( trK * Axz - TWO * fxz )+ &
Axz_rhs(i,j,k) = chin_loc * Axz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axz(i,j,k) - TWO * fxz(i,j,k)) + & Axx * betaxz + Axy * betayz + &
Axx(i,j,k) * betaxz(i,j,k) + Axy(i,j,k) * betayz(i,j,k) + Ayz(i,j,k) * betayx(i,j,k) + & Ayz * betayx + Azz * betazx + &
Azz(i,j,k) * betazx(i,j,k) + F1o3 * Axz(i,j,k) * divb_loc - Axz(i,j,k) * betayy(i,j,k) F1o3 * Axz * div_beta - Axz * betayy !rhs for Aij
trK_rhs(i,j,k) = - trK_rhs(i,j,k) + alpn1(i,j,k) * ( F1o3 * trK(i,j,k) * trK(i,j,k) + & ! Compute trace of S_ij
gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + & S = f * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
FOUR * PI * (rho(i,j,k) + S_loc) ) TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
enddo
enddo trK_rhs = - trK_rhs + alpn1 *( F1o3 * trK * trK + &
enddo gupxx * fxx + gupyy * fyy + gupzz * fzz + &
TWO * ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + &
FOUR * PI * ( rho + S )) !rhs for trK
!!!! gauge variable part !!!! gauge variable part
@@ -1000,15 +948,15 @@
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency) !!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
! lopsided_kodis shares the symmetry_bd buffer between advection and ! lopsided_kodis shares the symmetry_bd buffer between advection and
! dissipation, eliminating redundant full-grid copies. For metric variables ! dissipation, eliminating redundant full-grid copies. For metric variables
! gxx/gyy/gzz (=dxx/dyy/dzz+1): stencil coefficients sum to zero, ! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
! so the constant offset has no effect on dissipation. ! so the constant offset has no effect on dissipation.
call lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps) call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
call lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps) call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
call lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps) call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps) call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)

View File

@@ -2,24 +2,6 @@
#include "bssn_rhs.h" #include "bssn_rhs.h"
#include "share_func.h" #include "share_func.h"
#include "tool.h" #include "tool.h"
#ifdef _OPENMP
#define BSSN_OMP_TASK_GROUP_BEGIN \
_Pragma("omp parallel") \
{ \
_Pragma("omp single nowait") \
{
#define BSSN_OMP_TASK_CALL(...) \
_Pragma("omp task") { __VA_ARGS__; }
#define BSSN_OMP_TASK_GROUP_END \
_Pragma("omp taskwait") \
} \
}
#else
#define BSSN_OMP_TASK_GROUP_BEGIN {
#define BSSN_OMP_TASK_CALL(...) { __VA_ARGS__; }
#define BSSN_OMP_TASK_GROUP_END }
#endif
// 0-based i,j,k // 0-based i,j,k
// #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny)) // #define IDX_F(i,j,k,nx,ny) ((i) + (j)*(nx) + (k)*(nx)*(ny))
// ex(1)=nx, ex(2)=ny, ex(3)=nz // ex(1)=nx, ex(2)=ny, ex(3)=nz
@@ -126,20 +108,18 @@ int f_compute_rhs_bssn(int *ex, double &T,
chin1[i] = chi[i] + 1.0; chin1[i] = chi[i] + 1.0;
} }
// 9ms // // 9ms //
BSSN_OMP_TASK_GROUP_BEGIN fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)) fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)) fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)) fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)) fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)) fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)) fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)) fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,dyy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)) fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)) fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)) fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)) fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev))
BSSN_OMP_TASK_GROUP_END
// 3ms // // 3ms //
for(int i=0;i<all;i+=1){ for(int i=0;i<all;i+=1){
@@ -336,14 +316,15 @@ int f_compute_rhs_bssn(int *ex, double &T,
); );
} }
// 22.3ms // // 22.3ms //
BSSN_OMP_TASK_GROUP_BEGIN fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,
BSSN_OMP_TASK_CALL(fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)) X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)) fdderivs(ex,betay,gxxy,gxyy,gxzy,gyyy,gyzy,gzzy,
BSSN_OMP_TASK_CALL(fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)) X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)) fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,
BSSN_OMP_TASK_CALL(fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)) X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev);
BSSN_OMP_TASK_CALL(fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)) fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev);
BSSN_OMP_TASK_GROUP_END fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev);
fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev);
// Fused: fxx/Gamxa + Gamma_rhs part 2 (2 loops -> 1) // Fused: fxx/Gamxa + Gamma_rhs part 2 (2 loops -> 1)
for(int i=0;i<all;i+=1){ for(int i=0;i<all;i+=1){
@@ -1082,32 +1063,30 @@ int f_compute_rhs_bssn(int *ex, double &T,
#endif #endif
} }
// advection + KO dissipation with shared symmetry buffer // advection + KO dissipation with shared symmetry buffer
BSSN_OMP_TASK_GROUP_BEGIN lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps)) lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)) lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)) lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps)) lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps)) lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)) lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps)) lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps)) lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps)) lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)) lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps)) lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps)) lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps)) lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps)) lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps)) lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps);
BSSN_OMP_TASK_CALL(lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps))
BSSN_OMP_TASK_GROUP_END
// 2ms // // 2ms //
if(co==0){ if(co==0){
for (int i=0;i<all;i+=1) { for (int i=0;i<all;i+=1) {
@@ -1154,67 +1133,65 @@ int f_compute_rhs_bssn(int *ex, double &T,
} }
// 1ms // // 1ms //
BSSN_OMP_TASK_GROUP_BEGIN fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Axx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)) fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Axy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,0)) fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Axz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,0)) fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Ayy,gyyx,gyyy,gyyz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)) fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Ayz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,0)) fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0);
BSSN_OMP_TASK_CALL(fderivs(ex,Azz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,0)) }
BSSN_OMP_TASK_GROUP_END // 7ms //
// 7ms // for (int i=0;i<all;i+=1) {
for (int i=0;i<all;i+=1) { gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]
gxxx[i] = gxxx[i] - ( Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i] + Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]) - chix[i]*Axx[i]/chin1[i];
+ Gamxxx[i] * Axx[i] + Gamyxx[i] * Axy[i] + Gamzxx[i] * Axz[i]) - chix[i]*Axx[i]/chin1[i]; gxyx[i] = gxyx[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
gxyx[i] = gxyx[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i] + Gamxxx[i] * Axy[i] + Gamyxx[i] * Ayy[i] + Gamzxx[i] * Ayz[i]) - chix[i]*Axy[i]/chin1[i];
+ Gamxxx[i] * Axy[i] + Gamyxx[i] * Ayy[i] + Gamzxx[i] * Ayz[i]) - chix[i]*Axy[i]/chin1[i]; gxzx[i] = gxzx[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]
gxzx[i] = gxzx[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i] + Gamxxx[i] * Axz[i] + Gamyxx[i] * Ayz[i] + Gamzxx[i] * Azz[i]) - chix[i]*Axz[i]/chin1[i];
+ Gamxxx[i] * Axz[i] + Gamyxx[i] * Ayz[i] + Gamzxx[i] * Azz[i]) - chix[i]*Axz[i]/chin1[i]; gyyx[i] = gyyx[i] - ( Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]
gyyx[i] = gyyx[i] - ( Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i] + Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chix[i]*Ayy[i]/chin1[i];
+ Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chix[i]*Ayy[i]/chin1[i]; gyzx[i] = gyzx[i] - ( Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]
gyzx[i] = gyzx[i] - ( Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i] + Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chix[i]*Ayz[i]/chin1[i];
+ Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chix[i]*Ayz[i]/chin1[i]; gzzx[i] = gzzx[i] - ( Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]
gzzx[i] = gzzx[i] - ( Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i] + Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chix[i]*Azz[i]/chin1[i];
+ Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chix[i]*Azz[i]/chin1[i]; gxxy[i] = gxxy[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]
gxxy[i] = gxxy[i] - ( Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i] + Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]) - chiy[i]*Axx[i]/chin1[i];
+ Gamxxy[i] * Axx[i] + Gamyxy[i] * Axy[i] + Gamzxy[i] * Axz[i]) - chiy[i]*Axx[i]/chin1[i]; gxyy[i] = gxyy[i] - ( Gamxyy[i] * Axx[i] + Gamyyy[i] * Axy[i] + Gamzyy[i] * Axz[i]
gxyy[i] = gxyy[i] - ( Gamxyy[i] * Axx[i] + Gamyyy[i] * Axy[i] + Gamzyy[i] * Axz[i] + Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chiy[i]*Axy[i]/chin1[i];
+ Gamxxy[i] * Axy[i] + Gamyxy[i] * Ayy[i] + Gamzxy[i] * Ayz[i]) - chiy[i]*Axy[i]/chin1[i]; gxzy[i] = gxzy[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i]
gxzy[i] = gxzy[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i] + Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chiy[i]*Axz[i]/chin1[i];
+ Gamxxy[i] * Axz[i] + Gamyxy[i] * Ayz[i] + Gamzxy[i] * Azz[i]) - chiy[i]*Axz[i]/chin1[i]; gyyy[i] = gyyy[i] - ( Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]
gyyy[i] = gyyy[i] - ( Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i] + Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]) - chiy[i]*Ayy[i]/chin1[i];
+ Gamxyy[i] * Axy[i] + Gamyyy[i] * Ayy[i] + Gamzyy[i] * Ayz[i]) - chiy[i]*Ayy[i]/chin1[i]; gyzy[i] = gyzy[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]
gyzy[i] = gyzy[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i] + Gamxyy[i] * Axz[i] + Gamyyy[i] * Ayz[i] + Gamzyy[i] * Azz[i]) - chiy[i]*Ayz[i]/chin1[i];
+ Gamxyy[i] * Axz[i] + Gamyyy[i] * Ayz[i] + Gamzyy[i] * Azz[i]) - chiy[i]*Ayz[i]/chin1[i]; gzzy[i] = gzzy[i] - ( Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]
gzzy[i] = gzzy[i] - ( Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i] + Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiy[i]*Azz[i]/chin1[i];
+ Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiy[i]*Azz[i]/chin1[i]; gxxz[i] = gxxz[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]
gxxz[i] = gxxz[i] - ( Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i] + Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]) - chiz[i]*Axx[i]/chin1[i];
+ Gamxxz[i] * Axx[i] + Gamyxz[i] * Axy[i] + Gamzxz[i] * Axz[i]) - chiz[i]*Axx[i]/chin1[i]; gxyz[i] = gxyz[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i]
gxyz[i] = gxyz[i] - ( Gamxyz[i] * Axx[i] + Gamyyz[i] * Axy[i] + Gamzyz[i] * Axz[i] + Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]) - chiz[i]*Axy[i]/chin1[i];
+ Gamxxz[i] * Axy[i] + Gamyxz[i] * Ayy[i] + Gamzxz[i] * Ayz[i]) - chiz[i]*Axy[i]/chin1[i]; gxzz[i] = gxzz[i] - ( Gamxzz[i] * Axx[i] + Gamyzz[i] * Axy[i] + Gamzzz[i] * Axz[i]
gxzz[i] = gxzz[i] - ( Gamxzz[i] * Axx[i] + Gamyzz[i] * Axy[i] + Gamzzz[i] * Axz[i] + Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chiz[i]*Axz[i]/chin1[i];
+ Gamxxz[i] * Axz[i] + Gamyxz[i] * Ayz[i] + Gamzxz[i] * Azz[i]) - chiz[i]*Axz[i]/chin1[i]; gyyz[i] = gyyz[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]
gyyz[i] = gyyz[i] - ( Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i] + Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]) - chiz[i]*Ayy[i]/chin1[i];
+ Gamxyz[i] * Axy[i] + Gamyyz[i] * Ayy[i] + Gamzyz[i] * Ayz[i]) - chiz[i]*Ayy[i]/chin1[i]; gyzz[i] = gyzz[i] - ( Gamxzz[i] * Axy[i] + Gamyzz[i] * Ayy[i] + Gamzzz[i] * Ayz[i]
gyzz[i] = gyzz[i] - ( Gamxzz[i] * Axy[i] + Gamyzz[i] * Ayy[i] + Gamzzz[i] * Ayz[i] + Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiz[i]*Ayz[i]/chin1[i];
+ Gamxyz[i] * Axz[i] + Gamyyz[i] * Ayz[i] + Gamzyz[i] * Azz[i]) - chiz[i]*Ayz[i]/chin1[i]; gzzz[i] = gzzz[i] - ( Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]
gzzz[i] = gzzz[i] - ( Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i] + Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]) - chiz[i]*Azz[i]/chin1[i];
+ Gamxzz[i] * Axz[i] + Gamyzz[i] * Ayz[i] + Gamzzz[i] * Azz[i]) - chiz[i]*Azz[i]/chin1[i];
movx_Res[i] = gupxx[i]*gxxx[i] + gupyy[i]*gxyy[i] + gupzz[i]*gxzz[i] movx_Res[i] = gupxx[i]*gxxx[i] + gupyy[i]*gxyy[i] + gupzz[i]*gxzz[i]
+ gupxy[i]*gxyx[i] + gupxz[i]*gxzx[i] + gupyz[i]*gxzy[i] + gupxy[i]*gxyx[i] + gupxz[i]*gxzx[i] + gupyz[i]*gxzy[i]
+ gupxy[i]*gxxy[i] + gupxz[i]*gxxz[i] + gupyz[i]*gxyz[i]; + gupxy[i]*gxxy[i] + gupxz[i]*gxxz[i] + gupyz[i]*gxyz[i];
movy_Res[i] = gupxx[i]*gxyx[i] + gupyy[i]*gyyy[i] + gupzz[i]*gyzz[i] movy_Res[i] = gupxx[i]*gxyx[i] + gupyy[i]*gyyy[i] + gupzz[i]*gyzz[i]
+ gupxy[i]*gyyx[i] + gupxz[i]*gyzx[i] + gupyz[i]*gyzy[i] + gupxy[i]*gyyx[i] + gupxz[i]*gyzx[i] + gupyz[i]*gyzy[i]
+ gupxy[i]*gxyy[i] + gupxz[i]*gxyz[i] + gupyz[i]*gyyz[i]; + gupxy[i]*gxyy[i] + gupxz[i]*gxyz[i] + gupyz[i]*gyyz[i];
movz_Res[i] = gupxx[i]*gxzx[i] + gupyy[i]*gyzy[i] + gupzz[i]*gzzz[i] movz_Res[i] = gupxx[i]*gxzx[i] + gupyy[i]*gyzy[i] + gupzz[i]*gzzz[i]
+ gupxy[i]*gyzx[i] + gupxz[i]*gzzx[i] + gupyz[i]*gzzy[i] + gupxy[i]*gyzx[i] + gupxz[i]*gzzx[i] + gupyz[i]*gzzy[i]
+ gupxy[i]*gxzy[i] + gupxz[i]*gxzz[i] + gupyz[i]*gyzz[i]; + gupxy[i]*gxzy[i] + gupxz[i]*gxzz[i] + gupyz[i]*gyzz[i];
movx_Res[i] = movx_Res[i] - F2o3*Kx[i] - F8*PI*Sx[i]; movx_Res[i] = movx_Res[i] - F2o3*Kx[i] - F8*PI*Sx[i];
movy_Res[i] = movy_Res[i] - F2o3*Ky[i] - F8*PI*Sy[i]; movy_Res[i] = movy_Res[i] - F2o3*Ky[i] - F8*PI*Sy[i];
movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i]; movz_Res[i] = movz_Res[i] - F2o3*Kz[i] - F8*PI*Sz[i];
}
} }

View File

@@ -33,7 +33,7 @@
real*8 :: dX,dY,dZ real*8 :: dX,dY,dZ
real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh
real*8, dimension(3) :: SoA real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: d2dx,d2dy,d2dz real*8 :: d2dx,d2dy,d2dz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1 real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
@@ -137,7 +137,7 @@
real*8 :: dX real*8 :: dX
real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh
real*8, dimension(3) :: SoA real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
real*8 :: d2dx real*8 :: d2dx
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1 real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
@@ -1512,9 +1512,8 @@
real*8 :: dX,dY,dZ real*8 :: dX,dY,dZ
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
real*8, dimension(3) :: SoA real*8, dimension(3) :: SoA
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
real*8, parameter :: ZEO=0.d0, ONE=1.d0, TWO=2.d0, F1o4=2.5d-1, F9=9.d0, F45=4.5d1 real*8, parameter :: ZEO=0.d0, ONE=1.d0, TWO=2.d0, F1o4=2.5d-1, F9=9.d0, F45=4.5d1
@@ -1561,55 +1560,17 @@
fxx = ZEO fxx = ZEO
fyy = ZEO fyy = ZEO
fzz = ZEO fzz = ZEO
fxy = ZEO fxy = ZEO
fxz = ZEO fxz = ZEO
fyz = ZEO fyz = ZEO
i_core_min = max(1, imin+2) do k=1,ex(3)
i_core_max = min(ex(1), imax-2) do j=1,ex(2)
j_core_min = max(1, jmin+2) do i=1,ex(1)
j_core_max = min(ex(2), jmax-2) !~~~~~~ fxx
k_core_min = max(1, kmin+2) if(i+2 <= imax .and. i-2 >= imin)then
k_core_max = min(ex(3), kmax-2) !
if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then
do k=k_core_min,k_core_max
do j=j_core_min,j_core_max
do i=i_core_min,i_core_max
! interior points always use 4th-order stencils without branch checks
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
enddo
enddo
enddo
endif
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
if(i>=i_core_min .and. i<=i_core_max .and. &
j>=j_core_min .and. j<=j_core_max .and. &
k>=k_core_min .and. k<=k_core_max) cycle
!~~~~~~ fxx
if(i+2 <= imax .and. i-2 >= imin)then
!
! - f(i-2) + 16 f(i-1) - 30 f(i) + 16 f(i+1) - f(i+2) ! - f(i-2) + 16 f(i-1) - 30 f(i) + 16 f(i+1) - f(i+2)
! fxx(i) = ---------------------------------------------------------- ! fxx(i) = ----------------------------------------------------------
! 12 dx^2 ! 12 dx^2

View File

@@ -41,8 +41,8 @@ void fdderivs(const int ex[3],
const size_t nz = (size_t)ex3 + 2; const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz; const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL; static double *fh = NULL;
static thread_local size_t cap = 0; static size_t cap = 0;
if (fh_size > cap) { if (fh_size > cap) {
free(fh); free(fh);
@@ -141,26 +141,12 @@ void fdderivs(const int ex[3],
const int j4_hi = ex2 - 3; const int j4_hi = ex2 - 3;
const int k4_hi = ex3 - 3; const int k4_hi = ex3 - 3;
/*
* Strategy A:
* Avoid redundant work in overlap of 2nd/4th-order regions.
* Only compute 2nd-order on shell points that are NOT overwritten by
* the 4th-order pass.
*/
const int has4 = (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi);
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) { if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) { for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
const int kF = k0 + 1; const int kF = k0 + 1;
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) { for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
const int jF = j0 + 1; const int jF = j0 + 1;
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) { for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
if (has4 &&
i0 >= i4_lo && i0 <= i4_hi &&
j0 >= j4_lo && j0 <= j4_hi &&
k0 >= k4_lo && k0 <= k4_hi) {
continue;
}
const int iF = i0 + 1; const int iF = i0 + 1;
const size_t p = idx_ex(i0, j0, k0, ex); const size_t p = idx_ex(i0, j0, k0, ex);
@@ -207,7 +193,7 @@ void fdderivs(const int ex[3],
} }
} }
if (has4) { if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) { for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
const int kF = k0 + 1; const int kF = k0 + 1;
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) { for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {

View File

@@ -50,8 +50,8 @@ void fderivs(const int ex[3],
const size_t ny = (size_t)ex2 + 2; const size_t ny = (size_t)ex2 + 2;
const size_t nz = (size_t)ex3 + 2; const size_t nz = (size_t)ex3 + 2;
const size_t fh_size = nx * ny * nz; const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL; static double *fh = NULL;
static thread_local size_t cap = 0; static size_t cap = 0;
if (fh_size > cap) { if (fh_size > cap) {
free(fh); free(fh);

View File

@@ -43,13 +43,7 @@ void lopsided_kodis(const int ex[3],
const size_t nz = (size_t)ex3 + 3; const size_t nz = (size_t)ex3 + 3;
const size_t fh_size = nx * ny * nz; const size_t fh_size = nx * ny * nz;
static thread_local double *fh = NULL; double *fh = (double*)malloc(fh_size * sizeof(double));
static thread_local size_t cap = 0;
if (fh_size > cap) {
free(fh);
fh = (double*)aligned_alloc(64, fh_size * sizeof(double));
cap = fh_size;
}
if (!fh) return; if (!fh) return;
symmetry_bd(3, ex, f, fh, SoA); symmetry_bd(3, ex, f, fh, SoA);
@@ -249,4 +243,6 @@ void lopsided_kodis(const int ex[3],
} }
} }
} }
free(fh);
} }

View File

@@ -16,7 +16,7 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument) ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability ## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS) -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
else else
@@ -26,7 +26,7 @@ else
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) $(OPENMP_FLAGS) -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
endif endif
@@ -64,8 +64,8 @@ lopsided_c.o: lopsided_c.C
lopsided_kodis_c.o: lopsided_kodis_c.C lopsided_kodis_c.o: lopsided_kodis_c.C
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS ## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata

View File

@@ -29,9 +29,6 @@ endif
## instrument : PGO Phase 1 instrumentation to collect fresh profile data ## instrument : PGO Phase 1 instrumentation to collect fresh profile data
PGO_MODE ?= opt PGO_MODE ?= opt
## OpenMP switch for C/C++ kernels
OPENMP_FLAGS ?= -qopenmp
## Interp_Points load balance profiling mode ## Interp_Points load balance profiling mode
## off : (default) no load balance instrumentation ## off : (default) no load balance instrumentation
## profile : Pass 1 — instrument Interp_Points to collect timing profile ## profile : Pass 1 — instrument Interp_Points to collect timing profile

View File

@@ -1956,13 +1956,11 @@
real*8,dimension(3) :: CD,FD real*8,dimension(3) :: CD,FD
real*8 :: tmp_yz(extc(1), 6) ! 存储整条 X 线上 6 个 Y 轴偏置的 Z 向插值结果 real*8 :: tmp_yz(extc(1), 6) ! 存储整条 X 线上 6 个 Y 轴偏置的 Z 向插值结果
real*8 :: tmp_xyz_line(-2:extc(1)) ! 包含 X 向 6 点模板访问所需下界 real*8 :: tmp_xyz_line(extc(1)) ! 存储整条 X 线上完成 Y 向融合后的结果
real*8 :: v1, v2, v3, v4, v5, v6 real*8 :: v1, v2, v3, v4, v5, v6
integer :: ic, jc, kc, ix_offset,ix,iy,iz,jc_min,jc_max,ic_min,ic_max,kc_min,kc_max integer :: ic, jc, kc, ix_offset,ix,iy,iz,jc_min,jc_max
integer :: i_lo, i_hi, j_lo, j_hi, k_lo, k_hi
logical :: need_full_symmetry
real*8 :: res_line real*8 :: res_line
real*8 :: tmp_z_slab(-2:extc(1), -2:extc(2)) ! 包含 Y/X 向模板访问所需下界 real*8 :: tmp_z_slab(extc(1), extc(2)) ! 分配在 k 循环外
if(wei.ne.3)then if(wei.ne.3)then
write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension" write(*,*)"prolongrestrict.f90::prolong3: this routine only surport 3 dimension"
write(*,*)"dim = ",wei write(*,*)"dim = ",wei
@@ -2065,41 +2063,24 @@
endif endif
enddo enddo
ic_min = minval(cix(imino:imaxo)) maxcx = maxval(cix(imino:imaxo))
ic_max = maxval(cix(imino:imaxo)) maxcy = maxval(ciy(jmino:jmaxo))
jc_min = minval(ciy(jmino:jmaxo)) maxcz = maxval(ciz(kmino:kmaxo))
jc_max = maxval(ciy(jmino:jmaxo))
kc_min = minval(ciz(kmino:kmaxo))
kc_max = maxval(ciz(kmino:kmaxo))
maxcx = ic_max
maxcy = jc_max
maxcz = kc_max
if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then if(maxcx+3 > extc(1) .or. maxcy+3 > extc(2) .or. maxcz+3 > extc(3))then
write(*,*)"error in prolong" write(*,*)"error in prolong"
return return
endif endif
i_lo = ic_min - 2 call symmetry_bd(3,extc,func,funcc,SoA)
i_hi = ic_max + 3
j_lo = jc_min - 2
j_hi = jc_max + 3
k_lo = kc_min - 2
k_hi = kc_max + 3
need_full_symmetry = (i_lo < 1) .or. (j_lo < 1) .or. (k_lo < 1)
if(need_full_symmetry)then
call symmetry_bd(3,extc,func,funcc,SoA)
else
funcc(i_lo:i_hi,j_lo:j_hi,k_lo:k_hi) = func(i_lo:i_hi,j_lo:j_hi,k_lo:k_hi)
endif
! 对每个 kpz, kc 固定)预计算 Z 向插值的 2D 切片 ! 对每个 kpz, kc 固定)预计算 Z 向插值的 2D 切片
jc_min = minval(ciy(jmino:jmaxo))
jc_max = maxval(ciy(jmino:jmaxo))
do k = kmino, kmaxo do k = kmino, kmaxo
pz = piz(k); kc = ciz(k) pz = piz(k); kc = ciz(k)
! --- Pass 1: Z 方向,只算一次 --- ! --- Pass 1: Z 方向,只算一次 ---
do iy = jc_min-2, jc_max+3 ! 仅需的 iy 范围(对应 jc-2:jc+3 do iy = jc_min-3, jc_max+3 ! 仅需的 iy 范围
do ii = ic_min-2, ic_max+3 ! 仅需的 ii 范围(对应 cix-2:cix+3 do ii = imini-3, imaxi+3 ! 仅需的 ii 范围
tmp_z_slab(ii, iy) = sum(WC(:,pz) * funcc(ii, iy, kc-2:kc+3)) tmp_z_slab(ii, iy) = sum(WC(:,pz) * funcc(ii, iy, kc-2:kc+3))
end do end do
end do end do
@@ -2107,7 +2088,7 @@ do k = kmino, kmaxo
do j = jmino, jmaxo do j = jmino, jmaxo
py = piy(j); jc = ciy(j) py = piy(j); jc = ciy(j)
! --- Pass 2: Y 方向 --- ! --- Pass 2: Y 方向 ---
do ii = ic_min-2, ic_max+3 do ii = imini-3, imaxi+3
tmp_xyz_line(ii) = sum(WC(:,py) * tmp_z_slab(ii, jc-2:jc+3)) tmp_xyz_line(ii) = sum(WC(:,py) * tmp_z_slab(ii, jc-2:jc+3))
end do end do
! --- Pass 3: X 方向 --- ! --- Pass 3: X 方向 ---
@@ -2370,12 +2351,9 @@ end do
real*8,dimension(3) :: CD,FD real*8,dimension(3) :: CD,FD
real*8 :: tmp_xz_plane(-1:extf(1), 6) real*8 :: tmp_xz_plane(extf(1), 6)
real*8 :: tmp_x_line(-1:extf(1)) real*8 :: tmp_x_line(extf(1))
integer :: fi, fj, fk, ii, jj, kk integer :: fi, fj, fk, ii, jj, kk
integer :: fi_min, fi_max, ii_lo, ii_hi
integer :: fj_min, fj_max, fk_min, fk_max, jj_lo, jj_hi, kk_lo, kk_hi
logical :: need_full_symmetry
if(wei.ne.3)then if(wei.ne.3)then
write(*,*)"prolongrestrict.f90::restrict3: this routine only surport 3 dimension" write(*,*)"prolongrestrict.f90::restrict3: this routine only surport 3 dimension"
@@ -2455,34 +2433,7 @@ end do
stop stop
endif endif
! 仅计算 X 向最终写回所需的窗口: call symmetry_bd(2,extf,funf,funff,SoA)
! func(i,j,k) 只访问 tmp_x_line(fi-2:fi+3)
fi_min = 2*(imino + lbc(1) - 1) - 1 - lbf(1) + 1
fi_max = 2*(imaxo + lbc(1) - 1) - 1 - lbf(1) + 1
fj_min = 2*(jmino + lbc(2) - 1) - 1 - lbf(2) + 1
fj_max = 2*(jmaxo + lbc(2) - 1) - 1 - lbf(2) + 1
fk_min = 2*(kmino + lbc(3) - 1) - 1 - lbf(3) + 1
fk_max = 2*(kmaxo + lbc(3) - 1) - 1 - lbf(3) + 1
ii_lo = fi_min - 2
ii_hi = fi_max + 3
jj_lo = fj_min - 2
jj_hi = fj_max + 3
kk_lo = fk_min - 2
kk_hi = fk_max + 3
if(ii_lo < -1 .or. ii_hi > extf(1) .or. &
jj_lo < -1 .or. jj_hi > extf(2) .or. &
kk_lo < -1 .or. kk_hi > extf(3))then
write(*,*)"restrict3: invalid stencil window"
write(*,*)"ii=",ii_lo,ii_hi," jj=",jj_lo,jj_hi," kk=",kk_lo,kk_hi
write(*,*)"extf=",extf
stop
endif
need_full_symmetry = (ii_lo < 1) .or. (jj_lo < 1) .or. (kk_lo < 1)
if(need_full_symmetry)then
call symmetry_bd(2,extf,funf,funff,SoA)
else
funff(ii_lo:ii_hi,jj_lo:jj_hi,kk_lo:kk_hi) = funf(ii_lo:ii_hi,jj_lo:jj_hi,kk_lo:kk_hi)
endif
!~~~~~~> restriction start... !~~~~~~> restriction start...
do k = kmino, kmaxo do k = kmino, kmaxo
@@ -2494,7 +2445,7 @@ do k = kmino, kmaxo
! 优化点 1: 显式展开 Z 方向计算,减少循环开销 ! 优化点 1: 显式展开 Z 方向计算,减少循环开销
! 确保 ii 循环是最内层且连续访问 ! 确保 ii 循环是最内层且连续访问
!DIR$ VECTOR ALWAYS !DIR$ VECTOR ALWAYS
do ii = ii_lo, ii_hi do ii = 1, extf(1)
! 预计算当前 j 对应的 6 行在 Z 方向的压缩结果 ! 预计算当前 j 对应的 6 行在 Z 方向的压缩结果
! 这里直接硬编码 jj 的偏移,彻底消除一层循环 ! 这里直接硬编码 jj 的偏移,彻底消除一层循环
tmp_xz_plane(ii, 1) = C1*(funff(ii,fj-2,fk-2)+funff(ii,fj-2,fk+3)) + & tmp_xz_plane(ii, 1) = C1*(funff(ii,fj-2,fk-2)+funff(ii,fj-2,fk+3)) + &
@@ -2519,7 +2470,7 @@ do k = kmino, kmaxo
! 优化点 2: 同样向量化 Y 方向压缩 ! 优化点 2: 同样向量化 Y 方向压缩
!DIR$ VECTOR ALWAYS !DIR$ VECTOR ALWAYS
do ii = ii_lo, ii_hi do ii = 1, extf(1)
tmp_x_line(ii) = C1*(tmp_xz_plane(ii, 1) + tmp_xz_plane(ii, 6)) + & tmp_x_line(ii) = C1*(tmp_xz_plane(ii, 1) + tmp_xz_plane(ii, 6)) + &
C2*(tmp_xz_plane(ii, 2) + tmp_xz_plane(ii, 5)) + & C2*(tmp_xz_plane(ii, 2) + tmp_xz_plane(ii, 5)) + &
C3*(tmp_xz_plane(ii, 3) + tmp_xz_plane(ii, 4)) C3*(tmp_xz_plane(ii, 3) + tmp_xz_plane(ii, 4))

View File

@@ -317,9 +317,9 @@ void scalar_class::Setup_Initial_Data()
#endif #endif
} }
} }
void scalar_class::Evolve(int Steps) void scalar_class::Evolve(int Steps)
{ {
double prev_clock = 0.0, curr_clock = 0.0; clock_t prev_clock, curr_clock;
double LastDump = 0.0, LastCheck = 0.0; double LastDump = 0.0, LastCheck = 0.0;
LastAnas = 0; LastAnas = 0;
@@ -327,8 +327,8 @@ void scalar_class::Evolve(int Steps)
for (int ncount = 1; ncount < Steps + 1; ncount++) for (int ncount = 1; ncount < Steps + 1; ncount++)
{ {
if (myrank == 0) if (myrank == 0)
curr_clock = MPI_Wtime(); curr_clock = clock();
RecursiveStep(0); RecursiveStep(0);
LastDump += dT_mon; LastDump += dT_mon;
@@ -343,13 +343,13 @@ void scalar_class::Evolve(int Steps)
#endif #endif
LastDump = 0; LastDump = 0;
} }
if (myrank == 0) if (myrank == 0)
{ {
prev_clock = curr_clock; prev_clock = curr_clock;
curr_clock = MPI_Wtime(); curr_clock = clock();
cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime cout << " Timestep # " << ncount << ": integrating to time: " << PhysTime
<< " Computer used " << (curr_clock - prev_clock) << " seconds! " << endl; << " Computer used " << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << " seconds! " << endl;
} }
if (PhysTime >= TotalTime) if (PhysTime >= TotalTime)
break; break;
} }

View File

@@ -9,7 +9,6 @@
import AMSS_NCKU_Input as input_data import AMSS_NCKU_Input as input_data
import os
import subprocess import subprocess
import time import time
@@ -55,44 +54,6 @@ NUMACTL_CPU_BIND = get_last_n_cores_per_socket(n=32)
BUILD_JOBS = 64 BUILD_JOBS = 64
def build_abe_runtime_env():
"""Inject OpenMP runtime settings only for the main ABE evolution run."""
runtime_env = os.environ.copy()
omp_threads = max(1, int(getattr(input_data, "OMP_Threads", 1)))
runtime_env["OMP_NUM_THREADS"] = str(omp_threads)
return runtime_env
def build_twopuncture_runtime_env():
"""Let TwoPunctureABE use the runtime default instead of the ABE OMP override."""
runtime_env = os.environ.copy()
runtime_env.pop("OMP_NUM_THREADS", None)
runtime_env.pop("OMP_THREAD_LIMIT", None)
return runtime_env
def build_mpi_launch_args():
"""Build optional host-distribution arguments for mpirun."""
hosts = list(getattr(input_data, "MPI_hosts", []))
ppn = int(getattr(input_data, "MPI_processes_per_node", 0))
if not hosts:
return ""
if ppn > 0:
expected = len(hosts) * ppn
if int(input_data.MPI_processes) != expected:
raise ValueError(
f"MPI_processes={input_data.MPI_processes} does not match "
f"len(MPI_hosts) * MPI_processes_per_node = {expected}"
)
launch_args = f"-hosts {','.join(hosts)}"
if ppn > 0:
launch_args += f" -ppn {ppn}"
return launch_args
################################################################## ##################################################################
@@ -182,38 +143,19 @@ def run_ABE():
print( ) print( )
print( " Running the AMSS-NCKU executable file ABE/ABEGPU " ) print( " Running the AMSS-NCKU executable file ABE/ABEGPU " )
print( ) print( )
print( f" MPI processes = {input_data.MPI_processes}, OMP threads per process = {max(1, int(getattr(input_data, 'OMP_Threads', 1)))}" )
if getattr(input_data, "MPI_hosts", []):
print( f" MPI hosts = {getattr(input_data, 'MPI_hosts', [])}, MPI ranks per node = {int(getattr(input_data, 'MPI_processes_per_node', 0))}" )
print( " Multi-node runs require the working directory to be visible on all MPI hosts. " )
print( )
## Define the command to run; cast other values to strings as needed ## Define the command to run; cast other values to strings as needed
mpi_launch_args = build_mpi_launch_args()
if (input_data.GPU_Calculation == "no"): if (input_data.GPU_Calculation == "no"):
mpi_command = NUMACTL_CPU_BIND + " mpirun " mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
if mpi_launch_args:
mpi_command += mpi_launch_args + " "
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABE"
#mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE" #mpi_command = " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
mpi_command_outfile = "ABE_out.log" mpi_command_outfile = "ABE_out.log"
elif (input_data.GPU_Calculation == "yes"): elif (input_data.GPU_Calculation == "yes"):
mpi_command = NUMACTL_CPU_BIND + " mpirun " mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
if mpi_launch_args:
mpi_command += mpi_launch_args + " "
mpi_command += "-np " + str(input_data.MPI_processes) + " ./ABEGPU"
mpi_command_outfile = "ABEGPU_out.log" mpi_command_outfile = "ABEGPU_out.log"
## Execute the MPI command and stream output ## Execute the MPI command and stream output
mpi_process = subprocess.Popen( mpi_process = subprocess.Popen(mpi_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
mpi_command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
env=build_abe_runtime_env(),
)
## Write ABE run output to file while printing to stdout ## Write ABE run output to file while printing to stdout
with open(mpi_command_outfile, 'w') as file0: with open(mpi_command_outfile, 'w') as file0:
@@ -253,14 +195,7 @@ def run_TwoPunctureABE():
TwoPuncture_command_outfile = "TwoPunctureABE_out.log" TwoPuncture_command_outfile = "TwoPunctureABE_out.log"
## Execute the command with subprocess.Popen and stream output ## Execute the command with subprocess.Popen and stream output
TwoPuncture_process = subprocess.Popen( TwoPuncture_process = subprocess.Popen(TwoPuncture_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
TwoPuncture_command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
env=build_twopuncture_runtime_env(),
)
## Write TwoPunctureABE run output to file while printing to stdout ## Write TwoPunctureABE run output to file while printing to stdout
with open(TwoPuncture_command_outfile, 'w') as file0: with open(TwoPuncture_command_outfile, 'w') as file0:

View File

@@ -65,14 +65,10 @@ def print_input_data( File_directory ):
print( "------------------------------------------------------------------------------------------" ) print( "------------------------------------------------------------------------------------------" )
print( ) print( )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " ) print( " Printing the basic parameter and setting in the AMSS-NCKU simulation " )
print( ) print( )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes ) print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes )
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads ) print( )
if getattr(input_data, "MPI_hosts", []):
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts )
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node )
print( )
print( " The form of computational equation = ", input_data.Equation_Class ) print( " The form of computational equation = ", input_data.Equation_Class )
print( " The initial data in this simulation = ", input_data.Initial_Data_Method ) print( " The initial data in this simulation = ", input_data.Initial_Data_Method )
print( ) print( )
@@ -144,14 +140,10 @@ def print_input_data( File_directory ):
file0 = open(filepath, 'w') file0 = open(filepath, 'w')
print( file=file0 ) print( file=file0 )
print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 ) print( " Printing the basic parameter and setting in the AMSS-NCKU simulation ", file=file0 )
print( file=file0 ) print( file=file0 )
print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 ) print( " The number of MPI processes in the AMSS-NCKU simulation = ", input_data.MPI_processes, file=file0 )
print( " The number of OMP threads per MPI process = ", input_data.OMP_Threads, file=file0 ) print( file=file0 )
if getattr(input_data, "MPI_hosts", []):
print( " The MPI host list in the AMSS-NCKU simulation = ", input_data.MPI_hosts, file=file0 )
print( " The number of MPI ranks launched per host = ", input_data.MPI_processes_per_node, file=file0 )
print( file=file0 )
print( " The form of computational equation = ", input_data.Equation_Class, file=file0 ) print( " The form of computational equation = ", input_data.Equation_Class, file=file0 )
print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 ) print( " The initial data in this simulation = ", input_data.Initial_Data_Method, file=file0 )
print( file=file0 ) print( file=file0 )