Compare commits
56 Commits
chb-rebase
...
cjy-goldst
| Author | SHA1 | Date | |
|---|---|---|---|
| 9c31384b2f | |||
| e4e741caa1 | |||
| 65e0f95f40 | |||
| f9fbf97e64 | |||
| 968522995b | |||
| f3988ac8ca | |||
| e4c25eb21f | |||
| 4b10519876 | |||
| 3a58273501 | |||
| 5c65cea2f0 | |||
| 8c1f4d8108 | |||
| d310ef918b | |||
| b35e1b289f | |||
| 05851b2c59 | |||
| 3b39583d67 | |||
| 688bdb6708 | |||
| 5070134857 | |||
| 4012e9d068 | |||
| b3c367f15b | |||
| e73911f292 | |||
| 7543d3e8c7 | |||
| 42c69fab24 | |||
| 95220a05c8 | |||
| 466b084a58 | |||
| 61ccef9f97 | |||
| e11363e06e | |||
| f70e90f694 | |||
|
|
75dd5353b0 | ||
|
|
23a82d063b | ||
| 524d1d1512 | |||
| 44efb2e08c | |||
| 16013081e0 | |||
| 03416a7b28 | |||
| cca3c16c2b | |||
| e5231849ee | |||
| a766e49ff0 | |||
| 1a518cd3f6 | |||
| 1dc622e516 | |||
| 3046a0ccde | |||
| d4ec69c98a | |||
| 2c0a3055d4 | |||
| 1eba73acbe | |||
| b91cfff301 | |||
| e29ca2dca9 | |||
| 6493101ca0 | |||
| 169986cde1 | |||
| 1fbc213888 | |||
| 6024708a48 | |||
| bc457d981e | |||
| 51dead090e | |||
| 34d6922a66 | |||
| 8010ad27ed | |||
| 38e691f013 | |||
| 808387aa11 | |||
| c2b676abf2 | |||
| 2c60533501 |
@@ -4320,7 +4320,7 @@ Parallel::SyncCache::SyncCache()
|
|||||||
: valid(false), cpusize(0), combined_src(0), combined_dst(0),
|
: valid(false), cpusize(0), combined_src(0), combined_dst(0),
|
||||||
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
|
send_lengths(0), recv_lengths(0), send_bufs(0), recv_bufs(0),
|
||||||
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0),
|
send_buf_caps(0), recv_buf_caps(0), reqs(0), stats(0), max_reqs(0),
|
||||||
lengths_valid(false)
|
lengths_valid(false), tc_req_node(0), tc_req_is_recv(0), tc_completed(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
// SyncCache invalidate: free grid segment lists but keep buffers
|
// SyncCache invalidate: free grid segment lists but keep buffers
|
||||||
@@ -4359,11 +4359,15 @@ void Parallel::SyncCache::destroy()
|
|||||||
if (recv_bufs) delete[] recv_bufs;
|
if (recv_bufs) delete[] recv_bufs;
|
||||||
if (reqs) delete[] reqs;
|
if (reqs) delete[] reqs;
|
||||||
if (stats) delete[] stats;
|
if (stats) delete[] stats;
|
||||||
|
if (tc_req_node) delete[] tc_req_node;
|
||||||
|
if (tc_req_is_recv) delete[] tc_req_is_recv;
|
||||||
|
if (tc_completed) delete[] tc_completed;
|
||||||
combined_src = combined_dst = 0;
|
combined_src = combined_dst = 0;
|
||||||
send_lengths = recv_lengths = 0;
|
send_lengths = recv_lengths = 0;
|
||||||
send_buf_caps = recv_buf_caps = 0;
|
send_buf_caps = recv_buf_caps = 0;
|
||||||
send_bufs = recv_bufs = 0;
|
send_bufs = recv_bufs = 0;
|
||||||
reqs = 0; stats = 0;
|
reqs = 0; stats = 0;
|
||||||
|
tc_req_node = 0; tc_req_is_recv = 0; tc_completed = 0;
|
||||||
cpusize = 0; max_reqs = 0;
|
cpusize = 0; max_reqs = 0;
|
||||||
}
|
}
|
||||||
// transfer_cached: reuse pre-allocated buffers from SyncCache
|
// transfer_cached: reuse pre-allocated buffers from SyncCache
|
||||||
@@ -4379,9 +4383,9 @@ void Parallel::transfer_cached(MyList<Parallel::gridseg> **src, MyList<Parallel:
|
|||||||
int req_no = 0;
|
int req_no = 0;
|
||||||
int pending_recv = 0;
|
int pending_recv = 0;
|
||||||
int node;
|
int node;
|
||||||
int *req_node = new int[cache.max_reqs];
|
int *req_node = cache.tc_req_node;
|
||||||
int *req_is_recv = new int[cache.max_reqs];
|
int *req_is_recv = cache.tc_req_is_recv;
|
||||||
int *completed = new int[cache.max_reqs];
|
int *completed = cache.tc_completed;
|
||||||
|
|
||||||
// Post receives first so peers can progress rendezvous early.
|
// Post receives first so peers can progress rendezvous early.
|
||||||
for (node = 0; node < cpusize; node++)
|
for (node = 0; node < cpusize; node++)
|
||||||
@@ -4466,12 +4470,7 @@ void Parallel::transfer_cached(MyList<Parallel::gridseg> **src, MyList<Parallel:
|
|||||||
|
|
||||||
if (self_len > 0)
|
if (self_len > 0)
|
||||||
data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry);
|
data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList1, VarList2, Symmetry);
|
||||||
|
|
||||||
delete[] req_node;
|
|
||||||
delete[] req_is_recv;
|
|
||||||
delete[] completed;
|
|
||||||
}
|
}
|
||||||
// Sync_cached: build grid segment lists on first call, reuse on subsequent calls
|
|
||||||
void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache)
|
void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry, SyncCache &cache)
|
||||||
{
|
{
|
||||||
if (!cache.valid)
|
if (!cache.valid)
|
||||||
@@ -4499,6 +4498,9 @@ void Parallel::Sync_cached(MyList<Patch> *PatL, MyList<var> *VarList, int Symmet
|
|||||||
cache.max_reqs = 2 * cpusize;
|
cache.max_reqs = 2 * cpusize;
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
cache.stats = new MPI_Status[cache.max_reqs];
|
||||||
|
cache.tc_req_node = new int[cache.max_reqs];
|
||||||
|
cache.tc_req_is_recv = new int[cache.max_reqs];
|
||||||
|
cache.tc_completed = new int[cache.max_reqs];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
for (int node = 0; node < cpusize; node++)
|
||||||
@@ -4599,6 +4601,9 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
cache.max_reqs = 2 * cpusize;
|
cache.max_reqs = 2 * cpusize;
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
cache.stats = new MPI_Status[cache.max_reqs];
|
||||||
|
cache.tc_req_node = new int[cache.max_reqs];
|
||||||
|
cache.tc_req_is_recv = new int[cache.max_reqs];
|
||||||
|
cache.tc_completed = new int[cache.max_reqs];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
for (int node = 0; node < cpusize; node++)
|
||||||
@@ -4669,6 +4674,11 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
int cpusize = cache.cpusize;
|
int cpusize = cache.cpusize;
|
||||||
state.req_no = 0;
|
state.req_no = 0;
|
||||||
state.active = true;
|
state.active = true;
|
||||||
|
state.pending_recv = 0;
|
||||||
|
// Allocate tracking arrays
|
||||||
|
delete[] state.req_node; delete[] state.req_is_recv;
|
||||||
|
state.req_node = new int[cache.max_reqs];
|
||||||
|
state.req_is_recv = new int[cache.max_reqs];
|
||||||
|
|
||||||
MyList<Parallel::gridseg> **src = cache.combined_src;
|
MyList<Parallel::gridseg> **src = cache.combined_src;
|
||||||
MyList<Parallel::gridseg> **dst = cache.combined_dst;
|
MyList<Parallel::gridseg> **dst = cache.combined_dst;
|
||||||
@@ -4713,6 +4723,8 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
cache.send_buf_caps[node] = slength;
|
cache.send_buf_caps[node] = slength;
|
||||||
}
|
}
|
||||||
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
data_packer(cache.send_bufs[node], src[myrank], dst[myrank], node, PACK, VarList, VarList, Symmetry);
|
||||||
|
state.req_node[state.req_no] = node;
|
||||||
|
state.req_is_recv[state.req_no] = 0;
|
||||||
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
MPI_Isend((void *)cache.send_bufs[node], slength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
||||||
}
|
}
|
||||||
int rlength;
|
int rlength;
|
||||||
@@ -4730,29 +4742,60 @@ void Parallel::Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetr
|
|||||||
cache.recv_bufs[node] = new double[rlength];
|
cache.recv_bufs[node] = new double[rlength];
|
||||||
cache.recv_buf_caps[node] = rlength;
|
cache.recv_buf_caps[node] = rlength;
|
||||||
}
|
}
|
||||||
|
state.req_node[state.req_no] = node;
|
||||||
|
state.req_is_recv[state.req_no] = 1;
|
||||||
|
state.pending_recv++;
|
||||||
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
MPI_Irecv((void *)cache.recv_bufs[node], rlength, MPI_DOUBLE, node, 2, MPI_COMM_WORLD, cache.reqs + state.req_no++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cache.lengths_valid = true;
|
cache.lengths_valid = true;
|
||||||
}
|
}
|
||||||
// Sync_finish: wait for async MPI operations and unpack
|
// Sync_finish: progressive unpack as receives complete, then wait for sends
|
||||||
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
void Parallel::Sync_finish(SyncCache &cache, AsyncSyncState &state,
|
||||||
MyList<var> *VarList, int Symmetry)
|
MyList<var> *VarList, int Symmetry)
|
||||||
{
|
{
|
||||||
if (!state.active)
|
if (!state.active)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
MPI_Waitall(state.req_no, cache.reqs, cache.stats);
|
int myrank;
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
int cpusize = cache.cpusize;
|
|
||||||
MyList<Parallel::gridseg> **src = cache.combined_src;
|
MyList<Parallel::gridseg> **src = cache.combined_src;
|
||||||
MyList<Parallel::gridseg> **dst = cache.combined_dst;
|
MyList<Parallel::gridseg> **dst = cache.combined_dst;
|
||||||
|
|
||||||
for (int node = 0; node < cpusize; node++)
|
// Unpack local data first (no MPI needed)
|
||||||
if (cache.recv_bufs[node] && cache.recv_lengths[node] > 0)
|
if (cache.recv_bufs[myrank] && cache.recv_lengths[myrank] > 0)
|
||||||
data_packer(cache.recv_bufs[node], src[node], dst[node], node, UNPACK, VarList, VarList, Symmetry);
|
data_packer(cache.recv_bufs[myrank], src[myrank], dst[myrank], myrank, UNPACK, VarList, VarList, Symmetry);
|
||||||
|
|
||||||
|
// Progressive unpack of remote receives
|
||||||
|
if (state.pending_recv > 0 && state.req_no > 0)
|
||||||
|
{
|
||||||
|
int pending = state.pending_recv;
|
||||||
|
int *completed = new int[cache.max_reqs];
|
||||||
|
while (pending > 0)
|
||||||
|
{
|
||||||
|
int outcount = 0;
|
||||||
|
MPI_Waitsome(state.req_no, cache.reqs, &outcount, completed, cache.stats);
|
||||||
|
if (outcount == MPI_UNDEFINED) break;
|
||||||
|
for (int i = 0; i < outcount; i++)
|
||||||
|
{
|
||||||
|
int idx = completed[i];
|
||||||
|
if (idx >= 0 && state.req_is_recv[idx])
|
||||||
|
{
|
||||||
|
int recv_node = state.req_node[idx];
|
||||||
|
data_packer(cache.recv_bufs[recv_node], src[recv_node], dst[recv_node], recv_node, UNPACK, VarList, VarList, Symmetry);
|
||||||
|
pending--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[] completed;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for remaining sends
|
||||||
|
if (state.req_no > 0) MPI_Waitall(state.req_no, cache.reqs, cache.stats);
|
||||||
|
|
||||||
|
delete[] state.req_node; state.req_node = 0;
|
||||||
|
delete[] state.req_is_recv; state.req_is_recv = 0;
|
||||||
state.active = false;
|
state.active = false;
|
||||||
}
|
}
|
||||||
// collect buffer grid segments or blocks for the periodic boundary condition of given patch
|
// collect buffer grid segments or blocks for the periodic boundary condition of given patch
|
||||||
@@ -5241,6 +5284,41 @@ double Parallel::L2Norm(Patch *Pat, var *vf)
|
|||||||
|
|
||||||
return tvf;
|
return tvf;
|
||||||
}
|
}
|
||||||
|
void Parallel::L2Norm7(Patch *Pat, var **vf, double *norms)
|
||||||
|
{
|
||||||
|
int myrank;
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
|
||||||
|
double tvf[7], dtvf[7];
|
||||||
|
int BDW = ghost_width;
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
dtvf[i] = 0;
|
||||||
|
|
||||||
|
MyList<Block> *BP = Pat->blb;
|
||||||
|
while (BP)
|
||||||
|
{
|
||||||
|
Block *cg = BP->data;
|
||||||
|
if (myrank == cg->rank)
|
||||||
|
{
|
||||||
|
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||||
|
Pat->bbox[0], Pat->bbox[1], Pat->bbox[2],
|
||||||
|
Pat->bbox[3], Pat->bbox[4], Pat->bbox[5],
|
||||||
|
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
|
||||||
|
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
|
||||||
|
cg->fgfs[vf[6]->sgfn], tvf, BDW);
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
dtvf[i] += tvf[i];
|
||||||
|
}
|
||||||
|
if (BP == Pat->ble)
|
||||||
|
break;
|
||||||
|
BP = BP->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
norms[i] = sqrt(tvf[i]);
|
||||||
|
}
|
||||||
double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here)
|
double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here)
|
||||||
{
|
{
|
||||||
int myrank;
|
int myrank;
|
||||||
@@ -5272,6 +5350,41 @@ double Parallel::L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here)
|
|||||||
|
|
||||||
return tvf;
|
return tvf;
|
||||||
}
|
}
|
||||||
|
void Parallel::L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here)
|
||||||
|
{
|
||||||
|
int myrank;
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
|
|
||||||
|
double tvf[7], dtvf[7];
|
||||||
|
int BDW = ghost_width;
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
dtvf[i] = 0;
|
||||||
|
|
||||||
|
MyList<Block> *BP = Pat->blb;
|
||||||
|
while (BP)
|
||||||
|
{
|
||||||
|
Block *cg = BP->data;
|
||||||
|
if (myrank == cg->rank)
|
||||||
|
{
|
||||||
|
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||||
|
Pat->bbox[0], Pat->bbox[1], Pat->bbox[2],
|
||||||
|
Pat->bbox[3], Pat->bbox[4], Pat->bbox[5],
|
||||||
|
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
|
||||||
|
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
|
||||||
|
cg->fgfs[vf[6]->sgfn], tvf, BDW);
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
dtvf[i] += tvf[i];
|
||||||
|
}
|
||||||
|
if (BP == Pat->ble)
|
||||||
|
break;
|
||||||
|
BP = BP->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, Comm_here);
|
||||||
|
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
norms[i] = sqrt(tvf[i]);
|
||||||
|
}
|
||||||
void Parallel::checkgsl(MyList<Parallel::gridseg> *pp, bool first_only)
|
void Parallel::checkgsl(MyList<Parallel::gridseg> *pp, bool first_only)
|
||||||
{
|
{
|
||||||
int myrank = 0;
|
int myrank = 0;
|
||||||
@@ -5819,6 +5932,9 @@ void Parallel::Restrict_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|||||||
cache.max_reqs = 2 * cpusize;
|
cache.max_reqs = 2 * cpusize;
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
cache.stats = new MPI_Status[cache.max_reqs];
|
||||||
|
cache.tc_req_node = new int[cache.max_reqs];
|
||||||
|
cache.tc_req_is_recv = new int[cache.max_reqs];
|
||||||
|
cache.tc_completed = new int[cache.max_reqs];
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL);
|
MyList<Parallel::gridseg> *dst = build_complete_gsl(PatcL);
|
||||||
@@ -5865,6 +5981,9 @@ void Parallel::OutBdLow2Hi_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|||||||
cache.max_reqs = 2 * cpusize;
|
cache.max_reqs = 2 * cpusize;
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
cache.stats = new MPI_Status[cache.max_reqs];
|
||||||
|
cache.tc_req_node = new int[cache.max_reqs];
|
||||||
|
cache.tc_req_is_recv = new int[cache.max_reqs];
|
||||||
|
cache.tc_completed = new int[cache.max_reqs];
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
||||||
@@ -5911,6 +6030,9 @@ void Parallel::OutBdLow2Himix_cached(MyList<Patch> *PatcL, MyList<Patch> *PatfL,
|
|||||||
cache.max_reqs = 2 * cpusize;
|
cache.max_reqs = 2 * cpusize;
|
||||||
cache.reqs = new MPI_Request[cache.max_reqs];
|
cache.reqs = new MPI_Request[cache.max_reqs];
|
||||||
cache.stats = new MPI_Status[cache.max_reqs];
|
cache.stats = new MPI_Status[cache.max_reqs];
|
||||||
|
cache.tc_req_node = new int[cache.max_reqs];
|
||||||
|
cache.tc_req_is_recv = new int[cache.max_reqs];
|
||||||
|
cache.tc_completed = new int[cache.max_reqs];
|
||||||
}
|
}
|
||||||
|
|
||||||
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
MyList<Parallel::gridseg> *dst = build_buffer_gsl(PatfL);
|
||||||
|
|||||||
@@ -108,6 +108,9 @@ namespace Parallel
|
|||||||
MPI_Status *stats;
|
MPI_Status *stats;
|
||||||
int max_reqs;
|
int max_reqs;
|
||||||
bool lengths_valid;
|
bool lengths_valid;
|
||||||
|
int *tc_req_node;
|
||||||
|
int *tc_req_is_recv;
|
||||||
|
int *tc_completed;
|
||||||
SyncCache();
|
SyncCache();
|
||||||
void invalidate();
|
void invalidate();
|
||||||
void destroy();
|
void destroy();
|
||||||
@@ -121,7 +124,10 @@ namespace Parallel
|
|||||||
struct AsyncSyncState {
|
struct AsyncSyncState {
|
||||||
int req_no;
|
int req_no;
|
||||||
bool active;
|
bool active;
|
||||||
AsyncSyncState() : req_no(0), active(false) {}
|
int *req_node;
|
||||||
|
int *req_is_recv;
|
||||||
|
int pending_recv;
|
||||||
|
AsyncSyncState() : req_no(0), active(false), req_node(0), req_is_recv(0), pending_recv(0) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
|
void Sync_start(MyList<Patch> *PatL, MyList<var> *VarList, int Symmetry,
|
||||||
@@ -177,6 +183,7 @@ namespace Parallel
|
|||||||
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
MyList<Parallel::gridseg> **out_src, MyList<Parallel::gridseg> **out_dst);
|
||||||
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
void PeriodicBD(Patch *Pat, MyList<var> *VarList, int Symmetry);
|
||||||
double L2Norm(Patch *Pat, var *vf);
|
double L2Norm(Patch *Pat, var *vf);
|
||||||
|
void L2Norm7(Patch *Pat, var **vf, double *norms);
|
||||||
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
void checkgsl(MyList<Parallel::gridseg> *pp, bool first_only);
|
||||||
void checkvarl(MyList<var> *pp, bool first_only);
|
void checkvarl(MyList<var> *pp, bool first_only);
|
||||||
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
MyList<Parallel::gridseg> *divide_gsl(MyList<Parallel::gridseg> *p, Patch *Pat);
|
||||||
@@ -212,6 +219,7 @@ namespace Parallel
|
|||||||
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
void checkpatchlist(MyList<Patch> *PatL, bool buflog);
|
||||||
|
|
||||||
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
double L2Norm(Patch *Pat, var *vf, MPI_Comm Comm_here);
|
||||||
|
void L2Norm7(Patch *Pat, var **vf, double *norms, MPI_Comm Comm_here);
|
||||||
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
bool PatList_Interp_Points(MyList<Patch> *PatL, MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
double *Shellf, int Symmetry, MPI_Comm Comm_here);
|
||||||
|
|||||||
@@ -3472,6 +3472,43 @@ double ShellPatch::L2Norm(var *vf)
|
|||||||
|
|
||||||
return tvf;
|
return tvf;
|
||||||
}
|
}
|
||||||
|
void ShellPatch::L2Norm7(var **vf, double *norms)
|
||||||
|
{
|
||||||
|
double tvf[7], dtvf[7];
|
||||||
|
int BDW = overghost;
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
dtvf[i] = 0;
|
||||||
|
|
||||||
|
MyList<ss_patch> *sPp = PatL;
|
||||||
|
while (sPp)
|
||||||
|
{
|
||||||
|
MyList<Block> *Bp = sPp->data->blb;
|
||||||
|
while (Bp)
|
||||||
|
{
|
||||||
|
Block *cg = Bp->data;
|
||||||
|
if (myrank == cg->rank)
|
||||||
|
{
|
||||||
|
f_l2normhelper7(cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||||
|
sPp->data->bbox[0], sPp->data->bbox[1], sPp->data->bbox[2],
|
||||||
|
sPp->data->bbox[3], sPp->data->bbox[4], sPp->data->bbox[5],
|
||||||
|
cg->fgfs[vf[0]->sgfn], cg->fgfs[vf[1]->sgfn], cg->fgfs[vf[2]->sgfn],
|
||||||
|
cg->fgfs[vf[3]->sgfn], cg->fgfs[vf[4]->sgfn], cg->fgfs[vf[5]->sgfn],
|
||||||
|
cg->fgfs[vf[6]->sgfn], tvf, BDW);
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
dtvf[i] += tvf[i];
|
||||||
|
}
|
||||||
|
if (Bp == sPp->data->ble)
|
||||||
|
break;
|
||||||
|
Bp = Bp->next;
|
||||||
|
}
|
||||||
|
sPp = sPp->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Allreduce(dtvf, tvf, 7, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
for (int i = 0; i < 7; i++)
|
||||||
|
norms[i] = sqrt(tvf[i]);
|
||||||
|
}
|
||||||
|
|
||||||
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
|
// find maximum of abstract value, XX store position for maximum, Shellf store maximum themselvs
|
||||||
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,
|
void ShellPatch::Find_Maximum(MyList<var> *VarList, double *XX,
|
||||||
|
|||||||
@@ -198,6 +198,7 @@ public:
|
|||||||
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
void write_Pablo_file_ss(int *ext, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax,
|
||||||
char *filename, int sst);
|
char *filename, int sst);
|
||||||
double L2Norm(var *vf);
|
double L2Norm(var *vf);
|
||||||
|
void L2Norm7(var **vf, double *norms);
|
||||||
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
|
void Find_Maximum(MyList<var> *VarList, double *XX, double *Shellf);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,239 @@ using namespace std;
|
|||||||
#include "derivatives.h"
|
#include "derivatives.h"
|
||||||
#include "ricci_gamma.h"
|
#include "ricci_gamma.h"
|
||||||
|
|
||||||
|
// Compile-time switch for per-timestep memory usage collection/printing.
|
||||||
|
// Default is OFF to reduce overhead in production runs.
|
||||||
|
#ifndef BSSN_ENABLE_MEM_USAGE_LOG
|
||||||
|
#define BSSN_ENABLE_MEM_USAGE_LOG 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BSSN_FINE_TIMING
|
||||||
|
#define BSSN_FINE_TIMING 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BSSN_FINE_TIMING_EVERY
|
||||||
|
#define BSSN_FINE_TIMING_EVERY 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BSSN_FINE_TIMING_TOPN
|
||||||
|
#define BSSN_FINE_TIMING_TOPN 8
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BSSN_KERNEL_FINE_TIMING
|
||||||
|
#define BSSN_KERNEL_FINE_TIMING 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
|
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if BSSN_FINE_TIMING
|
||||||
|
namespace step_timing
|
||||||
|
{
|
||||||
|
enum Bucket
|
||||||
|
{
|
||||||
|
TB_ANALYSIS_PSI4 = 0,
|
||||||
|
TB_ANALYSIS_SURFACE,
|
||||||
|
TB_ANALYSIS_IO,
|
||||||
|
TB_BH_PREDICTOR,
|
||||||
|
TB_PREDICTOR_RHS,
|
||||||
|
TB_PREDICTOR_SYNC,
|
||||||
|
TB_BH_CORRECTOR,
|
||||||
|
TB_CORRECTOR_RHS,
|
||||||
|
TB_CORRECTOR_SYNC,
|
||||||
|
TB_STATE_SWAP,
|
||||||
|
TB_RESTRICT_PROLONG,
|
||||||
|
TB_CONSTRAINT_OUT,
|
||||||
|
TB_DUMP_3D,
|
||||||
|
TB_DUMP_2D,
|
||||||
|
TB_CHECKPOINT,
|
||||||
|
TB_REGRID,
|
||||||
|
TB_COUNT
|
||||||
|
};
|
||||||
|
|
||||||
|
static double local_bucket_seconds[TB_COUNT];
|
||||||
|
|
||||||
|
static const char *bucket_labels[TB_COUNT] =
|
||||||
|
{
|
||||||
|
"analysis_psi4",
|
||||||
|
"analysis_surface",
|
||||||
|
"analysis_io",
|
||||||
|
"bh_predictor",
|
||||||
|
"predictor_rhs",
|
||||||
|
"predictor_sync",
|
||||||
|
"bh_corrector",
|
||||||
|
"corrector_rhs",
|
||||||
|
"corrector_sync",
|
||||||
|
"state_swap",
|
||||||
|
"restrict_prolong",
|
||||||
|
"constraint_out",
|
||||||
|
"dump_3d",
|
||||||
|
"dump_2d",
|
||||||
|
"checkpoint",
|
||||||
|
"regrid"
|
||||||
|
};
|
||||||
|
|
||||||
|
void reset()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < TB_COUNT; i++)
|
||||||
|
local_bucket_seconds[i] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(Bucket bucket, double seconds)
|
||||||
|
{
|
||||||
|
local_bucket_seconds[int(bucket)] += seconds;
|
||||||
|
}
|
||||||
|
|
||||||
|
void report(int myrank, int nprocs, monitor *TimingMonitor,
|
||||||
|
int step_index, double phys_time, double step_wall_seconds)
|
||||||
|
{
|
||||||
|
double max_bucket_seconds[TB_COUNT];
|
||||||
|
double avg_bucket_seconds[TB_COUNT];
|
||||||
|
|
||||||
|
MPI_Reduce(local_bucket_seconds, max_bucket_seconds, TB_COUNT, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
|
||||||
|
MPI_Reduce(local_bucket_seconds, avg_bucket_seconds, TB_COUNT, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
if (myrank != 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (int i = 0; i < TB_COUNT; i++)
|
||||||
|
avg_bucket_seconds[i] /= Mymax(1, nprocs);
|
||||||
|
|
||||||
|
if (TimingMonitor)
|
||||||
|
{
|
||||||
|
double row[2 + 2 * TB_COUNT];
|
||||||
|
row[0] = double(step_index);
|
||||||
|
row[1] = step_wall_seconds;
|
||||||
|
for (int i = 0; i < TB_COUNT; i++)
|
||||||
|
{
|
||||||
|
row[2 + i] = max_bucket_seconds[i];
|
||||||
|
row[2 + TB_COUNT + i] = avg_bucket_seconds[i];
|
||||||
|
}
|
||||||
|
TimingMonitor->writefile(phys_time, 2 + 2 * TB_COUNT, row);
|
||||||
|
}
|
||||||
|
|
||||||
|
double residual = step_wall_seconds;
|
||||||
|
for (int i = 0; i < TB_COUNT; i++)
|
||||||
|
residual -= max_bucket_seconds[i];
|
||||||
|
if (residual < 0.0)
|
||||||
|
residual = 0.0;
|
||||||
|
|
||||||
|
int order[TB_COUNT];
|
||||||
|
for (int i = 0; i < TB_COUNT; i++)
|
||||||
|
order[i] = i;
|
||||||
|
|
||||||
|
for (int i = 0; i < TB_COUNT - 1; i++)
|
||||||
|
for (int j = i + 1; j < TB_COUNT; j++)
|
||||||
|
if (max_bucket_seconds[order[j]] > max_bucket_seconds[order[i]])
|
||||||
|
{
|
||||||
|
int tmp = order[i];
|
||||||
|
order[i] = order[j];
|
||||||
|
order[j] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
ios::fmtflags old_flags = cout.flags();
|
||||||
|
streamsize old_precision = cout.precision();
|
||||||
|
|
||||||
|
cout << " Fine timing hot spots (max rank wall estimate):" << endl;
|
||||||
|
const int topn = Mymin(BSSN_FINE_TIMING_TOPN, TB_COUNT);
|
||||||
|
for (int i = 0; i < topn; i++)
|
||||||
|
{
|
||||||
|
const int ib = order[i];
|
||||||
|
const double frac = (step_wall_seconds > 0.0) ? (100.0 * max_bucket_seconds[ib] / step_wall_seconds) : 0.0;
|
||||||
|
cout << " "
|
||||||
|
<< setw(20) << left << bucket_labels[ib]
|
||||||
|
<< " = " << setw(10) << right << setprecision(6) << max_bucket_seconds[ib]
|
||||||
|
<< " s (" << setw(6) << setprecision(4) << frac << "%)" << endl;
|
||||||
|
}
|
||||||
|
if (residual > 1.0e-6)
|
||||||
|
{
|
||||||
|
const double frac = (step_wall_seconds > 0.0) ? (100.0 * residual / step_wall_seconds) : 0.0;
|
||||||
|
cout << " "
|
||||||
|
<< setw(20) << left << "unprofiled_residual"
|
||||||
|
<< " = " << setw(10) << right << setprecision(6) << residual
|
||||||
|
<< " s (" << setw(6) << setprecision(4) << frac << "%)" << endl;
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
cout.flags(old_flags);
|
||||||
|
cout.precision(old_precision);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define STEP_TIMER_DECL(var_name) const double var_name = MPI_Wtime()
|
||||||
|
#define STEP_TIMER_ADD(bucket_name, var_name) step_timing::add(step_timing::bucket_name, MPI_Wtime() - (var_name))
|
||||||
|
#else
|
||||||
|
#define STEP_TIMER_DECL(var_name)
|
||||||
|
#define STEP_TIMER_ADD(bucket_name, var_name)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
namespace rhs_kernel_timing_report
|
||||||
|
{
|
||||||
|
void report(int myrank, int nprocs, int step_index, double step_wall_seconds)
|
||||||
|
{
|
||||||
|
const int bucket_count = f_bssn_rhs_kernel_timing_bucket_count();
|
||||||
|
const double *local_bucket_seconds = f_bssn_rhs_kernel_timing_local_seconds();
|
||||||
|
|
||||||
|
if (bucket_count <= 0 || !local_bucket_seconds)
|
||||||
|
return;
|
||||||
|
|
||||||
|
double *max_bucket_seconds = new double[bucket_count];
|
||||||
|
double *avg_bucket_seconds = new double[bucket_count];
|
||||||
|
int *order = new int[bucket_count];
|
||||||
|
|
||||||
|
MPI_Reduce((void *)local_bucket_seconds, max_bucket_seconds, bucket_count, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
|
||||||
|
MPI_Reduce((void *)local_bucket_seconds, avg_bucket_seconds, bucket_count, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
|
||||||
|
|
||||||
|
if (myrank == 0)
|
||||||
|
{
|
||||||
|
double kernel_total = 0.0;
|
||||||
|
for (int i = 0; i < bucket_count; ++i)
|
||||||
|
{
|
||||||
|
avg_bucket_seconds[i] /= Mymax(1, nprocs);
|
||||||
|
order[i] = i;
|
||||||
|
kernel_total += max_bucket_seconds[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < bucket_count - 1; ++i)
|
||||||
|
for (int j = i + 1; j < bucket_count; ++j)
|
||||||
|
if (max_bucket_seconds[order[j]] > max_bucket_seconds[order[i]])
|
||||||
|
{
|
||||||
|
int tmp = order[i];
|
||||||
|
order[i] = order[j];
|
||||||
|
order[j] = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
ios::fmtflags old_flags = cout.flags();
|
||||||
|
streamsize old_precision = cout.precision();
|
||||||
|
|
||||||
|
const double kernel_frac = (step_wall_seconds > 0.0) ? (100.0 * kernel_total / step_wall_seconds) : 0.0;
|
||||||
|
cout << " RHS kernel split (max-rank accumulated over step " << step_index << "): total "
|
||||||
|
<< setprecision(6) << kernel_total << " s (" << setprecision(4)
|
||||||
|
<< kernel_frac << "% of coarse step)" << endl;
|
||||||
|
|
||||||
|
const int topn = Mymin(BSSN_FINE_TIMING_TOPN, bucket_count);
|
||||||
|
for (int i = 0; i < topn; ++i)
|
||||||
|
{
|
||||||
|
const int ib = order[i];
|
||||||
|
const double frac = (kernel_total > 0.0) ? (100.0 * max_bucket_seconds[ib] / kernel_total) : 0.0;
|
||||||
|
cout << " "
|
||||||
|
<< setw(20) << left << f_bssn_rhs_kernel_timing_label(ib)
|
||||||
|
<< " = " << setw(10) << right << setprecision(6) << max_bucket_seconds[ib]
|
||||||
|
<< " s (" << setw(6) << setprecision(4) << frac << "% of kernel)" << endl;
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
cout.flags(old_flags);
|
||||||
|
cout.precision(old_precision);
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] max_bucket_seconds;
|
||||||
|
delete[] avg_bucket_seconds;
|
||||||
|
delete[] order;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
|
|
||||||
// define bssn_class
|
// define bssn_class
|
||||||
@@ -59,6 +292,7 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
|
|||||||
xc(0), yc(0), zc(0), xr(0), yr(0), zr(0), trigger(0), dTT(0), dumpid(0),
|
xc(0), yc(0), zc(0), xr(0), yr(0), zr(0), trigger(0), dTT(0), dumpid(0),
|
||||||
#endif
|
#endif
|
||||||
a_lev(a_levi), maxl(maxli), decn(decni), maxrex(maxrexi), drex(drexi),
|
a_lev(a_levi), maxl(maxli), decn(decni), maxrex(maxrexi), drex(drexi),
|
||||||
|
ConstraintRefreshLevels(0),
|
||||||
CheckPoint(0)
|
CheckPoint(0)
|
||||||
// CheckPoint(0)
|
// CheckPoint(0)
|
||||||
{
|
{
|
||||||
@@ -101,6 +335,24 @@ bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
|
|||||||
a_stream.str("");
|
a_stream.str("");
|
||||||
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
|
a_stream << setw(15) << "# time Ham Px Py Pz Gx Gy Gz";
|
||||||
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
|
ConVMonitor = new monitor("bssn_constraint.dat", myrank, a_stream.str());
|
||||||
|
|
||||||
|
#if BSSN_FINE_TIMING
|
||||||
|
a_stream.clear();
|
||||||
|
a_stream.str("");
|
||||||
|
a_stream << setw(8) << "# step";
|
||||||
|
a_stream << setw(14) << "wall";
|
||||||
|
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
|
||||||
|
a_stream << setw(18) << step_timing::bucket_labels[ib];
|
||||||
|
for (int ib = 0; ib < step_timing::TB_COUNT; ib++)
|
||||||
|
{
|
||||||
|
char str_avg[64];
|
||||||
|
sprintf(str_avg, "avg_%s", step_timing::bucket_labels[ib]);
|
||||||
|
a_stream << setw(18) << str_avg;
|
||||||
|
}
|
||||||
|
TimingMonitor = new monitor("bssn_step_timing.dat", myrank, a_stream.str());
|
||||||
|
#else
|
||||||
|
TimingMonitor = 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
// setup sphere integration engine
|
// setup sphere integration engine
|
||||||
Waveshell = new surface_integral(Symmetry);
|
Waveshell = new surface_integral(Symmetry);
|
||||||
@@ -696,6 +948,9 @@ void bssn_class::Initialize()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor);
|
GH = new cgh(0, ngfs, Symmetry, pname, checkrun, ErrorMonitor);
|
||||||
|
ConstraintRefreshLevels = new int[GH->levels];
|
||||||
|
for (int il = 0; il < GH->levels; il++)
|
||||||
|
ConstraintRefreshLevels[il] = 0;
|
||||||
if (checkrun)
|
if (checkrun)
|
||||||
CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry);
|
CheckPoint->readcheck_cgh(PhysTime, GH, myrank, nprocs, Symmetry);
|
||||||
else
|
else
|
||||||
@@ -736,6 +991,8 @@ void bssn_class::Initialize()
|
|||||||
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
||||||
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
||||||
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
|
||||||
|
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -783,6 +1040,8 @@ bssn_class::~bssn_class()
|
|||||||
DumpList->clearList();
|
DumpList->clearList();
|
||||||
ConstraintList->clearList();
|
ConstraintList->clearList();
|
||||||
|
|
||||||
|
delete[] ConstraintRefreshLevels;
|
||||||
|
|
||||||
delete phio;
|
delete phio;
|
||||||
delete trKo;
|
delete trKo;
|
||||||
delete gxxo;
|
delete gxxo;
|
||||||
@@ -1042,6 +1301,7 @@ bssn_class::~bssn_class()
|
|||||||
delete BHMonitor;
|
delete BHMonitor;
|
||||||
delete MAPMonitor;
|
delete MAPMonitor;
|
||||||
delete ConVMonitor;
|
delete ConVMonitor;
|
||||||
|
delete TimingMonitor;
|
||||||
delete Waveshell;
|
delete Waveshell;
|
||||||
|
|
||||||
delete CheckPoint;
|
delete CheckPoint;
|
||||||
@@ -2127,8 +2387,10 @@ void bssn_class::Evolve(int Steps)
|
|||||||
#endif
|
#endif
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#if BSSN_ENABLE_MEM_USAGE_LOG
|
||||||
perf bssn_perf;
|
perf bssn_perf;
|
||||||
size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
|
size_t current_min, current_avg, current_max, peak_min, peak_avg, peak_max;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (int lev = 0; lev < GH->levels; lev++)
|
for (int lev = 0; lev < GH->levels; lev++)
|
||||||
GH->Lt[lev] = PhysTime;
|
GH->Lt[lev] = PhysTime;
|
||||||
@@ -2137,6 +2399,15 @@ void bssn_class::Evolve(int Steps)
|
|||||||
|
|
||||||
for (int ncount = 1; ncount < Steps + 1; ncount++)
|
for (int ncount = 1; ncount < Steps + 1; ncount++)
|
||||||
{
|
{
|
||||||
|
#if BSSN_FINE_TIMING
|
||||||
|
step_timing::reset();
|
||||||
|
#endif
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
f_bssn_rhs_kernel_timing_reset();
|
||||||
|
#endif
|
||||||
|
#if (BSSN_FINE_TIMING || BSSN_KERNEL_FINE_TIMING)
|
||||||
|
const double step_wall_start = MPI_Wtime();
|
||||||
|
#endif
|
||||||
// special for large mass ratio consideration
|
// special for large mass ratio consideration
|
||||||
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
// if(fabs(Porg0[0][0]-Porg0[1][0])+fabs(Porg0[0][1]-Porg0[1][1])+fabs(Porg0[0][2]-Porg0[1][2])<1e-6)
|
||||||
// { GH->levels=GH->movls; }
|
// { GH->levels=GH->movls; }
|
||||||
@@ -2163,6 +2434,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
// When LastDump >= DumpTime, output corresponding binary data
|
// When LastDump >= DumpTime, output corresponding binary data
|
||||||
if (LastDump >= DumpTime)
|
if (LastDump >= DumpTime)
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_dump3d);
|
||||||
// misc::tillherecheck("before Dump_Data");
|
// misc::tillherecheck("before Dump_Data");
|
||||||
|
|
||||||
for (int lev = 0; lev < GH->levels; lev++)
|
for (int lev = 0; lev < GH->levels; lev++)
|
||||||
@@ -2170,6 +2442,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
|
SH->Dump_Data(DumpList, 0, PhysTime, dT_mon);
|
||||||
#endif
|
#endif
|
||||||
|
STEP_TIMER_ADD(TB_DUMP_3D, timer_dump3d);
|
||||||
|
|
||||||
LastDump = 0;
|
LastDump = 0;
|
||||||
|
|
||||||
@@ -2182,10 +2455,12 @@ void bssn_class::Evolve(int Steps)
|
|||||||
// When Last2dDump >= d2DumpTime, output corresponding 2D data
|
// When Last2dDump >= d2DumpTime, output corresponding 2D data
|
||||||
if (Last2dDump >= d2DumpTime)
|
if (Last2dDump >= d2DumpTime)
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_dump2d);
|
||||||
// misc::tillherecheck("before 2dDump_Data");
|
// misc::tillherecheck("before 2dDump_Data");
|
||||||
|
|
||||||
for (int lev = 0; lev < GH->levels; lev++)
|
for (int lev = 0; lev < GH->levels; lev++)
|
||||||
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
|
Parallel::d2Dump_Data(GH->PatL[lev], DumpList, 0, PhysTime, dT_mon);
|
||||||
|
STEP_TIMER_ADD(TB_DUMP_2D, timer_dump2d);
|
||||||
|
|
||||||
Last2dDump = 0;
|
Last2dDump = 0;
|
||||||
|
|
||||||
@@ -2210,10 +2485,12 @@ void bssn_class::Evolve(int Steps)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
#if (REGLEV == 1)
|
#if (REGLEV == 1)
|
||||||
|
STEP_TIMER_DECL(timer_regrid);
|
||||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
|
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
|
#if (REGLEV == 0 && (PSTR == 1 || PSTR == 2))
|
||||||
@@ -2222,6 +2499,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor);
|
// fgt(PhysTime-dT_mon,StartTime,dT_mon/2),ErrorMonitor);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if BSSN_ENABLE_MEM_USAGE_LOG
|
||||||
// Retrieve memory usage information used during computation; master process prints it
|
// Retrieve memory usage information used during computation; master process prints it
|
||||||
bssn_perf.MemoryUsage(¤t_min, ¤t_avg, ¤t_max,
|
bssn_perf.MemoryUsage(¤t_min, ¤t_avg, ¤t_max,
|
||||||
&peak_min, &peak_avg, &peak_max, nprocs);
|
&peak_min, &peak_avg, &peak_max, nprocs);
|
||||||
@@ -2237,6 +2515,7 @@ void bssn_class::Evolve(int Steps)
|
|||||||
(double)peak_max / (1024.0 * 1024.0));
|
(double)peak_max / (1024.0 * 1024.0));
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Output puncture positions at each step
|
// Output puncture positions at each step
|
||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
@@ -2251,10 +2530,13 @@ void bssn_class::Evolve(int Steps)
|
|||||||
<< endl;
|
<< endl;
|
||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
#if BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
cout << " If you think the physical evolution time is enough for this simulation, please input 'stop' in the terminal to stop the MPI processes in the next evolution step ! " << endl;
|
cout << " If you think the physical evolution time is enough for this simulation, please input 'stop' in the terminal to stop the MPI processes in the next evolution step ! " << endl;
|
||||||
|
#endif
|
||||||
// cout << endl;
|
// cout << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// If an "abort" command is detected on stdin, terminate MPI processes
|
// If an "abort" command is detected on stdin, terminate MPI processes
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@@ -2282,10 +2564,12 @@ void bssn_class::Evolve(int Steps)
|
|||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
#endif
|
||||||
|
|
||||||
// When LastCheck >= CheckTime, perform runtime checks and output status data
|
// When LastCheck >= CheckTime, perform runtime checks and output status data
|
||||||
if (LastCheck >= CheckTime)
|
if (LastCheck >= CheckTime)
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_checkpoint);
|
||||||
LastCheck = 0;
|
LastCheck = 0;
|
||||||
|
|
||||||
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
|
CheckPoint->write_Black_Hole_position(BH_num_input, BH_num, Porg0, Porgbr, Mass);
|
||||||
@@ -2294,7 +2578,20 @@ void bssn_class::Evolve(int Steps)
|
|||||||
CheckPoint->writecheck_sh(PhysTime, SH);
|
CheckPoint->writecheck_sh(PhysTime, SH);
|
||||||
#endif
|
#endif
|
||||||
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
|
CheckPoint->write_bssn(LastDump, Last2dDump, LastAnas);
|
||||||
|
STEP_TIMER_ADD(TB_CHECKPOINT, timer_checkpoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if (BSSN_FINE_TIMING || BSSN_KERNEL_FINE_TIMING)
|
||||||
|
const double step_wall_seconds = MPI_Wtime() - step_wall_start;
|
||||||
|
#endif
|
||||||
|
#if BSSN_FINE_TIMING
|
||||||
|
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
|
||||||
|
step_timing::report(myrank, nprocs, TimingMonitor, ncount, PhysTime, step_wall_seconds);
|
||||||
|
#endif
|
||||||
|
#if BSSN_KERNEL_FINE_TIMING
|
||||||
|
if (ncount % BSSN_FINE_TIMING_EVERY == 0)
|
||||||
|
rhs_kernel_timing_report::report(myrank, nprocs, ncount, step_wall_seconds);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
#ifdef With_AHF
|
#ifdef With_AHF
|
||||||
@@ -2426,10 +2723,16 @@ void bssn_class::RecursiveStep(int lev)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (REGLEV == 0)
|
#if (REGLEV == 0)
|
||||||
|
STEP_TIMER_DECL(timer_regrid_onelevel);
|
||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
{
|
||||||
|
if (ConstraintRefreshLevels)
|
||||||
|
ConstraintRefreshLevels[lev] = 1;
|
||||||
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
|
}
|
||||||
|
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2608,7 +2911,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2775,7 +3078,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -2790,7 +3093,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -2809,7 +3112,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -2825,7 +3128,7 @@ void bssn_class::ParallelStep()
|
|||||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); }
|
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||||
|
|
||||||
// a_stream.clear();
|
// a_stream.clear();
|
||||||
// a_stream.str("");
|
// a_stream.str("");
|
||||||
@@ -3022,6 +3325,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
|
|
||||||
// new code 2013-2-15, zjcao
|
// new code 2013-2-15, zjcao
|
||||||
#if (MAPBH == 1)
|
#if (MAPBH == 1)
|
||||||
|
STEP_TIMER_DECL(timer_bh_predictor);
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
{
|
{
|
||||||
@@ -3052,6 +3356,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
|
||||||
|
|
||||||
// data analysis part
|
// data analysis part
|
||||||
// Warning NOTE: the variables1 are used as temp storege room
|
// Warning NOTE: the variables1 are used as temp storege room
|
||||||
@@ -3074,6 +3379,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
|
STEP_TIMER_DECL(timer_predictor_rhs);
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
@@ -3349,6 +3655,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
STEP_TIMER_ADD(TB_PREDICTOR_RHS, timer_predictor_rhs);
|
||||||
|
|
||||||
|
STEP_TIMER_DECL(timer_predictor_sync);
|
||||||
Parallel::AsyncSyncState async_pre;
|
Parallel::AsyncSyncState async_pre;
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||||
|
|
||||||
@@ -3386,6 +3695,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
// for black hole position
|
// for black hole position
|
||||||
@@ -3430,6 +3740,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
// corrector
|
// corrector
|
||||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_corrector_rhs);
|
||||||
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
|
// for RK4: t0, t0+dt/2, t0+dt/2, t0+dt;
|
||||||
if (iter_count == 1 || iter_count == 3)
|
if (iter_count == 1 || iter_count == 3)
|
||||||
TRK4 += dT_lev / 2;
|
TRK4 += dT_lev / 2;
|
||||||
@@ -3709,6 +4020,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
STEP_TIMER_ADD(TB_CORRECTOR_RHS, timer_corrector_rhs);
|
||||||
|
|
||||||
|
STEP_TIMER_DECL(timer_corrector_sync);
|
||||||
Parallel::AsyncSyncState async_cor;
|
Parallel::AsyncSyncState async_cor;
|
||||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||||
|
|
||||||
@@ -3748,8 +4062,10 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
STEP_TIMER_ADD(TB_CORRECTOR_SYNC, timer_corrector_sync);
|
||||||
|
|
||||||
#if (MAPBH == 0)
|
#if (MAPBH == 0)
|
||||||
|
STEP_TIMER_DECL(timer_bh_corrector);
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
{
|
{
|
||||||
@@ -3782,11 +4098,13 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_BH_CORRECTOR, timer_bh_corrector);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// swap time level
|
// swap time level
|
||||||
if (iter_count < 3)
|
if (iter_count < 3)
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_state_swap);
|
||||||
Pp = GH->PatL[lev];
|
Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
{
|
{
|
||||||
@@ -3833,9 +4151,11 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_swap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if (RPS == 0)
|
#if (RPS == 0)
|
||||||
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
// mesh refinement boundary part
|
// mesh refinement boundary part
|
||||||
RestrictProlong(lev, YN, BB);
|
RestrictProlong(lev, YN, BB);
|
||||||
|
|
||||||
@@ -3856,6 +4176,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||||
#endif
|
#endif
|
||||||
// note the data structure before update
|
// note the data structure before update
|
||||||
// SynchList_cor 1 -----------
|
// SynchList_cor 1 -----------
|
||||||
@@ -3864,6 +4185,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
//
|
//
|
||||||
// OldStateList old -----------
|
// OldStateList old -----------
|
||||||
// update
|
// update
|
||||||
|
STEP_TIMER_DECL(timer_state_commit);
|
||||||
Pp = GH->PatL[lev];
|
Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
{
|
{
|
||||||
@@ -3920,6 +4242,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_STATE_SWAP, timer_state_commit);
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -4246,7 +4569,9 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
STEP_TIMER_ADD(TB_PREDICTOR_SYNC, timer_predictor_sync);
|
||||||
|
|
||||||
|
STEP_TIMER_DECL(timer_bh_predictor);
|
||||||
// for black hole position
|
// for black hole position
|
||||||
if (BH_num > 0 && lev == GH->levels - 1)
|
if (BH_num > 0 && lev == GH->levels - 1)
|
||||||
{
|
{
|
||||||
@@ -4285,6 +4610,7 @@ void bssn_class::Step(int lev, int YN)
|
|||||||
{
|
{
|
||||||
AnalysisStuff(lev, dT_lev);
|
AnalysisStuff(lev, dT_lev);
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_BH_PREDICTOR, timer_bh_predictor);
|
||||||
// corrector
|
// corrector
|
||||||
for (iter_count = 1; iter_count < 3; iter_count++)
|
for (iter_count = 1; iter_count < 3; iter_count++)
|
||||||
{
|
{
|
||||||
@@ -5755,6 +6081,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
//
|
//
|
||||||
// SynchList_cor old -----------
|
// SynchList_cor old -----------
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
// stringstream a_stream;
|
// stringstream a_stream;
|
||||||
// a_stream.setf(ios::left);
|
// a_stream.setf(ios::left);
|
||||||
@@ -5796,7 +6123,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
@@ -5820,7 +6147,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -5847,7 +6174,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
@@ -5871,7 +6198,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -5897,6 +6224,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
|||||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -5916,6 +6244,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
//
|
//
|
||||||
// SynchList_cor old -----------
|
// SynchList_cor old -----------
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"starting RestrictProlong_aux");
|
||||||
|
|
||||||
if (lev >= GH->levels - 1)
|
if (lev >= GH->levels - 1)
|
||||||
@@ -5940,7 +6269,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
@@ -5950,7 +6279,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -5962,7 +6291,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
else // no time refinement levels and for all same time levels
|
else // no time refinement levels and for all same time levels
|
||||||
{
|
{
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||||
@@ -5972,7 +6301,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -5984,6 +6313,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -5994,6 +6324,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
|||||||
|
|
||||||
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||||
{
|
{
|
||||||
|
STEP_TIMER_DECL(timer_restrict_prolong);
|
||||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||||
// we assume for fine
|
// we assume for fine
|
||||||
// SynchList_cor 1 -----------
|
// SynchList_cor 1 -----------
|
||||||
@@ -6027,7 +6358,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||||
@@ -6037,7 +6368,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6051,7 +6382,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
if (myrank == 0)
|
if (myrank == 0)
|
||||||
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
|
||||||
#elif (RPB == 1)
|
#elif (RPB == 1)
|
||||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
|
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
|
||||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
||||||
@@ -6061,7 +6392,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6073,6 +6404,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
|||||||
|
|
||||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||||
}
|
}
|
||||||
|
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||||
}
|
}
|
||||||
|
|
||||||
//================================================================================================
|
//================================================================================================
|
||||||
@@ -6102,7 +6434,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
|
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6115,7 +6447,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
|||||||
{
|
{
|
||||||
#if (RPB == 0)
|
#if (RPB == 0)
|
||||||
#if (MIXOUTB == 0)
|
#if (MIXOUTB == 0)
|
||||||
Parallel::OutBdLow2Hi(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||||
#elif (MIXOUTB == 1)
|
#elif (MIXOUTB == 1)
|
||||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||||
#endif
|
#endif
|
||||||
@@ -6823,18 +7155,15 @@ void bssn_class::compute_Porg_rhs(double **BH_PS,double **BH_RHS,var *forx,var *
|
|||||||
|
|
||||||
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
|
void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, var *fory, var *forz, int ilev)
|
||||||
{
|
{
|
||||||
const int InList = 3;
|
MyList<var> DG_List_x(forx);
|
||||||
|
MyList<var> DG_List_y(fory);
|
||||||
|
MyList<var> DG_List_z(forz);
|
||||||
|
DG_List_x.next = &DG_List_y;
|
||||||
|
DG_List_y.next = &DG_List_z;
|
||||||
|
|
||||||
MyList<var> *DG_List = new MyList<var>(forx);
|
double shellf[3];
|
||||||
DG_List->insert(fory);
|
double pox_buf[3][1];
|
||||||
DG_List->insert(forz);
|
double *pox[3] = {pox_buf[0], pox_buf[1], pox_buf[2]};
|
||||||
|
|
||||||
double *x1, *y1, *z1;
|
|
||||||
double *shellf;
|
|
||||||
shellf = new double[3];
|
|
||||||
double *pox[3];
|
|
||||||
for (int i = 0; i < 3; i++)
|
|
||||||
pox[i] = new double[1];
|
|
||||||
|
|
||||||
for (int n = 0; n < BH_num; n++)
|
for (int n = 0; n < BH_num; n++)
|
||||||
{
|
{
|
||||||
@@ -6845,9 +7174,9 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
|
|||||||
int lev = ilev;
|
int lev = ilev;
|
||||||
|
|
||||||
#if (PSTR == 0)
|
#if (PSTR == 0)
|
||||||
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry))
|
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], &DG_List_x, 1, pox, shellf, Symmetry))
|
||||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], DG_List, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
|
while (!Parallel::PatList_Interp_Points(GH->PatL[lev], &DG_List_x, 1, pox, shellf, Symmetry, GH->Commlev[lev]))
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
lev--;
|
lev--;
|
||||||
@@ -6856,7 +7185,7 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
|
|||||||
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
|
ErrorMonitor->outfile << "fail to find black holes at t = " << PhysTime << endl;
|
||||||
for (n = 0; n < BH_num; n++)
|
for (n = 0; n < BH_num; n++)
|
||||||
ErrorMonitor->outfile << "(x,y,z) = ("
|
ErrorMonitor->outfile << "(x,y,z) = ("
|
||||||
<< pox[0][n] << "," << pox[1][n] << "," << pox[2][n]
|
<< BH_PS[n][0] << "," << BH_PS[n][1] << "," << BH_PS[n][2]
|
||||||
<< ")" << endl;
|
<< ")" << endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -6869,11 +7198,6 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
|
|||||||
BH_RHS[n][2] = -shellf[2];
|
BH_RHS[n][2] = -shellf[2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DG_List->clearList();
|
|
||||||
delete[] shellf;
|
|
||||||
for (int i = 0; i < 3; i++)
|
|
||||||
delete[] pox[i];
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -7096,6 +7420,10 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
|
|||||||
IP = new double[NN];
|
IP = new double[NN];
|
||||||
RoutMAP = new double[7];
|
RoutMAP = new double[7];
|
||||||
double Rex = maxrex;
|
double Rex = maxrex;
|
||||||
|
bool patch_mass_prepared = false;
|
||||||
|
#ifdef WithShell
|
||||||
|
bool shell_mass_prepared = false;
|
||||||
|
#endif
|
||||||
for (int i = 0; i < decn; i++)
|
for (int i = 0; i < decn; i++)
|
||||||
{
|
{
|
||||||
#ifdef Point_Psi4
|
#ifdef Point_Psi4
|
||||||
@@ -7123,7 +7451,8 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
|
|||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
||||||
RoutMAP, ErrorMonitor);
|
RoutMAP, ErrorMonitor, !patch_mass_prepared);
|
||||||
|
patch_mass_prepared = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -7131,44 +7460,52 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
|
|||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
||||||
RoutMAP, ErrorMonitor);
|
RoutMAP, ErrorMonitor, !shell_mass_prepared);
|
||||||
|
shell_mass_prepared = true;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
|
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
|
||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
||||||
RoutMAP, ErrorMonitor);
|
RoutMAP, ErrorMonitor, !patch_mass_prepared);
|
||||||
|
patch_mass_prepared = true;
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before surface integral");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before surface integral");
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
if (lev > 0 || Rex < GH->bbox[0][0][3])
|
if (lev > 0 || Rex < GH->bbox[0][0][3])
|
||||||
{
|
{
|
||||||
Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor);
|
Waveshell->surf_WaveMassPAng(Rex, lev, GH,
|
||||||
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
|
Rpsi4, Ipsi4, 2, maxl, NN, RP, IP,
|
||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
phi0, trK0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
RoutMAP, ErrorMonitor);
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
|
||||||
|
RoutMAP, ErrorMonitor, !patch_mass_prepared);
|
||||||
|
patch_mass_prepared = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Waveshell->surf_Wave(Rex, lev, SH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor);
|
Waveshell->surf_WaveMassPAng(Rex, lev, SH,
|
||||||
Waveshell->surf_MassPAng(Rex, lev, SH, phi0, trK0,
|
Rpsi4, Ipsi4, 2, maxl, NN, RP, IP,
|
||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
phi0, trK0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
RoutMAP, ErrorMonitor);
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
|
||||||
|
RoutMAP, ErrorMonitor, !shell_mass_prepared);
|
||||||
|
shell_mass_prepared = true;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#if (PSTR == 0)
|
#if (PSTR == 0)
|
||||||
Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor);
|
Waveshell->surf_WaveMassPAng(Rex, lev, GH,
|
||||||
Waveshell->surf_MassPAng(Rex, lev, GH, phi0, trK0,
|
Rpsi4, Ipsi4, 2, maxl, NN, RP, IP,
|
||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
phi0, trK0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
RoutMAP, ErrorMonitor);
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1,
|
||||||
|
RoutMAP, ErrorMonitor, !patch_mass_prepared);
|
||||||
|
patch_mass_prepared = true;
|
||||||
#elif (PSTR == 1 || PSTR == 2)
|
#elif (PSTR == 1 || PSTR == 2)
|
||||||
Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor, GH->Commlev[lev]);
|
Waveshell->surf_Wave(Rex, lev, GH, Rpsi4, Ipsi4, 2, maxl, NN, RP, IP, ErrorMonitor, GH->Commlev[lev]);
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after surf_Wave");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after surf_Wave");
|
||||||
@@ -7176,7 +7513,8 @@ void bssn_class::AnalysisStuff(int lev, double dT_lev)
|
|||||||
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
gxx0, gxy0, gxz0, gyy0, gyz0, gzz0,
|
||||||
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
Axx0, Axy0, Axz0, Ayy0, Ayz0, Azz0,
|
||||||
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
Gmx0, Gmy0, Gmz0, Sfx1, Sfy1, Sfz1, // here we can not touch rhs variables, but 1 variables
|
||||||
RoutMAP, ErrorMonitor, GH->Commlev[lev]);
|
RoutMAP, ErrorMonitor, GH->Commlev[lev], !patch_mass_prepared);
|
||||||
|
patch_mass_prepared = true;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral");
|
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"end surface integral");
|
||||||
@@ -7249,7 +7587,7 @@ void bssn_class::Constraint_Out()
|
|||||||
for (int lev = 0; lev < GH->levels; lev++)
|
for (int lev = 0; lev < GH->levels; lev++)
|
||||||
{
|
{
|
||||||
// make sure the data consistent for higher levels
|
// make sure the data consistent for higher levels
|
||||||
if (lev > 0) // if the constrait quantities can be reused from the step rhs calculation
|
if (lev > 0 && ConstraintRefreshLevels && ConstraintRefreshLevels[lev]) // only refresh levels whose grid layout changed after evolution
|
||||||
{
|
{
|
||||||
double TRK4 = PhysTime;
|
double TRK4 = PhysTime;
|
||||||
double ndeps = numepsb;
|
double ndeps = numepsb;
|
||||||
@@ -7403,35 +7741,18 @@ void bssn_class::Constraint_Out()
|
|||||||
#if (PSTR == 1 || PSTR == 2)
|
#if (PSTR == 1 || PSTR == 2)
|
||||||
double ConV_h[7];
|
double ConV_h[7];
|
||||||
#endif
|
#endif
|
||||||
|
var *ConstraintVars[7] = {Cons_Ham, Cons_Px, Cons_Py, Cons_Pz, Cons_Gx, Cons_Gy, Cons_Gz};
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
ConV[0] = SH->L2Norm(Cons_Ham);
|
SH->L2Norm7(ConstraintVars, ConV);
|
||||||
ConV[1] = SH->L2Norm(Cons_Px);
|
|
||||||
ConV[2] = SH->L2Norm(Cons_Py);
|
|
||||||
ConV[3] = SH->L2Norm(Cons_Pz);
|
|
||||||
ConV[4] = SH->L2Norm(Cons_Gx);
|
|
||||||
ConV[5] = SH->L2Norm(Cons_Gy);
|
|
||||||
ConV[6] = SH->L2Norm(Cons_Gz);
|
|
||||||
ConVMonitor->writefile(PhysTime, 7, ConV);
|
ConVMonitor->writefile(PhysTime, 7, ConV);
|
||||||
#endif
|
#endif
|
||||||
for (int levi = 0; levi < GH->levels; levi++)
|
for (int levi = 0; levi < GH->levels; levi++)
|
||||||
{
|
{
|
||||||
#if (PSTR == 0)
|
#if (PSTR == 0)
|
||||||
ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham);
|
Parallel::L2Norm7(GH->PatL[levi]->data, ConstraintVars, ConV);
|
||||||
ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px);
|
|
||||||
ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py);
|
|
||||||
ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz);
|
|
||||||
ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx);
|
|
||||||
ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy);
|
|
||||||
ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz);
|
|
||||||
#elif (PSTR == 1 || PSTR == 2)
|
#elif (PSTR == 1 || PSTR == 2)
|
||||||
ConV[0] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Ham, GH->Commlev[levi]);
|
Parallel::L2Norm7(GH->PatL[levi]->data, ConstraintVars, ConV, GH->Commlev[levi]);
|
||||||
ConV[1] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Px, GH->Commlev[levi]);
|
|
||||||
ConV[2] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Py, GH->Commlev[levi]);
|
|
||||||
ConV[3] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Pz, GH->Commlev[levi]);
|
|
||||||
ConV[4] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gx, GH->Commlev[levi]);
|
|
||||||
ConV[5] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gy, GH->Commlev[levi]);
|
|
||||||
ConV[6] = Parallel::L2Norm(GH->PatL[levi]->data, Cons_Gz, GH->Commlev[levi]);
|
|
||||||
// misc::tillherecheck("before collect data to cpu0");
|
// misc::tillherecheck("before collect data to cpu0");
|
||||||
// MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different
|
// MPI_ALLREDUCE( sendbuf, recvbuf, count, datatype, op, comm), sendbu and recvbuf must be different
|
||||||
if (levi > 0)
|
if (levi > 0)
|
||||||
@@ -7462,6 +7783,9 @@ void bssn_class::Constraint_Out()
|
|||||||
Interp_Constraint(false);
|
Interp_Constraint(false);
|
||||||
|
|
||||||
LastConsOut = 0;
|
LastConsOut = 0;
|
||||||
|
if (ConstraintRefreshLevels)
|
||||||
|
for (int lev = 0; lev < GH->levels; lev++)
|
||||||
|
ConstraintRefreshLevels[lev] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ public:
|
|||||||
double StartTime, TotalTime;
|
double StartTime, TotalTime;
|
||||||
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
double AnasTime, DumpTime, d2DumpTime, CheckTime;
|
||||||
double LastAnas, LastConsOut;
|
double LastAnas, LastConsOut;
|
||||||
|
int *ConstraintRefreshLevels;
|
||||||
double Courant;
|
double Courant;
|
||||||
double numepss, numepsb, numepsh;
|
double numepss, numepsb, numepsh;
|
||||||
int Symmetry;
|
int Symmetry;
|
||||||
@@ -130,9 +131,11 @@ public:
|
|||||||
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
|
Parallel::SyncCache *sync_cache_cor; // per-level cache for corrector sync
|
||||||
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
|
Parallel::SyncCache *sync_cache_rp_coarse; // RestrictProlong sync on PatL[lev-1]
|
||||||
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
|
Parallel::SyncCache *sync_cache_rp_fine; // RestrictProlong sync on PatL[lev]
|
||||||
|
Parallel::SyncCache *sync_cache_restrict; // cached Restrict in RestrictProlong
|
||||||
|
Parallel::SyncCache *sync_cache_outbd; // cached OutBdLow2Hi in RestrictProlong
|
||||||
|
|
||||||
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
monitor *ErrorMonitor, *Psi4Monitor, *BHMonitor, *MAPMonitor;
|
||||||
monitor *ConVMonitor;
|
monitor *ConVMonitor, *TimingMonitor;
|
||||||
surface_integral *Waveshell;
|
surface_integral *Waveshell;
|
||||||
checkpoint *CheckPoint;
|
checkpoint *CheckPoint;
|
||||||
|
|
||||||
|
|||||||
@@ -62,6 +62,7 @@
|
|||||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
|
real*8, dimension(ex(1),ex(2),ex(3)),intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res
|
||||||
! gont = 0: success; gont = 1: something wrong
|
! gont = 0: success; gont = 1: something wrong
|
||||||
integer::gont
|
integer::gont
|
||||||
|
integer :: i,j,k
|
||||||
|
|
||||||
!~~~~~~> Other variables:
|
!~~~~~~> Other variables:
|
||||||
|
|
||||||
@@ -85,6 +86,13 @@
|
|||||||
|
|
||||||
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
|
real*8,dimension(3) ::SSS,AAS,ASA,SAA,ASS,SAS,SSA
|
||||||
real*8 :: dX, dY, dZ, PI
|
real*8 :: dX, dY, dZ, PI
|
||||||
|
real*8 :: divb_loc,det_loc
|
||||||
|
real*8 :: gupxx_loc,gupxy_loc,gupxz_loc,gupyy_loc,gupyz_loc,gupzz_loc
|
||||||
|
real*8 :: Rxx_loc,Rxy_loc,Rxz_loc,Ryy_loc,Ryz_loc,Rzz_loc
|
||||||
|
real*8 :: fxx_loc,fxy_loc,fxz_loc
|
||||||
|
real*8 :: Gamxa_loc,Gamya_loc,Gamza_loc
|
||||||
|
real*8 :: f_loc,chin_loc
|
||||||
|
real*8 :: l_fxx,l_fxy,l_fxz,l_fyy,l_fyz,l_fzz,S_loc
|
||||||
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
|
real*8, parameter :: ZEO = 0.d0,ONE = 1.D0, TWO = 2.D0, FOUR = 4.D0
|
||||||
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
|
real*8, parameter :: EIGHT = 8.D0, HALF = 0.5D0, THR = 3.d0
|
||||||
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
|
real*8, parameter :: SYM = 1.D0, ANTI= - 1.D0
|
||||||
@@ -97,7 +105,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (GAUGE == 6 || GAUGE == 7)
|
#if (GAUGE == 6 || GAUGE == 7)
|
||||||
integer :: BHN,i,j,k
|
integer :: BHN
|
||||||
real*8, dimension(9) :: Porg
|
real*8, dimension(9) :: Porg
|
||||||
real*8, dimension(3) :: Mass
|
real*8, dimension(3) :: Mass
|
||||||
real*8 :: r1,r2,M,A,w1,w2,C1,C2
|
real*8 :: r1,r2,M,A,w1,w2,C1,C2
|
||||||
@@ -145,22 +153,24 @@
|
|||||||
dY = Y(2) - Y(1)
|
dY = Y(2) - Y(1)
|
||||||
dZ = Z(2) - Z(1)
|
dZ = Z(2) - Z(1)
|
||||||
|
|
||||||
alpn1 = Lap + ONE
|
do k=1,ex(3)
|
||||||
chin1 = chi + ONE
|
do j=1,ex(2)
|
||||||
gxx = dxx + ONE
|
do i=1,ex(1)
|
||||||
gyy = dyy + ONE
|
alpn1(i,j,k) = Lap(i,j,k) + ONE
|
||||||
gzz = dzz + ONE
|
chin1(i,j,k) = chi(i,j,k) + ONE
|
||||||
|
gxx(i,j,k) = dxx(i,j,k) + ONE
|
||||||
|
gyy(i,j,k) = dyy(i,j,k) + ONE
|
||||||
|
gzz(i,j,k) = dzz(i,j,k) + ONE
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
|
call fderivs(ex,betax,betaxx,betaxy,betaxz,X,Y,Z,ANTI, SYM, SYM,Symmetry,Lev)
|
||||||
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
|
call fderivs(ex,betay,betayx,betayy,betayz,X,Y,Z, SYM,ANTI, SYM,Symmetry,Lev)
|
||||||
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
|
call fderivs(ex,betaz,betazx,betazy,betazz,X,Y,Z, SYM, SYM,ANTI,Symmetry,Lev)
|
||||||
|
|
||||||
div_beta = betaxx + betayy + betazz
|
|
||||||
|
|
||||||
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
call fderivs(ex,chi,chix,chiy,chiz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
||||||
|
|
||||||
chi_rhs = F2o3 *chin1*( alpn1 * trK - div_beta ) !rhs for chi
|
|
||||||
|
|
||||||
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
call fderivs(ex,dxx,gxxx,gxxy,gxxz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||||
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
|
call fderivs(ex,gxy,gxyx,gxyy,gxyz,X,Y,Z,ANTI,ANTI,SYM ,Symmetry,Lev)
|
||||||
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
|
call fderivs(ex,gxz,gxzx,gxzy,gxzz,X,Y,Z,ANTI,SYM ,ANTI,Symmetry,Lev)
|
||||||
@@ -168,151 +178,179 @@
|
|||||||
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
|
call fderivs(ex,gyz,gyzx,gyzy,gyzz,X,Y,Z,SYM ,ANTI,ANTI,Symmetry,Lev)
|
||||||
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
call fderivs(ex,dzz,gzzx,gzzy,gzzz,X,Y,Z,SYM ,SYM ,SYM ,Symmetry,Lev)
|
||||||
|
|
||||||
gxx_rhs = - TWO * alpn1 * Axx - F2o3 * gxx * div_beta + &
|
do k=1,ex(3)
|
||||||
TWO *( gxx * betaxx + gxy * betayx + gxz * betazx)
|
do j=1,ex(2)
|
||||||
|
do i=1,ex(1)
|
||||||
|
divb_loc = betaxx(i,j,k) + betayy(i,j,k) + betazz(i,j,k)
|
||||||
|
div_beta(i,j,k) = divb_loc
|
||||||
|
|
||||||
gyy_rhs = - TWO * alpn1 * Ayy - F2o3 * gyy * div_beta + &
|
chi_rhs(i,j,k) = F2o3 * chin1(i,j,k) * (alpn1(i,j,k) * trK(i,j,k) - divb_loc)
|
||||||
TWO *( gxy * betaxy + gyy * betayy + gyz * betazy)
|
|
||||||
|
|
||||||
gzz_rhs = - TWO * alpn1 * Azz - F2o3 * gzz * div_beta + &
|
gxx_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axx(i,j,k) - F2o3 * gxx(i,j,k) * divb_loc + &
|
||||||
TWO *( gxz * betaxz + gyz * betayz + gzz * betazz)
|
TWO * ( gxx(i,j,k) * betaxx(i,j,k) + gxy(i,j,k) * betayx(i,j,k) + gxz(i,j,k) * betazx(i,j,k) )
|
||||||
|
|
||||||
gxy_rhs = - TWO * alpn1 * Axy + F1o3 * gxy * div_beta + &
|
gyy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayy(i,j,k) - F2o3 * gyy(i,j,k) * divb_loc + &
|
||||||
gxx * betaxy + gxz * betazy + &
|
TWO * ( gxy(i,j,k) * betaxy(i,j,k) + gyy(i,j,k) * betayy(i,j,k) + gyz(i,j,k) * betazy(i,j,k) )
|
||||||
gyy * betayx + gyz * betazx &
|
|
||||||
- gxy * betazz
|
|
||||||
|
|
||||||
gyz_rhs = - TWO * alpn1 * Ayz + F1o3 * gyz * div_beta + &
|
gzz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Azz(i,j,k) - F2o3 * gzz(i,j,k) * divb_loc + &
|
||||||
gxy * betaxz + gyy * betayz + &
|
TWO * ( gxz(i,j,k) * betaxz(i,j,k) + gyz(i,j,k) * betayz(i,j,k) + gzz(i,j,k) * betazz(i,j,k) )
|
||||||
gxz * betaxy + gzz * betazy &
|
|
||||||
- gyz * betaxx
|
|
||||||
|
|
||||||
gxz_rhs = - TWO * alpn1 * Axz + F1o3 * gxz * div_beta + &
|
gxy_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axy(i,j,k) + F1o3 * gxy(i,j,k) * divb_loc + &
|
||||||
gxx * betaxz + gxy * betayz + &
|
gxx(i,j,k) * betaxy(i,j,k) + gxz(i,j,k) * betazy(i,j,k) + gyy(i,j,k) * betayx(i,j,k) + &
|
||||||
gyz * betayx + gzz * betazx &
|
gyz(i,j,k) * betazx(i,j,k) - gxy(i,j,k) * betazz(i,j,k)
|
||||||
- gxz * betayy !rhs for gij
|
|
||||||
|
|
||||||
! invert tilted metric
|
gyz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Ayz(i,j,k) + F1o3 * gyz(i,j,k) * divb_loc + &
|
||||||
gupzz = gxx * gyy * gzz + gxy * gyz * gxz + gxz * gxy * gyz - &
|
gxy(i,j,k) * betaxz(i,j,k) + gyy(i,j,k) * betayz(i,j,k) + gxz(i,j,k) * betaxy(i,j,k) + &
|
||||||
gxz * gyy * gxz - gxy * gxy * gzz - gxx * gyz * gyz
|
gzz(i,j,k) * betazy(i,j,k) - gyz(i,j,k) * betaxx(i,j,k)
|
||||||
gupxx = ( gyy * gzz - gyz * gyz ) / gupzz
|
|
||||||
gupxy = - ( gxy * gzz - gyz * gxz ) / gupzz
|
|
||||||
gupxz = ( gxy * gyz - gyy * gxz ) / gupzz
|
|
||||||
gupyy = ( gxx * gzz - gxz * gxz ) / gupzz
|
|
||||||
gupyz = - ( gxx * gyz - gxy * gxz ) / gupzz
|
|
||||||
gupzz = ( gxx * gyy - gxy * gxy ) / gupzz
|
|
||||||
|
|
||||||
if(co == 0)then
|
gxz_rhs(i,j,k) = - TWO * alpn1(i,j,k) * Axz(i,j,k) + F1o3 * gxz(i,j,k) * divb_loc + &
|
||||||
! Gam^i_Res = Gam^i + gup^ij_,j
|
gxx(i,j,k) * betaxz(i,j,k) + gxy(i,j,k) * betayz(i,j,k) + gyz(i,j,k) * betayx(i,j,k) + &
|
||||||
Gmx_Res = Gamx - (gupxx*(gupxx*gxxx+gupxy*gxyx+gupxz*gxzx)&
|
gzz(i,j,k) * betazx(i,j,k) - gxz(i,j,k) * betayy(i,j,k)
|
||||||
+gupxy*(gupxx*gxyx+gupxy*gyyx+gupxz*gyzx)&
|
|
||||||
+gupxz*(gupxx*gxzx+gupxy*gyzx+gupxz*gzzx)&
|
|
||||||
+gupxx*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
|
|
||||||
+gupxy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
|
|
||||||
+gupxz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
|
|
||||||
+gupxx*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
|
|
||||||
+gupxy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
|
|
||||||
+gupxz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
|
|
||||||
Gmy_Res = Gamy - (gupxx*(gupxy*gxxx+gupyy*gxyx+gupyz*gxzx)&
|
|
||||||
+gupxy*(gupxy*gxyx+gupyy*gyyx+gupyz*gyzx)&
|
|
||||||
+gupxz*(gupxy*gxzx+gupyy*gyzx+gupyz*gzzx)&
|
|
||||||
+gupxy*(gupxy*gxxy+gupyy*gxyy+gupyz*gxzy)&
|
|
||||||
+gupyy*(gupxy*gxyy+gupyy*gyyy+gupyz*gyzy)&
|
|
||||||
+gupyz*(gupxy*gxzy+gupyy*gyzy+gupyz*gzzy)&
|
|
||||||
+gupxy*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
|
|
||||||
+gupyy*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
|
|
||||||
+gupyz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
|
|
||||||
Gmz_Res = Gamz - (gupxx*(gupxz*gxxx+gupyz*gxyx+gupzz*gxzx)&
|
|
||||||
+gupxy*(gupxz*gxyx+gupyz*gyyx+gupzz*gyzx)&
|
|
||||||
+gupxz*(gupxz*gxzx+gupyz*gyzx+gupzz*gzzx)&
|
|
||||||
+gupxy*(gupxz*gxxy+gupyz*gxyy+gupzz*gxzy)&
|
|
||||||
+gupyy*(gupxz*gxyy+gupyz*gyyy+gupzz*gyzy)&
|
|
||||||
+gupyz*(gupxz*gxzy+gupyz*gyzy+gupzz*gzzy)&
|
|
||||||
+gupxz*(gupxz*gxxz+gupyz*gxyz+gupzz*gxzz)&
|
|
||||||
+gupyz*(gupxz*gxyz+gupyz*gyyz+gupzz*gyzz)&
|
|
||||||
+gupzz*(gupxz*gxzz+gupyz*gyzz+gupzz*gzzz))
|
|
||||||
endif
|
|
||||||
|
|
||||||
! second kind of connection
|
det_loc = gxx(i,j,k) * gyy(i,j,k) * gzz(i,j,k) + gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) + &
|
||||||
Gamxxx =HALF*( gupxx*gxxx + gupxy*(TWO*gxyx - gxxy ) + gupxz*(TWO*gxzx - gxxz ))
|
gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) - gxz(i,j,k) * gyy(i,j,k) * gxz(i,j,k) - &
|
||||||
Gamyxx =HALF*( gupxy*gxxx + gupyy*(TWO*gxyx - gxxy ) + gupyz*(TWO*gxzx - gxxz ))
|
gxy(i,j,k) * gxy(i,j,k) * gzz(i,j,k) - gxx(i,j,k) * gyz(i,j,k) * gyz(i,j,k)
|
||||||
Gamzxx =HALF*( gupxz*gxxx + gupyz*(TWO*gxyx - gxxy ) + gupzz*(TWO*gxzx - gxxz ))
|
gupxx_loc = ( gyy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gyz(i,j,k) ) / det_loc
|
||||||
|
gupxy_loc = - ( gxy(i,j,k) * gzz(i,j,k) - gyz(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||||
|
gupxz_loc = ( gxy(i,j,k) * gyz(i,j,k) - gyy(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||||
|
gupyy_loc = ( gxx(i,j,k) * gzz(i,j,k) - gxz(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||||
|
gupyz_loc = - ( gxx(i,j,k) * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / det_loc
|
||||||
|
gupzz_loc = ( gxx(i,j,k) * gyy(i,j,k) - gxy(i,j,k) * gxy(i,j,k) ) / det_loc
|
||||||
|
gupxx(i,j,k) = gupxx_loc
|
||||||
|
gupxy(i,j,k) = gupxy_loc
|
||||||
|
gupxz(i,j,k) = gupxz_loc
|
||||||
|
gupyy(i,j,k) = gupyy_loc
|
||||||
|
gupyz(i,j,k) = gupyz_loc
|
||||||
|
gupzz(i,j,k) = gupzz_loc
|
||||||
|
|
||||||
Gamxyy =HALF*( gupxx*(TWO*gxyy - gyyx ) + gupxy*gyyy + gupxz*(TWO*gyzy - gyyz ))
|
if(co == 0)then
|
||||||
Gamyyy =HALF*( gupxy*(TWO*gxyy - gyyx ) + gupyy*gyyy + gupyz*(TWO*gyzy - gyyz ))
|
Gmx_Res(i,j,k) = Gamx(i,j,k) - ( &
|
||||||
Gamzyy =HALF*( gupxz*(TWO*gxyy - gyyx ) + gupyz*gyyy + gupzz*(TWO*gyzy - gyyz ))
|
gupxx_loc*(gupxx_loc*gxxx(i,j,k)+gupxy_loc*gxyx(i,j,k)+gupxz_loc*gxzx(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxx_loc*gxyx(i,j,k)+gupxy_loc*gyyx(i,j,k)+gupxz_loc*gyzx(i,j,k)) + &
|
||||||
|
gupxz_loc*(gupxx_loc*gxzx(i,j,k)+gupxy_loc*gyzx(i,j,k)+gupxz_loc*gzzx(i,j,k)) + &
|
||||||
|
gupxx_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
|
||||||
|
gupxz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
|
||||||
|
gupxx_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
|
||||||
|
gupxz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
|
||||||
|
Gmy_Res(i,j,k) = Gamy(i,j,k) - ( &
|
||||||
|
gupxx_loc*(gupxy_loc*gxxx(i,j,k)+gupyy_loc*gxyx(i,j,k)+gupyz_loc*gxzx(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxy_loc*gxyx(i,j,k)+gupyy_loc*gyyx(i,j,k)+gupyz_loc*gyzx(i,j,k)) + &
|
||||||
|
gupxz_loc*(gupxy_loc*gxzx(i,j,k)+gupyy_loc*gyzx(i,j,k)+gupyz_loc*gzzx(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxy_loc*gxxy(i,j,k)+gupyy_loc*gxyy(i,j,k)+gupyz_loc*gxzy(i,j,k)) + &
|
||||||
|
gupyy_loc*(gupxy_loc*gxyy(i,j,k)+gupyy_loc*gyyy(i,j,k)+gupyz_loc*gyzy(i,j,k)) + &
|
||||||
|
gupyz_loc*(gupxy_loc*gxzy(i,j,k)+gupyy_loc*gyzy(i,j,k)+gupyz_loc*gzzy(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
|
||||||
|
gupyy_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
|
||||||
|
gupyz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
|
||||||
|
Gmz_Res(i,j,k) = Gamz(i,j,k) - ( &
|
||||||
|
gupxx_loc*(gupxz_loc*gxxx(i,j,k)+gupyz_loc*gxyx(i,j,k)+gupzz_loc*gxzx(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxz_loc*gxyx(i,j,k)+gupyz_loc*gyyx(i,j,k)+gupzz_loc*gyzx(i,j,k)) + &
|
||||||
|
gupxz_loc*(gupxz_loc*gxzx(i,j,k)+gupyz_loc*gyzx(i,j,k)+gupzz_loc*gzzx(i,j,k)) + &
|
||||||
|
gupxy_loc*(gupxz_loc*gxxy(i,j,k)+gupyz_loc*gxyy(i,j,k)+gupzz_loc*gxzy(i,j,k)) + &
|
||||||
|
gupyy_loc*(gupxz_loc*gxyy(i,j,k)+gupyz_loc*gyyy(i,j,k)+gupzz_loc*gyzy(i,j,k)) + &
|
||||||
|
gupyz_loc*(gupxz_loc*gxzy(i,j,k)+gupyz_loc*gyzy(i,j,k)+gupzz_loc*gzzy(i,j,k)) + &
|
||||||
|
gupxz_loc*(gupxz_loc*gxxz(i,j,k)+gupyz_loc*gxyz(i,j,k)+gupzz_loc*gxzz(i,j,k)) + &
|
||||||
|
gupyz_loc*(gupxz_loc*gxyz(i,j,k)+gupyz_loc*gyyz(i,j,k)+gupzz_loc*gyzz(i,j,k)) + &
|
||||||
|
gupzz_loc*(gupxz_loc*gxzz(i,j,k)+gupyz_loc*gyzz(i,j,k)+gupzz_loc*gzzz(i,j,k)))
|
||||||
|
endif
|
||||||
|
|
||||||
Gamxzz =HALF*( gupxx*(TWO*gxzz - gzzx ) + gupxy*(TWO*gyzz - gzzy ) + gupxz*gzzz)
|
Gamxxx(i,j,k)=HALF*( gupxx_loc*gxxx(i,j,k) + gupxy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupxz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
|
||||||
Gamyzz =HALF*( gupxy*(TWO*gxzz - gzzx ) + gupyy*(TWO*gyzz - gzzy ) + gupyz*gzzz)
|
Gamyxx(i,j,k)=HALF*( gupxy_loc*gxxx(i,j,k) + gupyy_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupyz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
|
||||||
Gamzzz =HALF*( gupxz*(TWO*gxzz - gzzx ) + gupyz*(TWO*gyzz - gzzy ) + gupzz*gzzz)
|
Gamzxx(i,j,k)=HALF*( gupxz_loc*gxxx(i,j,k) + gupyz_loc*(TWO*gxyx(i,j,k) - gxxy(i,j,k)) + gupzz_loc*(TWO*gxzx(i,j,k) - gxxz(i,j,k)))
|
||||||
|
|
||||||
Gamxxy =HALF*( gupxx*gxxy + gupxy*gyyx + gupxz*( gxzy + gyzx - gxyz ) )
|
Gamxyy(i,j,k)=HALF*( gupxx_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupxy_loc*gyyy(i,j,k) + gupxz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
|
||||||
Gamyxy =HALF*( gupxy*gxxy + gupyy*gyyx + gupyz*( gxzy + gyzx - gxyz ) )
|
Gamyyy(i,j,k)=HALF*( gupxy_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyy_loc*gyyy(i,j,k) + gupyz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
|
||||||
Gamzxy =HALF*( gupxz*gxxy + gupyz*gyyx + gupzz*( gxzy + gyzx - gxyz ) )
|
Gamzyy(i,j,k)=HALF*( gupxz_loc*(TWO*gxyy(i,j,k) - gyyx(i,j,k)) + gupyz_loc*gyyy(i,j,k) + gupzz_loc*(TWO*gyzy(i,j,k) - gyyz(i,j,k)))
|
||||||
|
|
||||||
Gamxxz =HALF*( gupxx*gxxz + gupxy*( gxyz + gyzx - gxzy ) + gupxz*gzzx )
|
Gamxzz(i,j,k)=HALF*( gupxx_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupxy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupxz_loc*gzzz(i,j,k))
|
||||||
Gamyxz =HALF*( gupxy*gxxz + gupyy*( gxyz + gyzx - gxzy ) + gupyz*gzzx )
|
Gamyzz(i,j,k)=HALF*( gupxy_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyy_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupyz_loc*gzzz(i,j,k))
|
||||||
Gamzxz =HALF*( gupxz*gxxz + gupyz*( gxyz + gyzx - gxzy ) + gupzz*gzzx )
|
Gamzzz(i,j,k)=HALF*( gupxz_loc*(TWO*gxzz(i,j,k) - gzzx(i,j,k)) + gupyz_loc*(TWO*gyzz(i,j,k) - gzzy(i,j,k)) + gupzz_loc*gzzz(i,j,k))
|
||||||
|
|
||||||
Gamxyz =HALF*( gupxx*( gxyz + gxzy - gyzx ) + gupxy*gyyz + gupxz*gzzy )
|
Gamxxy(i,j,k)=HALF*( gupxx_loc*gxxy(i,j,k) + gupxy_loc*gyyx(i,j,k) + gupxz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
|
||||||
Gamyyz =HALF*( gupxy*( gxyz + gxzy - gyzx ) + gupyy*gyyz + gupyz*gzzy )
|
Gamyxy(i,j,k)=HALF*( gupxy_loc*gxxy(i,j,k) + gupyy_loc*gyyx(i,j,k) + gupyz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
|
||||||
Gamzyz =HALF*( gupxz*( gxyz + gxzy - gyzx ) + gupyz*gyyz + gupzz*gzzy )
|
Gamzxy(i,j,k)=HALF*( gupxz_loc*gxxy(i,j,k) + gupyz_loc*gyyx(i,j,k) + gupzz_loc*(gxzy(i,j,k) + gyzx(i,j,k) - gxyz(i,j,k)) )
|
||||||
|
|
||||||
|
Gamxxz(i,j,k)=HALF*( gupxx_loc*gxxz(i,j,k) + gupxy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupxz_loc*gzzx(i,j,k) )
|
||||||
|
Gamyxz(i,j,k)=HALF*( gupxy_loc*gxxz(i,j,k) + gupyy_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupyz_loc*gzzx(i,j,k) )
|
||||||
|
Gamzxz(i,j,k)=HALF*( gupxz_loc*gxxz(i,j,k) + gupyz_loc*(gxyz(i,j,k) + gyzx(i,j,k) - gxzy(i,j,k)) + gupzz_loc*gzzx(i,j,k) )
|
||||||
|
|
||||||
|
Gamxyz(i,j,k)=HALF*( gupxx_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupxy_loc*gyyz(i,j,k) + gupxz_loc*gzzy(i,j,k) )
|
||||||
|
Gamyyz(i,j,k)=HALF*( gupxy_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyy_loc*gyyz(i,j,k) + gupyz_loc*gzzy(i,j,k) )
|
||||||
|
Gamzyz(i,j,k)=HALF*( gupxz_loc*(gxyz(i,j,k) + gxzy(i,j,k) - gyzx(i,j,k)) + gupyz_loc*gyyz(i,j,k) + gupzz_loc*gzzy(i,j,k) )
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
! Raise indices of \tilde A_{ij} and store in R_ij
|
! Raise indices of \tilde A_{ij} and store in R_ij
|
||||||
|
|
||||||
Rxx = gupxx * gupxx * Axx + gupxy * gupxy * Ayy + gupxz * gupxz * Azz + &
|
|
||||||
TWO*(gupxx * gupxy * Axy + gupxx * gupxz * Axz + gupxy * gupxz * Ayz)
|
|
||||||
|
|
||||||
Ryy = gupxy * gupxy * Axx + gupyy * gupyy * Ayy + gupyz * gupyz * Azz + &
|
|
||||||
TWO*(gupxy * gupyy * Axy + gupxy * gupyz * Axz + gupyy * gupyz * Ayz)
|
|
||||||
|
|
||||||
Rzz = gupxz * gupxz * Axx + gupyz * gupyz * Ayy + gupzz * gupzz * Azz + &
|
|
||||||
TWO*(gupxz * gupyz * Axy + gupxz * gupzz * Axz + gupyz * gupzz * Ayz)
|
|
||||||
|
|
||||||
Rxy = gupxx * gupxy * Axx + gupxy * gupyy * Ayy + gupxz * gupyz * Azz + &
|
|
||||||
(gupxx * gupyy + gupxy * gupxy)* Axy + &
|
|
||||||
(gupxx * gupyz + gupxz * gupxy)* Axz + &
|
|
||||||
(gupxy * gupyz + gupxz * gupyy)* Ayz
|
|
||||||
|
|
||||||
Rxz = gupxx * gupxz * Axx + gupxy * gupyz * Ayy + gupxz * gupzz * Azz + &
|
|
||||||
(gupxx * gupyz + gupxy * gupxz)* Axy + &
|
|
||||||
(gupxx * gupzz + gupxz * gupxz)* Axz + &
|
|
||||||
(gupxy * gupzz + gupxz * gupyz)* Ayz
|
|
||||||
|
|
||||||
Ryz = gupxy * gupxz * Axx + gupyy * gupyz * Ayy + gupyz * gupzz * Azz + &
|
|
||||||
(gupxy * gupyz + gupyy * gupxz)* Axy + &
|
|
||||||
(gupxy * gupzz + gupyz * gupxz)* Axz + &
|
|
||||||
(gupyy * gupzz + gupyz * gupyz)* Ayz
|
|
||||||
|
|
||||||
! Right hand side for Gam^i without shift terms...
|
! Right hand side for Gam^i without shift terms...
|
||||||
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fderivs(ex,Lap,Lapx,Lapy,Lapz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
||||||
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
call fderivs(ex,trK,Kx,Ky,Kz,X,Y,Z,SYM,SYM,SYM,symmetry,Lev)
|
||||||
|
do k=1,ex(3)
|
||||||
|
do j=1,ex(2)
|
||||||
|
do i=1,ex(1)
|
||||||
|
gupxx_loc = gupxx(i,j,k)
|
||||||
|
gupxy_loc = gupxy(i,j,k)
|
||||||
|
gupxz_loc = gupxz(i,j,k)
|
||||||
|
gupyy_loc = gupyy(i,j,k)
|
||||||
|
gupyz_loc = gupyz(i,j,k)
|
||||||
|
gupzz_loc = gupzz(i,j,k)
|
||||||
|
|
||||||
Gamx_rhs = - TWO * ( Lapx * Rxx + Lapy * Rxy + Lapz * Rxz ) + &
|
Rxx_loc = gupxx_loc * gupxx_loc * Axx(i,j,k) + gupxy_loc * gupxy_loc * Ayy(i,j,k) + gupxz_loc * gupxz_loc * Azz(i,j,k) + &
|
||||||
TWO * alpn1 * ( &
|
TWO * (gupxx_loc * gupxy_loc * Axy(i,j,k) + gupxx_loc * gupxz_loc * Axz(i,j,k) + gupxy_loc * gupxz_loc * Ayz(i,j,k))
|
||||||
-F3o2/chin1 * ( chix * Rxx + chiy * Rxy + chiz * Rxz ) - &
|
Ryy_loc = gupxy_loc * gupxy_loc * Axx(i,j,k) + gupyy_loc * gupyy_loc * Ayy(i,j,k) + gupyz_loc * gupyz_loc * Azz(i,j,k) + &
|
||||||
gupxx * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
|
TWO * (gupxy_loc * gupyy_loc * Axy(i,j,k) + gupxy_loc * gupyz_loc * Axz(i,j,k) + gupyy_loc * gupyz_loc * Ayz(i,j,k))
|
||||||
gupxy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
|
Rzz_loc = gupxz_loc * gupxz_loc * Axx(i,j,k) + gupyz_loc * gupyz_loc * Ayy(i,j,k) + gupzz_loc * gupzz_loc * Azz(i,j,k) + &
|
||||||
gupxz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
|
TWO * (gupxz_loc * gupyz_loc * Axy(i,j,k) + gupxz_loc * gupzz_loc * Axz(i,j,k) + gupyz_loc * gupzz_loc * Ayz(i,j,k))
|
||||||
Gamxxx * Rxx + Gamxyy * Ryy + Gamxzz * Rzz + &
|
Rxy_loc = gupxx_loc * gupxy_loc * Axx(i,j,k) + gupxy_loc * gupyy_loc * Ayy(i,j,k) + gupxz_loc * gupyz_loc * Azz(i,j,k) + &
|
||||||
TWO * ( Gamxxy * Rxy + Gamxxz * Rxz + Gamxyz * Ryz ) )
|
(gupxx_loc * gupyy_loc + gupxy_loc * gupxy_loc) * Axy(i,j,k) + &
|
||||||
|
(gupxx_loc * gupyz_loc + gupxz_loc * gupxy_loc) * Axz(i,j,k) + &
|
||||||
|
(gupxy_loc * gupyz_loc + gupxz_loc * gupyy_loc) * Ayz(i,j,k)
|
||||||
|
Rxz_loc = gupxx_loc * gupxz_loc * Axx(i,j,k) + gupxy_loc * gupyz_loc * Ayy(i,j,k) + gupxz_loc * gupzz_loc * Azz(i,j,k) + &
|
||||||
|
(gupxx_loc * gupyz_loc + gupxy_loc * gupxz_loc) * Axy(i,j,k) + &
|
||||||
|
(gupxx_loc * gupzz_loc + gupxz_loc * gupxz_loc) * Axz(i,j,k) + &
|
||||||
|
(gupxy_loc * gupzz_loc + gupxz_loc * gupyz_loc) * Ayz(i,j,k)
|
||||||
|
Ryz_loc = gupxy_loc * gupxz_loc * Axx(i,j,k) + gupyy_loc * gupyz_loc * Ayy(i,j,k) + gupyz_loc * gupzz_loc * Azz(i,j,k) + &
|
||||||
|
(gupxy_loc * gupyz_loc + gupyy_loc * gupxz_loc) * Axy(i,j,k) + &
|
||||||
|
(gupxy_loc * gupzz_loc + gupyz_loc * gupxz_loc) * Axz(i,j,k) + &
|
||||||
|
(gupyy_loc * gupzz_loc + gupyz_loc * gupyz_loc) * Ayz(i,j,k)
|
||||||
|
Rxx(i,j,k) = Rxx_loc
|
||||||
|
Ryy(i,j,k) = Ryy_loc
|
||||||
|
Rzz(i,j,k) = Rzz_loc
|
||||||
|
Rxy(i,j,k) = Rxy_loc
|
||||||
|
Rxz(i,j,k) = Rxz_loc
|
||||||
|
Ryz(i,j,k) = Ryz_loc
|
||||||
|
|
||||||
Gamy_rhs = - TWO * ( Lapx * Rxy + Lapy * Ryy + Lapz * Ryz ) + &
|
Gamx_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxx_loc + Lapy(i,j,k) * Rxy_loc + Lapz(i,j,k) * Rxz_loc) + &
|
||||||
TWO * alpn1 * ( &
|
TWO * alpn1(i,j,k) * ( &
|
||||||
-F3o2/chin1 * ( chix * Rxy + chiy * Ryy + chiz * Ryz ) - &
|
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxx_loc + chiy(i,j,k) * Rxy_loc + chiz(i,j,k) * Rxz_loc) - &
|
||||||
gupxy * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
|
gupxx_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
|
||||||
gupyy * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
|
gupxy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
|
||||||
gupyz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
|
gupxz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
|
||||||
Gamyxx * Rxx + Gamyyy * Ryy + Gamyzz * Rzz + &
|
Gamxxx(i,j,k) * Rxx_loc + Gamxyy(i,j,k) * Ryy_loc + Gamxzz(i,j,k) * Rzz_loc + &
|
||||||
TWO * ( Gamyxy * Rxy + Gamyxz * Rxz + Gamyyz * Ryz ) )
|
TWO * (Gamxxy(i,j,k) * Rxy_loc + Gamxxz(i,j,k) * Rxz_loc + Gamxyz(i,j,k) * Ryz_loc))
|
||||||
|
|
||||||
Gamz_rhs = - TWO * ( Lapx * Rxz + Lapy * Ryz + Lapz * Rzz ) + &
|
Gamy_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxy_loc + Lapy(i,j,k) * Ryy_loc + Lapz(i,j,k) * Ryz_loc) + &
|
||||||
TWO * alpn1 * ( &
|
TWO * alpn1(i,j,k) * ( &
|
||||||
-F3o2/chin1 * ( chix * Rxz + chiy * Ryz + chiz * Rzz ) - &
|
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxy_loc + chiy(i,j,k) * Ryy_loc + chiz(i,j,k) * Ryz_loc) - &
|
||||||
gupxz * ( F2o3 * Kx + EIGHT * PI * Sx ) - &
|
gupxy_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
|
||||||
gupyz * ( F2o3 * Ky + EIGHT * PI * Sy ) - &
|
gupyy_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
|
||||||
gupzz * ( F2o3 * Kz + EIGHT * PI * Sz ) + &
|
gupyz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
|
||||||
Gamzxx * Rxx + Gamzyy * Ryy + Gamzzz * Rzz + &
|
Gamyxx(i,j,k) * Rxx_loc + Gamyyy(i,j,k) * Ryy_loc + Gamyzz(i,j,k) * Rzz_loc + &
|
||||||
TWO * ( Gamzxy * Rxy + Gamzxz * Rxz + Gamzyz * Ryz ) )
|
TWO * (Gamyxy(i,j,k) * Rxy_loc + Gamyxz(i,j,k) * Rxz_loc + Gamyyz(i,j,k) * Ryz_loc))
|
||||||
|
|
||||||
|
Gamz_rhs(i,j,k) = - TWO * (Lapx(i,j,k) * Rxz_loc + Lapy(i,j,k) * Ryz_loc + Lapz(i,j,k) * Rzz_loc) + &
|
||||||
|
TWO * alpn1(i,j,k) * ( &
|
||||||
|
-F3o2/chin1(i,j,k) * (chix(i,j,k) * Rxz_loc + chiy(i,j,k) * Ryz_loc + chiz(i,j,k) * Rzz_loc) - &
|
||||||
|
gupxz_loc * (F2o3 * Kx(i,j,k) + EIGHT * PI * Sx(i,j,k)) - &
|
||||||
|
gupyz_loc * (F2o3 * Ky(i,j,k) + EIGHT * PI * Sy(i,j,k)) - &
|
||||||
|
gupzz_loc * (F2o3 * Kz(i,j,k) + EIGHT * PI * Sz(i,j,k)) + &
|
||||||
|
Gamzxx(i,j,k) * Rxx_loc + Gamzyy(i,j,k) * Ryy_loc + Gamzzz(i,j,k) * Rzz_loc + &
|
||||||
|
TWO * (Gamzxy(i,j,k) * Rxy_loc + Gamzxz(i,j,k) * Rxz_loc + Gamzyz(i,j,k) * Ryz_loc))
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
|
call fdderivs(ex,betax,gxxx,gxyx,gxzx,gyyx,gyzx,gzzx,&
|
||||||
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
|
X,Y,Z,ANTI,SYM, SYM ,Symmetry,Lev)
|
||||||
@@ -321,38 +359,54 @@
|
|||||||
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
|
call fdderivs(ex,betaz,gxxz,gxyz,gxzz,gyyz,gyzz,gzzz,&
|
||||||
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
|
X,Y,Z,SYM ,SYM, ANTI,Symmetry,Lev)
|
||||||
|
|
||||||
fxx = gxxx + gxyy + gxzz
|
|
||||||
fxy = gxyx + gyyy + gyzz
|
|
||||||
fxz = gxzx + gyzy + gzzz
|
|
||||||
|
|
||||||
Gamxa = gupxx * Gamxxx + gupyy * Gamxyy + gupzz * Gamxzz + &
|
|
||||||
TWO*( gupxy * Gamxxy + gupxz * Gamxxz + gupyz * Gamxyz )
|
|
||||||
Gamya = gupxx * Gamyxx + gupyy * Gamyyy + gupzz * Gamyzz + &
|
|
||||||
TWO*( gupxy * Gamyxy + gupxz * Gamyxz + gupyz * Gamyyz )
|
|
||||||
Gamza = gupxx * Gamzxx + gupyy * Gamzyy + gupzz * Gamzzz + &
|
|
||||||
TWO*( gupxy * Gamzxy + gupxz * Gamzxz + gupyz * Gamzyz )
|
|
||||||
|
|
||||||
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
|
call fderivs(ex,Gamx,Gamxx,Gamxy,Gamxz,X,Y,Z,ANTI,SYM ,SYM ,Symmetry,Lev)
|
||||||
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
|
call fderivs(ex,Gamy,Gamyx,Gamyy,Gamyz,X,Y,Z,SYM ,ANTI,SYM ,Symmetry,Lev)
|
||||||
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
|
call fderivs(ex,Gamz,Gamzx,Gamzy,Gamzz,X,Y,Z,SYM ,SYM ,ANTI,Symmetry,Lev)
|
||||||
|
do k=1,ex(3)
|
||||||
|
do j=1,ex(2)
|
||||||
|
do i=1,ex(1)
|
||||||
|
divb_loc = div_beta(i,j,k)
|
||||||
|
fxx_loc = gxxx(i,j,k) + gxyy(i,j,k) + gxzz(i,j,k)
|
||||||
|
fxy_loc = gxyx(i,j,k) + gyyy(i,j,k) + gyzz(i,j,k)
|
||||||
|
fxz_loc = gxzx(i,j,k) + gyzy(i,j,k) + gzzz(i,j,k)
|
||||||
|
|
||||||
Gamx_rhs = Gamx_rhs + F2o3 * Gamxa * div_beta - &
|
gupxx_loc = gupxx(i,j,k)
|
||||||
Gamxa * betaxx - Gamya * betaxy - Gamza * betaxz + &
|
gupxy_loc = gupxy(i,j,k)
|
||||||
F1o3 * (gupxx * fxx + gupxy * fxy + gupxz * fxz ) + &
|
gupxz_loc = gupxz(i,j,k)
|
||||||
gupxx * gxxx + gupyy * gyyx + gupzz * gzzx + &
|
gupyy_loc = gupyy(i,j,k)
|
||||||
TWO * (gupxy * gxyx + gupxz * gxzx + gupyz * gyzx )
|
gupyz_loc = gupyz(i,j,k)
|
||||||
|
gupzz_loc = gupzz(i,j,k)
|
||||||
|
|
||||||
Gamy_rhs = Gamy_rhs + F2o3 * Gamya * div_beta - &
|
Gamxa_loc = gupxx_loc * Gamxxx(i,j,k) + gupyy_loc * Gamxyy(i,j,k) + gupzz_loc * Gamxzz(i,j,k) + &
|
||||||
Gamxa * betayx - Gamya * betayy - Gamza * betayz + &
|
TWO * (gupxy_loc * Gamxxy(i,j,k) + gupxz_loc * Gamxxz(i,j,k) + gupyz_loc * Gamxyz(i,j,k))
|
||||||
F1o3 * (gupxy * fxx + gupyy * fxy + gupyz * fxz ) + &
|
Gamya_loc = gupxx_loc * Gamyxx(i,j,k) + gupyy_loc * Gamyyy(i,j,k) + gupzz_loc * Gamyzz(i,j,k) + &
|
||||||
gupxx * gxxy + gupyy * gyyy + gupzz * gzzy + &
|
TWO * (gupxy_loc * Gamyxy(i,j,k) + gupxz_loc * Gamyxz(i,j,k) + gupyz_loc * Gamyyz(i,j,k))
|
||||||
TWO * (gupxy * gxyy + gupxz * gxzy + gupyz * gyzy )
|
Gamza_loc = gupxx_loc * Gamzxx(i,j,k) + gupyy_loc * Gamzyy(i,j,k) + gupzz_loc * Gamzzz(i,j,k) + &
|
||||||
|
TWO * (gupxy_loc * Gamzxy(i,j,k) + gupxz_loc * Gamzxz(i,j,k) + gupyz_loc * Gamzyz(i,j,k))
|
||||||
|
Gamxa(i,j,k) = Gamxa_loc
|
||||||
|
Gamya(i,j,k) = Gamya_loc
|
||||||
|
Gamza(i,j,k) = Gamza_loc
|
||||||
|
|
||||||
Gamz_rhs = Gamz_rhs + F2o3 * Gamza * div_beta - &
|
Gamx_rhs(i,j,k) = Gamx_rhs(i,j,k) + F2o3 * Gamxa_loc * divb_loc - &
|
||||||
Gamxa * betazx - Gamya * betazy - Gamza * betazz + &
|
Gamxa_loc * betaxx(i,j,k) - Gamya_loc * betaxy(i,j,k) - Gamza_loc * betaxz(i,j,k) + &
|
||||||
F1o3 * (gupxz * fxx + gupyz * fxy + gupzz * fxz ) + &
|
F1o3 * (gupxx_loc * fxx_loc + gupxy_loc * fxy_loc + gupxz_loc * fxz_loc) + &
|
||||||
gupxx * gxxz + gupyy * gyyz + gupzz * gzzz + &
|
gupxx_loc * gxxx(i,j,k) + gupyy_loc * gyyx(i,j,k) + gupzz_loc * gzzx(i,j,k) + &
|
||||||
TWO * (gupxy * gxyz + gupxz * gxzz + gupyz * gyzz ) !rhs for Gam^i
|
TWO * (gupxy_loc * gxyx(i,j,k) + gupxz_loc * gxzx(i,j,k) + gupyz_loc * gyzx(i,j,k))
|
||||||
|
|
||||||
|
Gamy_rhs(i,j,k) = Gamy_rhs(i,j,k) + F2o3 * Gamya_loc * divb_loc - &
|
||||||
|
Gamxa_loc * betayx(i,j,k) - Gamya_loc * betayy(i,j,k) - Gamza_loc * betayz(i,j,k) + &
|
||||||
|
F1o3 * (gupxy_loc * fxx_loc + gupyy_loc * fxy_loc + gupyz_loc * fxz_loc) + &
|
||||||
|
gupxx_loc * gxxy(i,j,k) + gupyy_loc * gyyy(i,j,k) + gupzz_loc * gzzy(i,j,k) + &
|
||||||
|
TWO * (gupxy_loc * gxyy(i,j,k) + gupxz_loc * gxzy(i,j,k) + gupyz_loc * gyzy(i,j,k))
|
||||||
|
|
||||||
|
Gamz_rhs(i,j,k) = Gamz_rhs(i,j,k) + F2o3 * Gamza_loc * divb_loc - &
|
||||||
|
Gamxa_loc * betazx(i,j,k) - Gamya_loc * betazy(i,j,k) - Gamza_loc * betazz(i,j,k) + &
|
||||||
|
F1o3 * (gupxz_loc * fxx_loc + gupyz_loc * fxy_loc + gupzz_loc * fxz_loc) + &
|
||||||
|
gupxx_loc * gxxz(i,j,k) + gupyy_loc * gyyz(i,j,k) + gupzz_loc * gzzz(i,j,k) + &
|
||||||
|
TWO * (gupxy_loc * gxyz(i,j,k) + gupxz_loc * gxzz(i,j,k) + gupyz_loc * gyzz(i,j,k))
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
!first kind of connection stored in gij,k
|
!first kind of connection stored in gij,k
|
||||||
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
|
gxxx = gxx * Gamxxx + gxy * Gamyxx + gxz * Gamzxx
|
||||||
@@ -604,189 +658,187 @@
|
|||||||
!covariant second derivative of chi respect to tilted metric
|
!covariant second derivative of chi respect to tilted metric
|
||||||
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
call fdderivs(ex,chi,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z,SYM,SYM,SYM,Symmetry,Lev)
|
||||||
|
|
||||||
fxx = fxx - Gamxxx * chix - Gamyxx * chiy - Gamzxx * chiz
|
do k=1,ex(3)
|
||||||
fxy = fxy - Gamxxy * chix - Gamyxy * chiy - Gamzxy * chiz
|
do j=1,ex(2)
|
||||||
fxz = fxz - Gamxxz * chix - Gamyxz * chiy - Gamzxz * chiz
|
do i=1,ex(1)
|
||||||
fyy = fyy - Gamxyy * chix - Gamyyy * chiy - Gamzyy * chiz
|
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k) * chix(i,j,k) - Gamyxx(i,j,k) * chiy(i,j,k) - Gamzxx(i,j,k) * chiz(i,j,k)
|
||||||
fyz = fyz - Gamxyz * chix - Gamyyz * chiy - Gamzyz * chiz
|
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k) * chix(i,j,k) - Gamyxy(i,j,k) * chiy(i,j,k) - Gamzxy(i,j,k) * chiz(i,j,k)
|
||||||
fzz = fzz - Gamxzz * chix - Gamyzz * chiy - Gamzzz * chiz
|
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k) * chix(i,j,k) - Gamyxz(i,j,k) * chiy(i,j,k) - Gamzxz(i,j,k) * chiz(i,j,k)
|
||||||
! Store D^l D_l chi - 3/(2*chi) D^l chi D_l chi in f
|
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k) * chix(i,j,k) - Gamyyy(i,j,k) * chiy(i,j,k) - Gamzyy(i,j,k) * chiz(i,j,k)
|
||||||
|
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k) * chix(i,j,k) - Gamyyz(i,j,k) * chiy(i,j,k) - Gamzyz(i,j,k) * chiz(i,j,k)
|
||||||
|
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k) * chix(i,j,k) - Gamyzz(i,j,k) * chiy(i,j,k) - Gamzzz(i,j,k) * chiz(i,j,k)
|
||||||
|
|
||||||
f = gupxx * ( fxx - F3o2/chin1 * chix * chix ) + &
|
chin_loc = chin1(i,j,k)
|
||||||
gupyy * ( fyy - F3o2/chin1 * chiy * chiy ) + &
|
f_loc = gupxx(i,j,k) * (fxx(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chix(i,j,k)) + &
|
||||||
gupzz * ( fzz - F3o2/chin1 * chiz * chiz ) + &
|
gupyy(i,j,k) * (fyy(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiy(i,j,k)) + &
|
||||||
TWO * gupxy * ( fxy - F3o2/chin1 * chix * chiy ) + &
|
gupzz(i,j,k) * (fzz(i,j,k) - F3o2/chin_loc * chiz(i,j,k) * chiz(i,j,k)) + &
|
||||||
TWO * gupxz * ( fxz - F3o2/chin1 * chix * chiz ) + &
|
TWO * gupxy(i,j,k) * (fxy(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiy(i,j,k)) + &
|
||||||
TWO * gupyz * ( fyz - F3o2/chin1 * chiy * chiz )
|
TWO * gupxz(i,j,k) * (fxz(i,j,k) - F3o2/chin_loc * chix(i,j,k) * chiz(i,j,k)) + &
|
||||||
! Add chi part to Ricci tensor:
|
TWO * gupyz(i,j,k) * (fyz(i,j,k) - F3o2/chin_loc * chiy(i,j,k) * chiz(i,j,k))
|
||||||
|
f(i,j,k) = f_loc
|
||||||
|
|
||||||
Rxx = Rxx + (fxx - chix*chix/chin1/TWO + gxx * f)/chin1/TWO
|
Rxx(i,j,k) = Rxx(i,j,k) + (fxx(i,j,k) - chix(i,j,k)*chix(i,j,k)/chin_loc/TWO + gxx(i,j,k) * f_loc)/chin_loc/TWO
|
||||||
Ryy = Ryy + (fyy - chiy*chiy/chin1/TWO + gyy * f)/chin1/TWO
|
Ryy(i,j,k) = Ryy(i,j,k) + (fyy(i,j,k) - chiy(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gyy(i,j,k) * f_loc)/chin_loc/TWO
|
||||||
Rzz = Rzz + (fzz - chiz*chiz/chin1/TWO + gzz * f)/chin1/TWO
|
Rzz(i,j,k) = Rzz(i,j,k) + (fzz(i,j,k) - chiz(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gzz(i,j,k) * f_loc)/chin_loc/TWO
|
||||||
Rxy = Rxy + (fxy - chix*chiy/chin1/TWO + gxy * f)/chin1/TWO
|
Rxy(i,j,k) = Rxy(i,j,k) + (fxy(i,j,k) - chix(i,j,k)*chiy(i,j,k)/chin_loc/TWO + gxy(i,j,k) * f_loc)/chin_loc/TWO
|
||||||
Rxz = Rxz + (fxz - chix*chiz/chin1/TWO + gxz * f)/chin1/TWO
|
Rxz(i,j,k) = Rxz(i,j,k) + (fxz(i,j,k) - chix(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gxz(i,j,k) * f_loc)/chin_loc/TWO
|
||||||
Ryz = Ryz + (fyz - chiy*chiz/chin1/TWO + gyz * f)/chin1/TWO
|
Ryz(i,j,k) = Ryz(i,j,k) + (fyz(i,j,k) - chiy(i,j,k)*chiz(i,j,k)/chin_loc/TWO + gyz(i,j,k) * f_loc)/chin_loc/TWO
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
! covariant second derivatives of the lapse respect to physical metric
|
! covariant second derivatives of the lapse respect to physical metric
|
||||||
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
call fdderivs(ex,Lap,fxx,fxy,fxz,fyy,fyz,fzz,X,Y,Z, &
|
||||||
SYM,SYM,SYM,symmetry,Lev)
|
SYM,SYM,SYM,symmetry,Lev)
|
||||||
|
|
||||||
gxxx = (gupxx * chix + gupxy * chiy + gupxz * chiz)/chin1
|
do k=1,ex(3)
|
||||||
gxxy = (gupxy * chix + gupyy * chiy + gupyz * chiz)/chin1
|
do j=1,ex(2)
|
||||||
gxxz = (gupxz * chix + gupyz * chiy + gupzz * chiz)/chin1
|
do i=1,ex(1)
|
||||||
! now get physical second kind of connection
|
chin_loc = chin1(i,j,k)
|
||||||
Gamxxx = Gamxxx - ( (chix + chix)/chin1 - gxx * gxxx )*HALF
|
gxxx(i,j,k) = (gupxx(i,j,k) * chix(i,j,k) + gupxy(i,j,k) * chiy(i,j,k) + gupxz(i,j,k) * chiz(i,j,k)) / chin_loc
|
||||||
Gamyxx = Gamyxx - ( - gxx * gxxy )*HALF
|
gxxy(i,j,k) = (gupxy(i,j,k) * chix(i,j,k) + gupyy(i,j,k) * chiy(i,j,k) + gupyz(i,j,k) * chiz(i,j,k)) / chin_loc
|
||||||
Gamzxx = Gamzxx - ( - gxx * gxxz )*HALF
|
gxxz(i,j,k) = (gupxz(i,j,k) * chix(i,j,k) + gupyz(i,j,k) * chiy(i,j,k) + gupzz(i,j,k) * chiz(i,j,k)) / chin_loc
|
||||||
Gamxyy = Gamxyy - ( - gyy * gxxx )*HALF
|
|
||||||
Gamyyy = Gamyyy - ( (chiy + chiy)/chin1 - gyy * gxxy )*HALF
|
|
||||||
Gamzyy = Gamzyy - ( - gyy * gxxz )*HALF
|
|
||||||
Gamxzz = Gamxzz - ( - gzz * gxxx )*HALF
|
|
||||||
Gamyzz = Gamyzz - ( - gzz * gxxy )*HALF
|
|
||||||
Gamzzz = Gamzzz - ( (chiz + chiz)/chin1 - gzz * gxxz )*HALF
|
|
||||||
Gamxxy = Gamxxy - ( chiy /chin1 - gxy * gxxx )*HALF
|
|
||||||
Gamyxy = Gamyxy - ( chix /chin1 - gxy * gxxy )*HALF
|
|
||||||
Gamzxy = Gamzxy - ( - gxy * gxxz )*HALF
|
|
||||||
Gamxxz = Gamxxz - ( chiz /chin1 - gxz * gxxx )*HALF
|
|
||||||
Gamyxz = Gamyxz - ( - gxz * gxxy )*HALF
|
|
||||||
Gamzxz = Gamzxz - ( chix /chin1 - gxz * gxxz )*HALF
|
|
||||||
Gamxyz = Gamxyz - ( - gyz * gxxx )*HALF
|
|
||||||
Gamyyz = Gamyyz - ( chiz /chin1 - gyz * gxxy )*HALF
|
|
||||||
Gamzyz = Gamzyz - ( chiy /chin1 - gyz * gxxz )*HALF
|
|
||||||
|
|
||||||
fxx = fxx - Gamxxx*Lapx - Gamyxx*Lapy - Gamzxx*Lapz
|
Gamxxx(i,j,k) = Gamxxx(i,j,k) - ( (chix(i,j,k) + chix(i,j,k))/chin_loc - gxx(i,j,k) * gxxx(i,j,k) )*HALF
|
||||||
fyy = fyy - Gamxyy*Lapx - Gamyyy*Lapy - Gamzyy*Lapz
|
Gamyxx(i,j,k) = Gamyxx(i,j,k) - ( - gxx(i,j,k) * gxxy(i,j,k) )*HALF
|
||||||
fzz = fzz - Gamxzz*Lapx - Gamyzz*Lapy - Gamzzz*Lapz
|
Gamzxx(i,j,k) = Gamzxx(i,j,k) - ( - gxx(i,j,k) * gxxz(i,j,k) )*HALF
|
||||||
fxy = fxy - Gamxxy*Lapx - Gamyxy*Lapy - Gamzxy*Lapz
|
Gamxyy(i,j,k) = Gamxyy(i,j,k) - ( - gyy(i,j,k) * gxxx(i,j,k) )*HALF
|
||||||
fxz = fxz - Gamxxz*Lapx - Gamyxz*Lapy - Gamzxz*Lapz
|
Gamyyy(i,j,k) = Gamyyy(i,j,k) - ( (chiy(i,j,k) + chiy(i,j,k))/chin_loc - gyy(i,j,k) * gxxy(i,j,k) )*HALF
|
||||||
fyz = fyz - Gamxyz*Lapx - Gamyyz*Lapy - Gamzyz*Lapz
|
Gamzyy(i,j,k) = Gamzyy(i,j,k) - ( - gyy(i,j,k) * gxxz(i,j,k) )*HALF
|
||||||
|
Gamxzz(i,j,k) = Gamxzz(i,j,k) - ( - gzz(i,j,k) * gxxx(i,j,k) )*HALF
|
||||||
|
Gamyzz(i,j,k) = Gamyzz(i,j,k) - ( - gzz(i,j,k) * gxxy(i,j,k) )*HALF
|
||||||
|
Gamzzz(i,j,k) = Gamzzz(i,j,k) - ( (chiz(i,j,k) + chiz(i,j,k))/chin_loc - gzz(i,j,k) * gxxz(i,j,k) )*HALF
|
||||||
|
Gamxxy(i,j,k) = Gamxxy(i,j,k) - ( chiy(i,j,k) /chin_loc - gxy(i,j,k) * gxxx(i,j,k) )*HALF
|
||||||
|
Gamyxy(i,j,k) = Gamyxy(i,j,k) - ( chix(i,j,k) /chin_loc - gxy(i,j,k) * gxxy(i,j,k) )*HALF
|
||||||
|
Gamzxy(i,j,k) = Gamzxy(i,j,k) - ( - gxy(i,j,k) * gxxz(i,j,k) )*HALF
|
||||||
|
Gamxxz(i,j,k) = Gamxxz(i,j,k) - ( chiz(i,j,k) /chin_loc - gxz(i,j,k) * gxxx(i,j,k) )*HALF
|
||||||
|
Gamyxz(i,j,k) = Gamyxz(i,j,k) - ( - gxz(i,j,k) * gxxy(i,j,k) )*HALF
|
||||||
|
Gamzxz(i,j,k) = Gamzxz(i,j,k) - ( chix(i,j,k) /chin_loc - gxz(i,j,k) * gxxz(i,j,k) )*HALF
|
||||||
|
Gamxyz(i,j,k) = Gamxyz(i,j,k) - ( - gyz(i,j,k) * gxxx(i,j,k) )*HALF
|
||||||
|
Gamyyz(i,j,k) = Gamyyz(i,j,k) - ( chiz(i,j,k) /chin_loc - gyz(i,j,k) * gxxy(i,j,k) )*HALF
|
||||||
|
Gamzyz(i,j,k) = Gamzyz(i,j,k) - ( chiy(i,j,k) /chin_loc - gyz(i,j,k) * gxxz(i,j,k) )*HALF
|
||||||
|
|
||||||
! store D^i D_i Lap in trK_rhs upto chi
|
fxx(i,j,k) = fxx(i,j,k) - Gamxxx(i,j,k)*Lapx(i,j,k) - Gamyxx(i,j,k)*Lapy(i,j,k) - Gamzxx(i,j,k)*Lapz(i,j,k)
|
||||||
trK_rhs = gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
fyy(i,j,k) = fyy(i,j,k) - Gamxyy(i,j,k)*Lapx(i,j,k) - Gamyyy(i,j,k)*Lapy(i,j,k) - Gamzyy(i,j,k)*Lapz(i,j,k)
|
||||||
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz )
|
fzz(i,j,k) = fzz(i,j,k) - Gamxzz(i,j,k)*Lapx(i,j,k) - Gamyzz(i,j,k)*Lapy(i,j,k) - Gamzzz(i,j,k)*Lapz(i,j,k)
|
||||||
#if 1
|
fxy(i,j,k) = fxy(i,j,k) - Gamxxy(i,j,k)*Lapx(i,j,k) - Gamyxy(i,j,k)*Lapy(i,j,k) - Gamzxy(i,j,k)*Lapz(i,j,k)
|
||||||
!! follow bam code
|
fxz(i,j,k) = fxz(i,j,k) - Gamxxz(i,j,k)*Lapx(i,j,k) - Gamyxz(i,j,k)*Lapy(i,j,k) - Gamzxz(i,j,k)*Lapz(i,j,k)
|
||||||
S = chin1 * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
|
fyz(i,j,k) = fyz(i,j,k) - Gamxyz(i,j,k)*Lapx(i,j,k) - Gamyyz(i,j,k)*Lapy(i,j,k) - Gamzyz(i,j,k)*Lapz(i,j,k)
|
||||||
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
|
|
||||||
f = F2o3 * trK * trK -(&
|
|
||||||
gupxx * ( &
|
|
||||||
gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
|
|
||||||
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz) ) + &
|
|
||||||
gupyy * ( &
|
|
||||||
gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
|
|
||||||
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz) ) + &
|
|
||||||
gupzz * ( &
|
|
||||||
gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
|
|
||||||
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz) ) + &
|
|
||||||
TWO * ( &
|
|
||||||
gupxy * ( &
|
|
||||||
gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
|
|
||||||
gupxy * (Axx * Ayy + Axy * Axy) + &
|
|
||||||
gupxz * (Axx * Ayz + Axz * Axy) + &
|
|
||||||
gupyz * (Axy * Ayz + Axz * Ayy) ) + &
|
|
||||||
gupxz * ( &
|
|
||||||
gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
|
|
||||||
gupxy * (Axx * Ayz + Axy * Axz) + &
|
|
||||||
gupxz * (Axx * Azz + Axz * Axz) + &
|
|
||||||
gupyz * (Axy * Azz + Axz * Ayz) ) + &
|
|
||||||
gupyz * ( &
|
|
||||||
gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
|
|
||||||
gupxy * (Axy * Ayz + Ayy * Axz) + &
|
|
||||||
gupxz * (Axy * Azz + Ayz * Axz) + &
|
|
||||||
gupyz * (Ayy * Azz + Ayz * Ayz) ) )) -1.6d1*PI*rho + EIGHT * PI * S
|
|
||||||
f = - F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
|
||||||
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + alpn1/chin1*f)
|
|
||||||
|
|
||||||
fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
|
trK_rhs(i,j,k) = gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
|
||||||
fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
|
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k))
|
||||||
fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
|
enddo
|
||||||
fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
|
enddo
|
||||||
fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
|
enddo
|
||||||
fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
|
do k=1,ex(3)
|
||||||
#else
|
do j=1,ex(2)
|
||||||
! Add lapse and S_ij parts to Ricci tensor:
|
do i=1,ex(1)
|
||||||
|
divb_loc = div_beta(i,j,k)
|
||||||
|
chin_loc = chin1(i,j,k)
|
||||||
|
|
||||||
fxx = alpn1 * (Rxx - EIGHT * PI * Sxx) - fxx
|
S_loc = chin_loc * ( gupxx(i,j,k) * Sxx(i,j,k) + gupyy(i,j,k) * Syy(i,j,k) + gupzz(i,j,k) * Szz(i,j,k) + &
|
||||||
fxy = alpn1 * (Rxy - EIGHT * PI * Sxy) - fxy
|
TWO * (gupxy(i,j,k) * Sxy(i,j,k) + gupxz(i,j,k) * Sxz(i,j,k) + gupyz(i,j,k) * Syz(i,j,k)) )
|
||||||
fxz = alpn1 * (Rxz - EIGHT * PI * Sxz) - fxz
|
S(i,j,k) = S_loc
|
||||||
fyy = alpn1 * (Ryy - EIGHT * PI * Syy) - fyy
|
|
||||||
fyz = alpn1 * (Ryz - EIGHT * PI * Syz) - fyz
|
|
||||||
fzz = alpn1 * (Rzz - EIGHT * PI * Szz) - fzz
|
|
||||||
|
|
||||||
! Compute trace-free part (note: chi^-1 and chi cancel!):
|
f_loc = F2o3 * trK(i,j,k) * trK(i,j,k) - ( &
|
||||||
|
gupxx(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + &
|
||||||
|
TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + &
|
||||||
|
gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k)) ) + &
|
||||||
|
gupyy(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k)) ) + &
|
||||||
|
gupzz(i,j,k) * ( gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + &
|
||||||
|
TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
|
||||||
|
gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k)) ) + &
|
||||||
|
TWO * ( gupxy(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
|
||||||
|
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
|
||||||
|
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k)) ) + &
|
||||||
|
gupxz(i,j,k) * ( gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + &
|
||||||
|
gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
|
||||||
|
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
|
||||||
|
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k)) ) + &
|
||||||
|
gupyz(i,j,k) * ( gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + &
|
||||||
|
gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
|
||||||
|
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
|
||||||
|
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k)) ) ) ) - &
|
||||||
|
F16 * PI * rho(i,j,k) + EIGHT * PI * S_loc
|
||||||
|
|
||||||
f = F1o3 *( gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
f_loc = -F1o3 * ( gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
|
||||||
TWO* ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) )
|
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + &
|
||||||
#endif
|
alpn1(i,j,k)/chin_loc * f_loc )
|
||||||
|
f(i,j,k) = f_loc
|
||||||
|
|
||||||
Axx_rhs = fxx - gxx * f
|
l_fxx = alpn1(i,j,k) * (Rxx(i,j,k) - EIGHT * PI * Sxx(i,j,k)) - fxx(i,j,k)
|
||||||
Ayy_rhs = fyy - gyy * f
|
l_fxy = alpn1(i,j,k) * (Rxy(i,j,k) - EIGHT * PI * Sxy(i,j,k)) - fxy(i,j,k)
|
||||||
Azz_rhs = fzz - gzz * f
|
l_fxz = alpn1(i,j,k) * (Rxz(i,j,k) - EIGHT * PI * Sxz(i,j,k)) - fxz(i,j,k)
|
||||||
Axy_rhs = fxy - gxy * f
|
l_fyy = alpn1(i,j,k) * (Ryy(i,j,k) - EIGHT * PI * Syy(i,j,k)) - fyy(i,j,k)
|
||||||
Axz_rhs = fxz - gxz * f
|
l_fyz = alpn1(i,j,k) * (Ryz(i,j,k) - EIGHT * PI * Syz(i,j,k)) - fyz(i,j,k)
|
||||||
Ayz_rhs = fyz - gyz * f
|
l_fzz = alpn1(i,j,k) * (Rzz(i,j,k) - EIGHT * PI * Szz(i,j,k)) - fzz(i,j,k)
|
||||||
|
|
||||||
! Now: store A_il A^l_j into fij:
|
Axx_rhs(i,j,k) = l_fxx - gxx(i,j,k) * f_loc
|
||||||
|
Ayy_rhs(i,j,k) = l_fyy - gyy(i,j,k) * f_loc
|
||||||
|
Azz_rhs(i,j,k) = l_fzz - gzz(i,j,k) * f_loc
|
||||||
|
Axy_rhs(i,j,k) = l_fxy - gxy(i,j,k) * f_loc
|
||||||
|
Axz_rhs(i,j,k) = l_fxz - gxz(i,j,k) * f_loc
|
||||||
|
Ayz_rhs(i,j,k) = l_fyz - gyz(i,j,k) * f_loc
|
||||||
|
|
||||||
fxx = gupxx * Axx * Axx + gupyy * Axy * Axy + gupzz * Axz * Axz + &
|
fxx(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axx(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + &
|
||||||
TWO * (gupxy * Axx * Axy + gupxz * Axx * Axz + gupyz * Axy * Axz)
|
gupzz(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + TWO * (gupxy(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + &
|
||||||
fyy = gupxx * Axy * Axy + gupyy * Ayy * Ayy + gupzz * Ayz * Ayz + &
|
gupxz(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyz(i,j,k) * Axy(i,j,k) * Axz(i,j,k))
|
||||||
TWO * (gupxy * Axy * Ayy + gupxz * Axy * Ayz + gupyz * Ayy * Ayz)
|
fyy(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayy(i,j,k) + &
|
||||||
fzz = gupxx * Axz * Axz + gupyy * Ayz * Ayz + gupzz * Azz * Azz + &
|
gupzz(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + TWO * (gupxy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
|
||||||
TWO * (gupxy * Axz * Ayz + gupxz * Axz * Azz + gupyz * Ayz * Azz)
|
gupxz(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + gupyz(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k))
|
||||||
fxy = gupxx * Axx * Axy + gupyy * Axy * Ayy + gupzz * Axz * Ayz + &
|
fzz(i,j,k) = gupxx(i,j,k) * Axz(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayz(i,j,k) * Ayz(i,j,k) + &
|
||||||
gupxy *(Axx * Ayy + Axy * Axy) + &
|
gupzz(i,j,k) * Azz(i,j,k) * Azz(i,j,k) + TWO * (gupxy(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + &
|
||||||
gupxz *(Axx * Ayz + Axz * Axy) + &
|
gupxz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupyz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k))
|
||||||
gupyz *(Axy * Ayz + Axz * Ayy)
|
fxy(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axy(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayy(i,j,k) + &
|
||||||
fxz = gupxx * Axx * Axz + gupyy * Axy * Ayz + gupzz * Axz * Azz + &
|
gupzz(i,j,k) * Axz(i,j,k) * Ayz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayy(i,j,k) + Axy(i,j,k) * Axy(i,j,k)) + &
|
||||||
gupxy *(Axx * Ayz + Axy * Axz) + &
|
gupxz(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Axy(i,j,k)) + &
|
||||||
gupxz *(Axx * Azz + Axz * Axz) + &
|
gupyz(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Axz(i,j,k) * Ayy(i,j,k))
|
||||||
gupyz *(Axy * Azz + Axz * Ayz)
|
fxz(i,j,k) = gupxx(i,j,k) * Axx(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Axy(i,j,k) * Ayz(i,j,k) + &
|
||||||
fyz = gupxx * Axy * Axz + gupyy * Ayy * Ayz + gupzz * Ayz * Azz + &
|
gupzz(i,j,k) * Axz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axx(i,j,k) * Ayz(i,j,k) + Axy(i,j,k) * Axz(i,j,k)) + &
|
||||||
gupxy *(Axy * Ayz + Ayy * Axz) + &
|
gupxz(i,j,k) * (Axx(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Axz(i,j,k)) + &
|
||||||
gupxz *(Axy * Azz + Ayz * Axz) + &
|
gupyz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Axz(i,j,k) * Ayz(i,j,k))
|
||||||
gupyz *(Ayy * Azz + Ayz * Ayz)
|
fyz(i,j,k) = gupxx(i,j,k) * Axy(i,j,k) * Axz(i,j,k) + gupyy(i,j,k) * Ayy(i,j,k) * Ayz(i,j,k) + &
|
||||||
|
gupzz(i,j,k) * Ayz(i,j,k) * Azz(i,j,k) + gupxy(i,j,k) * (Axy(i,j,k) * Ayz(i,j,k) + Ayy(i,j,k) * Axz(i,j,k)) + &
|
||||||
|
gupxz(i,j,k) * (Axy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Axz(i,j,k)) + &
|
||||||
|
gupyz(i,j,k) * (Ayy(i,j,k) * Azz(i,j,k) + Ayz(i,j,k) * Ayz(i,j,k))
|
||||||
|
|
||||||
f = chin1
|
trK_rhs(i,j,k) = chin_loc * trK_rhs(i,j,k)
|
||||||
! store D^i D_i Lap in trK_rhs
|
|
||||||
trK_rhs = f*trK_rhs
|
|
||||||
|
|
||||||
Axx_rhs = f * Axx_rhs+ alpn1 * (trK * Axx - TWO * fxx) + &
|
Axx_rhs(i,j,k) = chin_loc * Axx_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axx(i,j,k) - TWO * fxx(i,j,k)) + &
|
||||||
TWO * ( Axx * betaxx + Axy * betayx + Axz * betazx )- &
|
TWO * (Axx(i,j,k) * betaxx(i,j,k) + Axy(i,j,k) * betayx(i,j,k) + Axz(i,j,k) * betazx(i,j,k)) - &
|
||||||
F2o3 * Axx * div_beta
|
F2o3 * Axx(i,j,k) * divb_loc
|
||||||
|
Ayy_rhs(i,j,k) = chin_loc * Ayy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayy(i,j,k) - TWO * fyy(i,j,k)) + &
|
||||||
|
TWO * (Axy(i,j,k) * betaxy(i,j,k) + Ayy(i,j,k) * betayy(i,j,k) + Ayz(i,j,k) * betazy(i,j,k)) - &
|
||||||
|
F2o3 * Ayy(i,j,k) * divb_loc
|
||||||
|
Azz_rhs(i,j,k) = chin_loc * Azz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Azz(i,j,k) - TWO * fzz(i,j,k)) + &
|
||||||
|
TWO * (Axz(i,j,k) * betaxz(i,j,k) + Ayz(i,j,k) * betayz(i,j,k) + Azz(i,j,k) * betazz(i,j,k)) - &
|
||||||
|
F2o3 * Azz(i,j,k) * divb_loc
|
||||||
|
Axy_rhs(i,j,k) = chin_loc * Axy_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axy(i,j,k) - TWO * fxy(i,j,k)) + &
|
||||||
|
Axx(i,j,k) * betaxy(i,j,k) + Axz(i,j,k) * betazy(i,j,k) + Ayy(i,j,k) * betayx(i,j,k) + &
|
||||||
|
Ayz(i,j,k) * betazx(i,j,k) + F1o3 * Axy(i,j,k) * divb_loc - Axy(i,j,k) * betazz(i,j,k)
|
||||||
|
Ayz_rhs(i,j,k) = chin_loc * Ayz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Ayz(i,j,k) - TWO * fyz(i,j,k)) + &
|
||||||
|
Axy(i,j,k) * betaxz(i,j,k) + Ayy(i,j,k) * betayz(i,j,k) + Axz(i,j,k) * betaxy(i,j,k) + &
|
||||||
|
Azz(i,j,k) * betazy(i,j,k) + F1o3 * Ayz(i,j,k) * divb_loc - Ayz(i,j,k) * betaxx(i,j,k)
|
||||||
|
Axz_rhs(i,j,k) = chin_loc * Axz_rhs(i,j,k) + alpn1(i,j,k) * (trK(i,j,k) * Axz(i,j,k) - TWO * fxz(i,j,k)) + &
|
||||||
|
Axx(i,j,k) * betaxz(i,j,k) + Axy(i,j,k) * betayz(i,j,k) + Ayz(i,j,k) * betayx(i,j,k) + &
|
||||||
|
Azz(i,j,k) * betazx(i,j,k) + F1o3 * Axz(i,j,k) * divb_loc - Axz(i,j,k) * betayy(i,j,k)
|
||||||
|
|
||||||
Ayy_rhs = f * Ayy_rhs+ alpn1 * (trK * Ayy - TWO * fyy) + &
|
trK_rhs(i,j,k) = - trK_rhs(i,j,k) + alpn1(i,j,k) * ( F1o3 * trK(i,j,k) * trK(i,j,k) + &
|
||||||
TWO * ( Axy * betaxy + Ayy * betayy + Ayz * betazy )- &
|
gupxx(i,j,k) * fxx(i,j,k) + gupyy(i,j,k) * fyy(i,j,k) + gupzz(i,j,k) * fzz(i,j,k) + &
|
||||||
F2o3 * Ayy * div_beta
|
TWO * (gupxy(i,j,k) * fxy(i,j,k) + gupxz(i,j,k) * fxz(i,j,k) + gupyz(i,j,k) * fyz(i,j,k)) + &
|
||||||
|
FOUR * PI * (rho(i,j,k) + S_loc) )
|
||||||
Azz_rhs = f * Azz_rhs+ alpn1 * (trK * Azz - TWO * fzz) + &
|
enddo
|
||||||
TWO * ( Axz * betaxz + Ayz * betayz + Azz * betazz )- &
|
enddo
|
||||||
F2o3 * Azz * div_beta
|
enddo
|
||||||
|
|
||||||
Axy_rhs = f * Axy_rhs+ alpn1 *( trK * Axy - TWO * fxy )+ &
|
|
||||||
Axx * betaxy + Axz * betazy + &
|
|
||||||
Ayy * betayx + Ayz * betazx + &
|
|
||||||
F1o3 * Axy * div_beta - Axy * betazz
|
|
||||||
|
|
||||||
Ayz_rhs = f * Ayz_rhs+ alpn1 *( trK * Ayz - TWO * fyz )+ &
|
|
||||||
Axy * betaxz + Ayy * betayz + &
|
|
||||||
Axz * betaxy + Azz * betazy + &
|
|
||||||
F1o3 * Ayz * div_beta - Ayz * betaxx
|
|
||||||
|
|
||||||
Axz_rhs = f * Axz_rhs+ alpn1 *( trK * Axz - TWO * fxz )+ &
|
|
||||||
Axx * betaxz + Axy * betayz + &
|
|
||||||
Ayz * betayx + Azz * betazx + &
|
|
||||||
F1o3 * Axz * div_beta - Axz * betayy !rhs for Aij
|
|
||||||
|
|
||||||
! Compute trace of S_ij
|
|
||||||
|
|
||||||
S = f * ( gupxx * Sxx + gupyy * Syy + gupzz * Szz + &
|
|
||||||
TWO * ( gupxy * Sxy + gupxz * Sxz + gupyz * Syz ) )
|
|
||||||
|
|
||||||
trK_rhs = - trK_rhs + alpn1 *( F1o3 * trK * trK + &
|
|
||||||
gupxx * fxx + gupyy * fyy + gupzz * fzz + &
|
|
||||||
TWO * ( gupxy * fxy + gupxz * fxz + gupyz * fyz ) + &
|
|
||||||
FOUR * PI * ( rho + S )) !rhs for trK
|
|
||||||
|
|
||||||
!!!! gauge variable part
|
!!!! gauge variable part
|
||||||
|
|
||||||
@@ -948,15 +1000,15 @@
|
|||||||
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
|
!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency)
|
||||||
! lopsided_kodis shares the symmetry_bd buffer between advection and
|
! lopsided_kodis shares the symmetry_bd buffer between advection and
|
||||||
! dissipation, eliminating redundant full-grid copies. For metric variables
|
! dissipation, eliminating redundant full-grid copies. For metric variables
|
||||||
! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero,
|
! gxx/gyy/gzz (=dxx/dyy/dzz+1): stencil coefficients sum to zero,
|
||||||
! so the constant offset has no effect on dissipation.
|
! so the constant offset has no effect on dissipation.
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_kodis(ex,X,Y,Z,dxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_kodis(ex,X,Y,Z,dyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps)
|
||||||
call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_kodis(ex,X,Y,Z,dzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||||
|
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps)
|
||||||
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps)
|
||||||
|
|||||||
@@ -32,6 +32,19 @@
|
|||||||
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
#define f_compute_rhs_Z4c_ss compute_rhs_z4c_ss_
|
||||||
#define f_compute_constraint_fr compute_constraint_fr_
|
#define f_compute_constraint_fr compute_constraint_fr_
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
void f_bssn_rhs_kernel_timing_reset();
|
||||||
|
int f_bssn_rhs_kernel_timing_bucket_count();
|
||||||
|
const double *f_bssn_rhs_kernel_timing_local_seconds();
|
||||||
|
const char *f_bssn_rhs_kernel_timing_label(int);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
int f_compute_rhs_bssn(int *, double &, double *, double *, double *, // ex,T,X,Y,Z
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,36 +0,0 @@
|
|||||||
#ifndef BSSN_RHS_CUDA_H
|
|
||||||
#define BSSN_RHS_CUDA_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int f_compute_rhs_bssn(int *ex, double &T,
|
|
||||||
double *X, double *Y, double *Z,
|
|
||||||
double *chi, double *trK,
|
|
||||||
double *dxx, double *gxy, double *gxz, double *dyy, double *gyz, double *dzz,
|
|
||||||
double *Axx, double *Axy, double *Axz, double *Ayy, double *Ayz, double *Azz,
|
|
||||||
double *Gamx, double *Gamy, double *Gamz,
|
|
||||||
double *Lap, double *betax, double *betay, double *betaz,
|
|
||||||
double *dtSfx, double *dtSfy, double *dtSfz,
|
|
||||||
double *chi_rhs, double *trK_rhs,
|
|
||||||
double *gxx_rhs, double *gxy_rhs, double *gxz_rhs, double *gyy_rhs, double *gyz_rhs, double *gzz_rhs,
|
|
||||||
double *Axx_rhs, double *Axy_rhs, double *Axz_rhs, double *Ayy_rhs, double *Ayz_rhs, double *Azz_rhs,
|
|
||||||
double *Gamx_rhs, double *Gamy_rhs, double *Gamz_rhs,
|
|
||||||
double *Lap_rhs, double *betax_rhs, double *betay_rhs, double *betaz_rhs,
|
|
||||||
double *dtSfx_rhs, double *dtSfy_rhs, double *dtSfz_rhs,
|
|
||||||
double *rho, double *Sx, double *Sy, double *Sz,
|
|
||||||
double *Sxx, double *Sxy, double *Sxz, double *Syy, double *Syz, double *Szz,
|
|
||||||
double *Gamxxx, double *Gamxxy, double *Gamxxz, double *Gamxyy, double *Gamxyz, double *Gamxzz,
|
|
||||||
double *Gamyxx, double *Gamyxy, double *Gamyxz, double *Gamyyy, double *Gamyyz, double *Gamyzz,
|
|
||||||
double *Gamzxx, double *Gamzxy, double *Gamzxz, double *Gamzyy, double *Gamzyz, double *Gamzzz,
|
|
||||||
double *Rxx, double *Rxy, double *Rxz, double *Ryy, double *Ryz, double *Rzz,
|
|
||||||
double *ham_Res, double *movx_Res, double *movy_Res, double *movz_Res,
|
|
||||||
double *Gmx_Res, double *Gmy_Res, double *Gmz_Res,
|
|
||||||
int &Symmetry, int &Lev, double &eps, int &co);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1513,6 +1513,7 @@
|
|||||||
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
|
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
|
||||||
real*8, dimension(3) :: SoA
|
real*8, dimension(3) :: SoA
|
||||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||||
|
integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max
|
||||||
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
|
real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz
|
||||||
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
|
real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz
|
||||||
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||||
@@ -1565,9 +1566,47 @@
|
|||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
|
i_core_min = max(1, imin+2)
|
||||||
|
i_core_max = min(ex(1), imax-2)
|
||||||
|
j_core_min = max(1, jmin+2)
|
||||||
|
j_core_max = min(ex(2), jmax-2)
|
||||||
|
k_core_min = max(1, kmin+2)
|
||||||
|
k_core_max = min(ex(3), kmax-2)
|
||||||
|
|
||||||
|
if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then
|
||||||
|
do k=k_core_min,k_core_max
|
||||||
|
do j=j_core_min,j_core_max
|
||||||
|
do i=i_core_min,i_core_max
|
||||||
|
! interior points always use 4th-order stencils without branch checks
|
||||||
|
fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) &
|
||||||
|
-fh(i+2,j,k)+F16*fh(i+1,j,k) )
|
||||||
|
fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) &
|
||||||
|
-fh(i,j+2,k)+F16*fh(i,j+1,k) )
|
||||||
|
fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) &
|
||||||
|
-fh(i,j,k+2)+F16*fh(i,j,k+1) )
|
||||||
|
fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) &
|
||||||
|
-F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) &
|
||||||
|
+F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) &
|
||||||
|
- (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k)))
|
||||||
|
fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) &
|
||||||
|
-F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) &
|
||||||
|
+F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) &
|
||||||
|
- (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2)))
|
||||||
|
fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) &
|
||||||
|
-F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) &
|
||||||
|
+F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) &
|
||||||
|
- (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2)))
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
if(i>=i_core_min .and. i<=i_core_max .and. &
|
||||||
|
j>=j_core_min .and. j<=j_core_max .and. &
|
||||||
|
k>=k_core_min .and. k<=k_core_max) cycle
|
||||||
!~~~~~~ fxx
|
!~~~~~~ fxx
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
|
|||||||
@@ -81,26 +81,63 @@ void fderivs(const int ex[3],
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fortran loops:
|
* 两段式:
|
||||||
* do k=1,ex3-1
|
* 1) 先在二阶可用区域计算二阶模板
|
||||||
* do j=1,ex2-1
|
* 2) 再在高阶可用区域覆盖为四阶模板
|
||||||
* do i=1,ex1-1
|
|
||||||
*
|
*
|
||||||
* C: k0=0..ex3-2, j0=0..ex2-2, i0=0..ex1-2
|
* 与原 if/elseif 逻辑等价,但减少逐点分支判断。
|
||||||
*/
|
*/
|
||||||
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
const int i2_lo = (iminF > 0) ? iminF : 0;
|
||||||
const int kF = k0 + 1;
|
const int j2_lo = (jminF > 0) ? jminF : 0;
|
||||||
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
const int k2_lo = (kminF > 0) ? kminF : 0;
|
||||||
const int jF = j0 + 1;
|
const int i2_hi = ex1 - 2;
|
||||||
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
const int j2_hi = ex2 - 2;
|
||||||
const int iF = i0 + 1;
|
const int k2_hi = ex3 - 2;
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
|
||||||
|
const int i4_lo = (iminF + 1 > 0) ? (iminF + 1) : 0;
|
||||||
|
const int j4_lo = (jminF + 1 > 0) ? (jminF + 1) : 0;
|
||||||
|
const int k4_lo = (kminF + 1 > 0) ? (kminF + 1) : 0;
|
||||||
|
const int i4_hi = ex1 - 3;
|
||||||
|
const int j4_hi = ex2 - 3;
|
||||||
|
const int k4_hi = ex3 - 3;
|
||||||
|
|
||||||
|
if (i2_lo <= i2_hi && j2_lo <= j2_hi && k2_lo <= k2_hi) {
|
||||||
|
for (int k0 = k2_lo; k0 <= k2_hi; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = j2_lo; j0 <= j2_hi; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = i2_lo; i0 <= i2_hi; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
fx[p] = d2dx * (
|
||||||
|
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fy[p] = d2dy * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
||||||
|
);
|
||||||
|
|
||||||
|
fz[p] = d2dz * (
|
||||||
|
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
||||||
|
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i4_lo <= i4_hi && j4_lo <= j4_hi && k4_lo <= k4_hi) {
|
||||||
|
for (int k0 = k4_lo; k0 <= k4_hi; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = j4_lo; j0 <= j4_hi; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = i4_lo; i0 <= i4_hi; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
// if(i+2 <= imax .and. i-2 >= imin ... ) (全是 Fortran 索引)
|
|
||||||
if ((iF + 2) <= imaxF && (iF - 2) >= iminF &&
|
|
||||||
(jF + 2) <= jmaxF && (jF - 2) >= jminF &&
|
|
||||||
(kF + 2) <= kmaxF && (kF - 2) >= kminF)
|
|
||||||
{
|
|
||||||
fx[p] = d12dx * (
|
fx[p] = d12dx * (
|
||||||
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
fh[idx_fh_F_ord2(iF - 2, jF, kF, ex)] -
|
||||||
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
EIT * fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
||||||
@@ -122,26 +159,6 @@ void fderivs(const int ex[3],
|
|||||||
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
fh[idx_fh_F_ord2(iF, jF, kF + 2, ex)]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
// elseif(i+1 <= imax .and. i-1 >= imin ...)
|
|
||||||
else if ((iF + 1) <= imaxF && (iF - 1) >= iminF &&
|
|
||||||
(jF + 1) <= jmaxF && (jF - 1) >= jminF &&
|
|
||||||
(kF + 1) <= kmaxF && (kF - 1) >= kminF)
|
|
||||||
{
|
|
||||||
fx[p] = d2dx * (
|
|
||||||
-fh[idx_fh_F_ord2(iF - 1, jF, kF, ex)] +
|
|
||||||
fh[idx_fh_F_ord2(iF + 1, jF, kF, ex)]
|
|
||||||
);
|
|
||||||
|
|
||||||
fy[p] = d2dy * (
|
|
||||||
-fh[idx_fh_F_ord2(iF, jF - 1, kF, ex)] +
|
|
||||||
fh[idx_fh_F_ord2(iF, jF + 1, kF, ex)]
|
|
||||||
);
|
|
||||||
|
|
||||||
fz[p] = d2dz * (
|
|
||||||
-fh[idx_fh_F_ord2(iF, jF, kF - 1, ex)] +
|
|
||||||
fh[idx_fh_F_ord2(iF, jF, kF + 1, ex)]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1327,35 +1327,6 @@ end subroutine d2dump
|
|||||||
|
|
||||||
return
|
return
|
||||||
end subroutine polint
|
end subroutine polint
|
||||||
|
|
||||||
subroutine polint0(xa, ya, y, ordn)
|
|
||||||
! Lagrange interpolation at x=0, O(n) direct formula
|
|
||||||
implicit none
|
|
||||||
integer, intent(in) :: ordn
|
|
||||||
real*8, dimension(ordn), intent(in) :: xa, ya
|
|
||||||
real*8, intent(out) :: y
|
|
||||||
|
|
||||||
integer :: j, k
|
|
||||||
real*8 :: wj
|
|
||||||
|
|
||||||
y = 0.d0
|
|
||||||
do j = 1, ordn
|
|
||||||
wj = 1.d0
|
|
||||||
do k = 1, ordn
|
|
||||||
if (k .ne. j) then
|
|
||||||
wj = wj * xa(k) / (xa(k) - xa(j))
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
y = y + wj * ya(j)
|
|
||||||
enddo
|
|
||||||
|
|
||||||
return
|
|
||||||
end subroutine polint0
|
|
||||||
!------------------------------------------------------------------------------
|
|
||||||
!
|
|
||||||
! interpolation in 2 dimensions, follow yx order
|
|
||||||
!
|
|
||||||
!------------------------------------------------------------------------------
|
|
||||||
!------------------------------------------------------------------------------
|
!------------------------------------------------------------------------------
|
||||||
! Compute Lagrange interpolation basis weights for one target point.
|
! Compute Lagrange interpolation basis weights for one target point.
|
||||||
!------------------------------------------------------------------------------
|
!------------------------------------------------------------------------------
|
||||||
@@ -1543,6 +1514,81 @@ f_out = f_out*dX*dY*dZ
|
|||||||
return
|
return
|
||||||
|
|
||||||
end subroutine l2normhelper
|
end subroutine l2normhelper
|
||||||
|
!--------------------------------------------------------------------------------------
|
||||||
|
subroutine l2normhelper7(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||||
|
f1,f2,f3,f4,f5,f6,f7,f_out,gw)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
!~~~~~~> Input parameters:
|
||||||
|
integer,intent(in ):: ex(1:3)
|
||||||
|
real*8, intent(in ):: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)),xmin,ymin,zmin,xmax,ymax,zmax
|
||||||
|
integer,intent(in)::gw
|
||||||
|
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) :: f1,f2,f3,f4,f5,f6,f7
|
||||||
|
real*8, intent(out) :: f_out(7)
|
||||||
|
!~~~~~~> Other variables:
|
||||||
|
|
||||||
|
real*8 :: dX, dY, dZ
|
||||||
|
integer::imin,jmin,kmin
|
||||||
|
integer::imax,jmax,kmax
|
||||||
|
integer::i,j,k
|
||||||
|
real*8 :: s1,s2,s3,s4,s5,s6,s7
|
||||||
|
|
||||||
|
dX = X(2) - X(1)
|
||||||
|
dY = Y(2) - Y(1)
|
||||||
|
dZ = Z(2) - Z(1)
|
||||||
|
|
||||||
|
! for ghost zone
|
||||||
|
imin = gw+1
|
||||||
|
jmin = gw+1
|
||||||
|
kmin = gw+1
|
||||||
|
|
||||||
|
imax = ex(1) - gw
|
||||||
|
jmax = ex(2) - gw
|
||||||
|
kmax = ex(3) - gw
|
||||||
|
|
||||||
|
!for patch boundary (i.e., not ghost boundary)
|
||||||
|
|
||||||
|
if(dabs(X(ex(1))-xmax) < dX) imax = ex(1)
|
||||||
|
if(dabs(Y(ex(2))-ymax) < dY) jmax = ex(2)
|
||||||
|
if(dabs(Z(ex(3))-zmax) < dZ) kmax = ex(3)
|
||||||
|
if(dabs(X(1)-xmin) < dX) imin = 1
|
||||||
|
if(dabs(Y(1)-ymin) < dY) jmin = 1
|
||||||
|
if(dabs(Z(1)-zmin) < dZ) kmin = 1
|
||||||
|
|
||||||
|
s1 = 0.d0
|
||||||
|
s2 = 0.d0
|
||||||
|
s3 = 0.d0
|
||||||
|
s4 = 0.d0
|
||||||
|
s5 = 0.d0
|
||||||
|
s6 = 0.d0
|
||||||
|
s7 = 0.d0
|
||||||
|
|
||||||
|
do k=kmin,kmax
|
||||||
|
do j=jmin,jmax
|
||||||
|
!DIR$ SIMD REDUCTION(+:s1,s2,s3,s4,s5,s6,s7)
|
||||||
|
do i=imin,imax
|
||||||
|
s1 = s1 + f1(i,j,k)*f1(i,j,k)
|
||||||
|
s2 = s2 + f2(i,j,k)*f2(i,j,k)
|
||||||
|
s3 = s3 + f3(i,j,k)*f3(i,j,k)
|
||||||
|
s4 = s4 + f4(i,j,k)*f4(i,j,k)
|
||||||
|
s5 = s5 + f5(i,j,k)*f5(i,j,k)
|
||||||
|
s6 = s6 + f6(i,j,k)*f6(i,j,k)
|
||||||
|
s7 = s7 + f7(i,j,k)*f7(i,j,k)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
f_out(1) = s1*dX*dY*dZ
|
||||||
|
f_out(2) = s2*dX*dY*dZ
|
||||||
|
f_out(3) = s3*dX*dY*dZ
|
||||||
|
f_out(4) = s4*dX*dY*dZ
|
||||||
|
f_out(5) = s5*dX*dY*dZ
|
||||||
|
f_out(6) = s6*dX*dY*dZ
|
||||||
|
f_out(7) = s7*dX*dY*dZ
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine l2normhelper7
|
||||||
!--------------------------------------------------------------------------------------
|
!--------------------------------------------------------------------------------------
|
||||||
! calculate L2norm especially for shell Blocks
|
! calculate L2norm especially for shell Blocks
|
||||||
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
subroutine l2normhelper_sh(ex, X, Y, Z,xmin,ymin,zmin,xmax,ymax,zmax,&
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#define f_global_interpind2d global_interpind2d
|
#define f_global_interpind2d global_interpind2d
|
||||||
#define f_global_interpind1d global_interpind1d
|
#define f_global_interpind1d global_interpind1d
|
||||||
#define f_l2normhelper l2normhelper
|
#define f_l2normhelper l2normhelper
|
||||||
|
#define f_l2normhelper7 l2normhelper7
|
||||||
#define f_l2normhelper_sh l2normhelper_sh
|
#define f_l2normhelper_sh l2normhelper_sh
|
||||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
|
#define f_l2normhelper_sh_rms l2normhelper_sh_rms
|
||||||
#define f_average average
|
#define f_average average
|
||||||
@@ -42,6 +43,7 @@
|
|||||||
#define f_global_interpind2d GLOBAL_INTERPIND2D
|
#define f_global_interpind2d GLOBAL_INTERPIND2D
|
||||||
#define f_global_interpind1d GLOBAL_INTERPIND1D
|
#define f_global_interpind1d GLOBAL_INTERPIND1D
|
||||||
#define f_l2normhelper L2NORMHELPER
|
#define f_l2normhelper L2NORMHELPER
|
||||||
|
#define f_l2normhelper7 L2NORMHELPER7
|
||||||
#define f_l2normhelper_sh L2NORMHELPER_SH
|
#define f_l2normhelper_sh L2NORMHELPER_SH
|
||||||
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
|
#define f_l2normhelper_sh_rms L2NORMHELPER_SH_RMS
|
||||||
#define f_average AVERAGE
|
#define f_average AVERAGE
|
||||||
@@ -71,6 +73,7 @@
|
|||||||
#define f_global_interpind2d global_interpind2d_
|
#define f_global_interpind2d global_interpind2d_
|
||||||
#define f_global_interpind1d global_interpind1d_
|
#define f_global_interpind1d global_interpind1d_
|
||||||
#define f_l2normhelper l2normhelper_
|
#define f_l2normhelper l2normhelper_
|
||||||
|
#define f_l2normhelper7 l2normhelper7_
|
||||||
#define f_l2normhelper_sh l2normhelper_sh_
|
#define f_l2normhelper_sh l2normhelper_sh_
|
||||||
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
|
#define f_l2normhelper_sh_rms l2normhelper_sh_rms_
|
||||||
#define f_average average_
|
#define f_average average_
|
||||||
@@ -164,6 +167,15 @@ extern "C"
|
|||||||
double *, double &, int &);
|
double *, double &, int &);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C++ prototype for the Fortran subroutine l2normhelper7, reached through the
// f_l2normhelper7 name-mangling macro.
// The visible tail of the Fortran routine accumulates the sums of squares of
// seven fields f1..f7 over a triple (i,j,k) loop and stores s_n*dX*dY*dZ in
// f_out(1..7).
// NOTE(review): the argument order presumably mirrors l2normhelper_sh
// (ex, X, Y, Z, xmin..zmax, seven field arrays, f_out, trailing int) --
// confirm against the Fortran signature before relying on it.
extern "C"
|
||||||
|
{
|
||||||
|
void f_l2normhelper7(int *, double *, double *, double *,
|
||||||
|
double &, double &, double &,
|
||||||
|
double &, double &, double &,
|
||||||
|
double *, double *, double *, double *,
|
||||||
|
double *, double *, double *, double *, int &);
|
||||||
|
}
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
void f_l2normhelper_sh(int *, double *, double *, double *,
|
void f_l2normhelper_sh(int *, double *, double *, double *,
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
/* 本头文件由自订profile框架自动生成并非人工硬编码针对Case优化 */
|
||||||
|
/* 更新:负载均衡问题已经通过优化插值函数解决,此profile静态均衡方案已弃用,本头文件现在未参与编译 */
|
||||||
/* Auto-generated from interp_lb_profile.bin — do not edit */
|
/* Auto-generated from interp_lb_profile.bin — do not edit */
|
||||||
#ifndef INTERP_LB_PROFILE_DATA_H
|
#ifndef INTERP_LB_PROFILE_DATA_H
|
||||||
#define INTERP_LB_PROFILE_DATA_H
|
#define INTERP_LB_PROFILE_DATA_H
|
||||||
|
|||||||
@@ -63,19 +63,28 @@ void kodis(const int ex[3],
|
|||||||
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
|
* C: k0=0..ex3-1, j0=0..ex2-1, i0=0..ex1-1
|
||||||
* 并定义 Fortran index: iF=i0+1, ...
|
* 并定义 Fortran index: iF=i0+1, ...
|
||||||
*/
|
*/
|
||||||
for (int k0 = 0; k0 < ex3; ++k0) {
|
// 收紧循环范围:只遍历满足 iF±3/jF±3/kF±3 条件的内部点
|
||||||
|
// iF-3 >= iminF => iF >= iminF+3 => i0 >= iminF+2 (因为 iF=i0+1)
|
||||||
|
// iF+3 <= imaxF => iF <= imaxF-3 => i0 <= imaxF-4
|
||||||
|
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
||||||
|
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
||||||
|
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
||||||
|
const int i0_hi = imaxF - 4; // inclusive
|
||||||
|
const int j0_hi = jmaxF - 4;
|
||||||
|
const int k0_hi = kmaxF - 4;
|
||||||
|
|
||||||
|
if (i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi) {
|
||||||
|
free(fh);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||||
const int kF = k0 + 1;
|
const int kF = k0 + 1;
|
||||||
for (int j0 = 0; j0 < ex2; ++j0) {
|
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||||
const int jF = j0 + 1;
|
const int jF = j0 + 1;
|
||||||
for (int i0 = 0; i0 < ex1; ++i0) {
|
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||||
const int iF = i0 + 1;
|
const int iF = i0 + 1;
|
||||||
|
|
||||||
// Fortran if 条件:
|
|
||||||
// i-3 >= imin .and. i+3 <= imax 等(都是 Fortran 索引)
|
|
||||||
if ((iF - 3) >= iminF && (iF + 3) <= imaxF &&
|
|
||||||
(jF - 3) >= jminF && (jF + 3) <= jmaxF &&
|
|
||||||
(kF - 3) >= kminF && (kF + 3) <= kmaxF)
|
|
||||||
{
|
|
||||||
const size_t p = idx_ex(i0, j0, k0, ex);
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
|
// 三个方向各一份同型的 7 点组合(实际上是对称的 6th-order dissipation/filter 核)
|
||||||
@@ -100,7 +109,6 @@ void kodis(const int ex[3],
|
|||||||
// Fortran:
|
// Fortran:
|
||||||
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
|
// f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof*(Dx_term + Dy_term + Dz_term)
|
||||||
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
248
AMSS_NCKU_source/lopsided_kodis_c.C
Normal file
248
AMSS_NCKU_source/lopsided_kodis_c.C
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
#include "tool.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Combined advection (lopsided) + KO dissipation (kodis).
|
||||||
|
* Uses one shared symmetry_bd buffer per call.
|
||||||
|
*/
|
||||||
|
void lopsided_kodis(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, double *f_rhs,
|
||||||
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
|
int Symmetry, const double SoA[3], double eps)
|
||||||
|
{
|
||||||
|
const double ZEO = 0.0, ONE = 1.0, F3 = 3.0;
|
||||||
|
const double F6 = 6.0, F18 = 18.0;
|
||||||
|
const double F12 = 12.0, F10 = 10.0, EIT = 8.0;
|
||||||
|
const double SIX = 6.0, FIT = 15.0, TWT = 20.0;
|
||||||
|
const double cof = 64.0; // 2^6
|
||||||
|
|
||||||
|
const int NO_SYMM = 0, EQ_SYMM = 1;
|
||||||
|
|
||||||
|
const int ex1 = ex[0], ex2 = ex[1], ex3 = ex[2];
|
||||||
|
|
||||||
|
const double dX = X[1] - X[0];
|
||||||
|
const double dY = Y[1] - Y[0];
|
||||||
|
const double dZ = Z[1] - Z[0];
|
||||||
|
|
||||||
|
const double d12dx = ONE / F12 / dX;
|
||||||
|
const double d12dy = ONE / F12 / dY;
|
||||||
|
const double d12dz = ONE / F12 / dZ;
|
||||||
|
|
||||||
|
const int imaxF = ex1;
|
||||||
|
const int jmaxF = ex2;
|
||||||
|
const int kmaxF = ex3;
|
||||||
|
|
||||||
|
int iminF = 1, jminF = 1, kminF = 1;
|
||||||
|
if (Symmetry > NO_SYMM && fabs(Z[0]) < dZ) kminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2;
|
||||||
|
if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2;
|
||||||
|
|
||||||
|
// fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3)
|
||||||
|
const size_t nx = (size_t)ex1 + 3;
|
||||||
|
const size_t ny = (size_t)ex2 + 3;
|
||||||
|
const size_t nz = (size_t)ex3 + 3;
|
||||||
|
const size_t fh_size = nx * ny * nz;
|
||||||
|
|
||||||
|
double *fh = (double*)malloc(fh_size * sizeof(double));
|
||||||
|
if (!fh) return;
|
||||||
|
|
||||||
|
symmetry_bd(3, ex, f, fh, SoA);
|
||||||
|
|
||||||
|
// Advection (same stencil logic as lopsided_c.C)
|
||||||
|
for (int k0 = 0; k0 <= ex3 - 2; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = 0; j0 <= ex2 - 2; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = 0; i0 <= ex1 - 2; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
const double sfx = Sfx[p];
|
||||||
|
if (sfx > ZEO) {
|
||||||
|
if (i0 <= ex1 - 4) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||||
|
} else if (i0 <= ex1 - 3) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||||
|
} else if (i0 <= ex1 - 2) {
|
||||||
|
f_rhs[p] -= sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
} else if (sfx < ZEO) {
|
||||||
|
if ((i0 - 2) >= iminF) {
|
||||||
|
f_rhs[p] -= sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF - 3, jF, kF, ex)]);
|
||||||
|
} else if ((i0 - 1) >= iminF) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
( fh[idx_fh_F(iF - 2, jF, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF + 2, jF, kF, ex)]);
|
||||||
|
} else if (i0 >= iminF) {
|
||||||
|
f_rhs[p] += sfx * d12dx *
|
||||||
|
(-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF , jF, kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF + 3, jF, kF, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sfy = Sfy[p];
|
||||||
|
if (sfy > ZEO) {
|
||||||
|
if (j0 <= ex2 - 4) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||||
|
} else if (j0 <= ex2 - 3) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||||
|
} else if (j0 <= ex2 - 2) {
|
||||||
|
f_rhs[p] -= sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
||||||
|
}
|
||||||
|
} else if (sfy < ZEO) {
|
||||||
|
if ((j0 - 2) >= jminF) {
|
||||||
|
f_rhs[p] -= sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF - 3, kF, ex)]);
|
||||||
|
} else if ((j0 - 1) >= jminF) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
( fh[idx_fh_F(iF, jF - 2, kF, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF + 2, kF, ex)]);
|
||||||
|
} else if (j0 >= jminF) {
|
||||||
|
f_rhs[p] += sfy * d12dy *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF , kF, ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF + 3, kF, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const double sfz = Sfz[p];
|
||||||
|
if (sfz > ZEO) {
|
||||||
|
if (k0 <= ex3 - 4) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||||
|
} else if (k0 <= ex3 - 3) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||||
|
} else if (k0 <= ex3 - 2) {
|
||||||
|
f_rhs[p] -= sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
||||||
|
}
|
||||||
|
} else if (sfz < ZEO) {
|
||||||
|
if ((k0 - 2) >= kminF) {
|
||||||
|
f_rhs[p] -= sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF - 3, ex)]);
|
||||||
|
} else if ((k0 - 1) >= kminF) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
( fh[idx_fh_F(iF, jF, kF - 2, ex)]
|
||||||
|
-EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
+EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
- fh[idx_fh_F(iF, jF, kF + 2, ex)]);
|
||||||
|
} else if (k0 >= kminF) {
|
||||||
|
f_rhs[p] += sfz * d12dz *
|
||||||
|
(-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)]
|
||||||
|
-F10 * fh[idx_fh_F(iF, jF, kF , ex)]
|
||||||
|
+F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)]
|
||||||
|
-F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)]
|
||||||
|
+ fh[idx_fh_F(iF, jF, kF + 3, ex)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// KO dissipation (same domain restriction as kodiss_c.C)
|
||||||
|
if (eps > ZEO) {
|
||||||
|
const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0;
|
||||||
|
const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0;
|
||||||
|
const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0;
|
||||||
|
const int i0_hi = imaxF - 4; // inclusive
|
||||||
|
const int j0_hi = jmaxF - 4;
|
||||||
|
const int k0_hi = kmaxF - 4;
|
||||||
|
|
||||||
|
if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) {
|
||||||
|
for (int k0 = k0_lo; k0 <= k0_hi; ++k0) {
|
||||||
|
const int kF = k0 + 1;
|
||||||
|
for (int j0 = j0_lo; j0 <= j0_hi; ++j0) {
|
||||||
|
const int jF = j0 + 1;
|
||||||
|
for (int i0 = i0_lo; i0 <= i0_hi; ++i0) {
|
||||||
|
const int iF = i0 + 1;
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
|
||||||
|
const double Dx_term =
|
||||||
|
((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX;
|
||||||
|
|
||||||
|
const double Dy_term =
|
||||||
|
((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY;
|
||||||
|
|
||||||
|
const double Dz_term =
|
||||||
|
((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) -
|
||||||
|
SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) +
|
||||||
|
FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) -
|
||||||
|
TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ;
|
||||||
|
|
||||||
|
f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(fh);
|
||||||
|
}
|
||||||
@@ -29,6 +29,16 @@
|
|||||||
|
|
||||||
#define REGLEV 0
|
#define REGLEV 0
|
||||||
|
|
||||||
|
#define BSSN_FINE_TIMING 0
|
||||||
|
|
||||||
|
#define BSSN_FINE_TIMING_EVERY 1
|
||||||
|
|
||||||
|
#define BSSN_FINE_TIMING_TOPN 8
|
||||||
|
|
||||||
|
#define BSSN_KERNEL_FINE_TIMING 0
|
||||||
|
|
||||||
|
#define BSSN_ENABLE_STDIN_ABORT_POLL 0
|
||||||
|
|
||||||
//#define USE_GPU
|
//#define USE_GPU
|
||||||
|
|
||||||
//#define CHECKDETAIL
|
//#define CHECKDETAIL
|
||||||
@@ -88,6 +98,21 @@
|
|||||||
// 0: for every level;
|
// 0: for every level;
|
||||||
// 1: for all
|
// 1: for all
|
||||||
//
|
//
|
||||||
|
// define BSSN_FINE_TIMING
|
||||||
|
// enable fine-grained per-timestep timing monitor
|
||||||
|
//
|
||||||
|
// define BSSN_FINE_TIMING_EVERY
|
||||||
|
// report timing every N coarse timesteps
|
||||||
|
//
|
||||||
|
// define BSSN_FINE_TIMING_TOPN
|
||||||
|
// number of hottest timing buckets shown in stdout
|
||||||
|
//
|
||||||
|
// define BSSN_KERNEL_FINE_TIMING
|
||||||
|
// enable split timing inside compute_rhs_bssn
|
||||||
|
//
|
||||||
|
// define BSSN_ENABLE_STDIN_ABORT_POLL
|
||||||
|
// poll stdin and broadcast abort flag every coarse step
|
||||||
|
//
|
||||||
// define USE_GPU
|
// define USE_GPU
|
||||||
// use gpu or not
|
// use gpu or not
|
||||||
//
|
//
|
||||||
@@ -142,4 +167,3 @@
|
|||||||
#define TINY 1e-10
|
#define TINY 1e-10
|
||||||
|
|
||||||
#endif /* MICRODEF_H */
|
#endif /* MICRODEF_H */
|
||||||
|
|
||||||
|
|||||||
@@ -20,12 +20,14 @@ CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
|||||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||||
else
|
else
|
||||||
## opt (default): maximum performance with PGO profile data
|
## opt (default): maximum performance with PGO profile data -fprofile-instr-use=$(PROFDATA) \
|
||||||
|
## PGO has been turned off: testing showed that it degrades performance
|
||||||
|
## INTERP_LB_FLAGS has been turned off as well: testing showed that it degrades performance
|
||||||
|
|
||||||
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
|
||||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -43,10 +45,6 @@ endif
|
|||||||
.cu.o:
|
.cu.o:
|
||||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
||||||
|
|
||||||
# CUDA rewrite of BSSN RHS (drop-in replacement for bssn_rhs_c + stencil helpers)
|
|
||||||
bssn_rhs_cuda.o: bssn_rhs_cuda.cu macrodef.h
|
|
||||||
$(Cu) $(CUDA_APP_FLAGS) -c $< -o $@ $(CUDA_LIB_PATH)
|
|
||||||
|
|
||||||
# C rewrite of BSSN RHS kernel and helpers
|
# C rewrite of BSSN RHS kernel and helpers
|
||||||
bssn_rhs_c.o: bssn_rhs_c.C
|
bssn_rhs_c.o: bssn_rhs_c.C
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
@@ -63,9 +61,12 @@ kodiss_c.o: kodiss_c.C
|
|||||||
lopsided_c.o: lopsided_c.C
|
lopsided_c.o: lopsided_c.C
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
|
|
||||||
interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
lopsided_kodis_c.o: lopsided_kodis_c.C
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
|
|
||||||
|
#interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||||
|
# ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
|
|
||||||
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||||
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
@@ -86,16 +87,12 @@ ifeq ($(USE_CXX_KERNELS),0)
|
|||||||
CFILES =
|
CFILES =
|
||||||
else
|
else
|
||||||
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o
|
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# CUDA rewrite: bssn_rhs_cuda.o replaces all CFILES (stencils are built-in)
|
|
||||||
CFILES_CUDA = bssn_rhs_cuda.o
|
|
||||||
|
|
||||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||||
ifeq ($(USE_CXX_RK4),1)
|
ifeq ($(USE_CXX_RK4),1)
|
||||||
CFILES += rungekutta4_rout_c.o
|
CFILES += rungekutta4_rout_c.o
|
||||||
CFILES_CUDA += rungekutta4_rout_c.o
|
|
||||||
RK4_F90_OBJ =
|
RK4_F90_OBJ =
|
||||||
else
|
else
|
||||||
RK4_F90_OBJ = rungekutta4_rout.o
|
RK4_F90_OBJ = rungekutta4_rout.o
|
||||||
@@ -181,11 +178,8 @@ $(CUDAFILES): bssn_gpu.h gpu_mem.h gpu_rhsSS_mem.h
|
|||||||
misc.o : zbesh.o
|
misc.o : zbesh.o
|
||||||
|
|
||||||
# projects
|
# projects
|
||||||
ABE: $(C++FILES) $(CFILES_CUDA) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
ABE: $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES_CUDA) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
|
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS)
|
||||||
|
|
||||||
ABE_CUDA: $(C++FILES) $(CFILES_CUDA) $(F90FILES) $(F77FILES) $(AHFDOBJS)
|
|
||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES) $(CFILES_CUDA) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(LDLIBS) -lcudart $(CUDA_LIB_PATH)
|
|
||||||
|
|
||||||
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
ABEGPU: $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES)
|
||||||
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
$(CLINKER) $(CXXAPPFLAGS) -o $@ $(C++FILES_GPU) $(CFILES) $(F90FILES) $(F77FILES) $(AHFDOBJS) $(CUDAFILES) $(LDLIBS)
|
||||||
@@ -194,4 +188,4 @@ TwoPunctureABE: $(TwoPunctureFILES)
|
|||||||
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
$(CLINKER) $(TP_OPTFLAGS) -qopenmp -o $@ $(TwoPunctureFILES) $(LDLIBS)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm *.o ABE ABE_CUDA ABEGPU TwoPunctureABE make.log -f
|
rm *.o ABE ABEGPU TwoPunctureABE make.log -f
|
||||||
|
|||||||
@@ -62,4 +62,4 @@ CLINKER = mpiicpx
|
|||||||
Cu = nvcc
|
Cu = nvcc
|
||||||
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
CUDA_LIB_PATH = -L/usr/lib/cuda/lib64 -I/usr/include -I/usr/lib/cuda/include
|
||||||
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
#CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -arch compute_13 -code compute_13,sm_13 -Dfortran3 -Dnewc
|
||||||
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc -arch=sm_80
|
CUDA_APP_FLAGS = -c -g -O3 --ptxas-options=-v -Dfortran3 -Dnewc
|
||||||
|
|||||||
@@ -217,6 +217,7 @@
|
|||||||
real*8,dimension(2*ghost_width) :: X,Y,Z
|
real*8,dimension(2*ghost_width) :: X,Y,Z
|
||||||
real*8, dimension(2*ghost_width,2*ghost_width) :: tmp2
|
real*8, dimension(2*ghost_width,2*ghost_width) :: tmp2
|
||||||
real*8, dimension(2*ghost_width) :: tmp1
|
real*8, dimension(2*ghost_width) :: tmp1
|
||||||
|
real*8 :: ddy
|
||||||
real*8,dimension(3) :: ccp
|
real*8,dimension(3) :: ccp
|
||||||
|
|
||||||
#if (ghost_width == 2)
|
#if (ghost_width == 2)
|
||||||
@@ -579,7 +580,7 @@
|
|||||||
tmp1(ghost_width-cxI(1)+cxB(1) :ghost_width-cxI(1)+cxT(1) ) = funf(cxB(1):cxT(1),j,k)
|
tmp1(ghost_width-cxI(1)+cxB(1) :ghost_width-cxI(1)+cxT(1) ) = funf(cxB(1):cxT(1),j,k)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call polint0(X,tmp1,funf(i,j,k),2*ghost_width)
|
call polint(X,tmp1,0.d0,funf(i,j,k),ddy,2*ghost_width)
|
||||||
|
|
||||||
! for y direction
|
! for y direction
|
||||||
elseif(sum(fg).eq.2.and.fg(2) .eq. 0.and. &
|
elseif(sum(fg).eq.2.and.fg(2) .eq. 0.and. &
|
||||||
@@ -689,7 +690,7 @@
|
|||||||
tmp1(ghost_width-cxI(2)+cxB(2) :ghost_width-cxI(2)+cxT(2) ) = funf(i,cxB(2):cxT(2),k)
|
tmp1(ghost_width-cxI(2)+cxB(2) :ghost_width-cxI(2)+cxT(2) ) = funf(i,cxB(2):cxT(2),k)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call polint0(Y,tmp1,funf(i,j,k),2*ghost_width)
|
call polint(Y,tmp1,0.d0,funf(i,j,k),ddy,2*ghost_width)
|
||||||
|
|
||||||
! for z direction
|
! for z direction
|
||||||
elseif(sum(fg).eq.2.and.fg(3) .eq. 0.and. &
|
elseif(sum(fg).eq.2.and.fg(3) .eq. 0.and. &
|
||||||
@@ -801,7 +802,7 @@
|
|||||||
tmp1(ghost_width-cxI(3)+cxB(3) :ghost_width-cxI(3)+cxT(3) ) = funf(i,j,cxB(3):cxT(3))
|
tmp1(ghost_width-cxI(3)+cxB(3) :ghost_width-cxI(3)+cxT(3) ) = funf(i,j,cxB(3):cxT(3))
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call polint0(Z,tmp1,funf(i,j,k),2*ghost_width)
|
call polint(Z,tmp1,0.d0,funf(i,j,k),ddy,2*ghost_width)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
|||||||
@@ -217,6 +217,7 @@
|
|||||||
real*8,dimension(2*ghost_width) :: X,Y,Z
|
real*8,dimension(2*ghost_width) :: X,Y,Z
|
||||||
real*8, dimension(2*ghost_width,2*ghost_width) :: tmp2
|
real*8, dimension(2*ghost_width,2*ghost_width) :: tmp2
|
||||||
real*8, dimension(2*ghost_width) :: tmp1
|
real*8, dimension(2*ghost_width) :: tmp1
|
||||||
|
real*8 :: ddy
|
||||||
|
|
||||||
#if (ghost_width == 2)
|
#if (ghost_width == 2)
|
||||||
real*8, parameter :: C1=-1.d0/16,C2=9.d0/16
|
real*8, parameter :: C1=-1.d0/16,C2=9.d0/16
|
||||||
@@ -469,7 +470,7 @@
|
|||||||
|
|
||||||
tmp1(cxB(1)+ghost_width-i+1:cxT(1)+ghost_width-i+1) = fh(cxB(1):cxT(1),j,k)
|
tmp1(cxB(1)+ghost_width-i+1:cxT(1)+ghost_width-i+1) = fh(cxB(1):cxT(1),j,k)
|
||||||
|
|
||||||
call polint0(X,tmp1,funf(i,j,k),2*ghost_width)
|
call polint(X,tmp1,0.d0,funf(i,j,k),ddy,2*ghost_width)
|
||||||
|
|
||||||
! for y direction
|
! for y direction
|
||||||
elseif (fg(2) .eq. 0)then
|
elseif (fg(2) .eq. 0)then
|
||||||
@@ -528,7 +529,7 @@
|
|||||||
|
|
||||||
tmp1(cxB(2)+ghost_width-j+1:cxT(2)+ghost_width-j+1) = fh(i,cxB(2):cxT(2),k)
|
tmp1(cxB(2)+ghost_width-j+1:cxT(2)+ghost_width-j+1) = fh(i,cxB(2):cxT(2),k)
|
||||||
|
|
||||||
call polint0(Y,tmp1,funf(i,j,k),2*ghost_width)
|
call polint(Y,tmp1,0.d0,funf(i,j,k),ddy,2*ghost_width)
|
||||||
|
|
||||||
! for z direction
|
! for z direction
|
||||||
else
|
else
|
||||||
@@ -587,7 +588,7 @@
|
|||||||
|
|
||||||
tmp1(cxB(3)+ghost_width-k+1:cxT(3)+ghost_width-k+1) = fh(i,j,cxB(3):cxT(3))
|
tmp1(cxB(3)+ghost_width-k+1:cxT(3)+ghost_width-k+1) = fh(i,j,cxB(3):cxT(3))
|
||||||
|
|
||||||
call polint0(Z,tmp1,funf(i,j,k),2*ghost_width)
|
call polint(Z,tmp1,0.d0,funf(i,j,k),ddy,2*ghost_width)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <complex>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
@@ -117,6 +118,62 @@ inline void rk4_stage3(std::size_t n,
|
|||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
||||||
|
// Advances a single real value through one stage of the four-stage RK4 scheme.
//   stage 0: f1 <- f0 + dT/2 * f_rhs
//   stage 1: f_rhs <- f_rhs + 2*f1, then f1 <- f0 + dT/2 * f1
//   stage 2: f_rhs <- f_rhs + 2*f1, then f1 <- f0 + dT   * f1
//   stage 3: f1 <- f0 + dT/6 * (f1 + f_rhs)
// Any other stage value is reported on stderr and aborts the program.
void f_rungekutta4_scalar(double &dT, double &f0, double &f1, double &f_rhs, int &RK4) {
  constexpr double kSixth = 1.0 / 6.0;
  constexpr double kHalf = 0.5;
  constexpr double kTwo = 2.0;

  const int stage = RK4;

  // Reject anything outside the four known stages up front.
  if (stage < 0 || stage > 3) {
    std::fprintf(stderr, "rungekutta4_scalar_c: invalid RK4 stage %d\n", RK4);
    std::abort();
  }

  if (stage == 0) {
    f1 = f0 + kHalf * dT * f_rhs;
  } else if (stage == 1) {
    // f_rhs must absorb the incoming f1 before f1 is overwritten.
    f_rhs += kTwo * f1;
    f1 = f0 + kHalf * dT * f1;
  } else if (stage == 2) {
    f_rhs += kTwo * f1;
    f1 = f0 + dT * f1;
  } else {
    f1 = f0 + kSixth * dT * (f1 + f_rhs);
  }
}
|
||||||
|
|
||||||
|
// Complex-valued counterpart of the scalar RK4 stage update: the same four
// stage formulas, applied to std::complex<double> values with a real step dT.
//   stage 0: f1 <- f0 + dT/2 * f_rhs
//   stage 1: f_rhs <- f_rhs + 2*f1, then f1 <- f0 + dT/2 * f1
//   stage 2: f_rhs <- f_rhs + 2*f1, then f1 <- f0 + dT   * f1
//   stage 3: f1 <- f0 + dT/6 * (f1 + f_rhs)
// Any other stage value is reported on stderr and aborts the program.
void rungekutta4_cplxscalar_(double &dT,
                             std::complex<double> &f0,
                             std::complex<double> &f1,
                             std::complex<double> &f_rhs,
                             int &RK4) {
  constexpr double kSixth = 1.0 / 6.0;
  constexpr double kHalf = 0.5;
  constexpr double kTwo = 2.0;

  const int stage = RK4;

  // Reject anything outside the four known stages up front.
  if (stage < 0 || stage > 3) {
    std::fprintf(stderr, "rungekutta4_cplxscalar_c: invalid RK4 stage %d\n", RK4);
    std::abort();
  }

  if (stage == 0) {
    f1 = f0 + kHalf * dT * f_rhs;
  } else if (stage == 1) {
    // f_rhs must absorb the incoming f1 before f1 is overwritten.
    f_rhs += kTwo * f1;
    f1 = f0 + kHalf * dT * f1;
  } else if (stage == 2) {
    f_rhs += kTwo * f1;
    f1 = f0 + dT * f1;
  } else {
    f1 = f0 + kSixth * dT * (f1 + f_rhs);
  }
}
|
||||||
|
|
||||||
int f_rungekutta4_rout(int *ex, double &dT,
|
int f_rungekutta4_rout(int *ex, double &dT,
|
||||||
double *f0, double *f1, double *f_rhs,
|
double *f0, double *f1, double *f_rhs,
|
||||||
int &RK4) {
|
int &RK4) {
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -36,6 +36,11 @@ private:
|
|||||||
|
|
||||||
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
double *nx_g, *ny_g, *nz_g; // global list of unit normals
|
||||||
int myrank, cpusize;
|
int myrank, cpusize;
|
||||||
|
int wave_cache_spinw, wave_cache_maxl, wave_cache_modes;
|
||||||
|
double *wave_theta_pos, *wave_theta_neg;
|
||||||
|
double *wave_phi_cos, *wave_phi_sin;
|
||||||
|
void clear_wave_cache();
|
||||||
|
void build_wave_cache(int spinw, int maxl);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
surface_integral(int iSymmetry);
|
surface_integral(int iSymmetry);
|
||||||
@@ -82,13 +87,29 @@ public:
|
|||||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||||
var *Gmx, var *Gmy, var *Gmz,
|
var *Gmx, var *Gmy, var *Gmz,
|
||||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||||
double *Rout, monitor *Monitor);
|
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||||
void surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK,
|
void surf_MassPAng(double rex, int lev, ShellPatch *GH, var *chi, var *trK,
|
||||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||||
var *Gmx, var *Gmy, var *Gmz,
|
var *Gmx, var *Gmy, var *Gmz,
|
||||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||||
double *Rout, monitor *Monitor);
|
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||||
|
void surf_WaveMassPAng(double rex, int lev, cgh *GH,
|
||||||
|
var *Rpsi4, var *Ipsi4, int spinw, int maxl, int NN, double *RP, double *IP,
|
||||||
|
var *chi, var *trK,
|
||||||
|
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||||
|
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||||
|
var *Gmx, var *Gmy, var *Gmz,
|
||||||
|
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||||
|
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||||
|
void surf_WaveMassPAng(double rex, int lev, ShellPatch *GH,
|
||||||
|
var *Rpsi4, var *Ipsi4, int spinw, int maxl, int NN, double *RP, double *IP,
|
||||||
|
var *chi, var *trK,
|
||||||
|
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||||
|
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||||
|
var *Gmx, var *Gmy, var *Gmz,
|
||||||
|
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs,
|
||||||
|
double *Rout, monitor *Monitor, bool refresh_mass_fields = true);
|
||||||
void surf_Wave(double rex, cgh *GH, ShellPatch *SH,
|
void surf_Wave(double rex, cgh *GH, ShellPatch *SH,
|
||||||
var *chi, var *trK,
|
var *chi, var *trK,
|
||||||
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
var *gxx, var *gxy, var *gxz, var *gyy, var *gyz, var *gzz,
|
||||||
@@ -115,7 +136,7 @@ public:
|
|||||||
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
var *Axx, var *Axy, var *Axz, var *Ayy, var *Ayz, var *Azz,
|
||||||
var *Gmx, var *Gmy, var *Gmz,
|
var *Gmx, var *Gmy, var *Gmz,
|
||||||
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i
|
var *Sfx_rhs, var *Sfy_rhs, var *Sfz_rhs, // temparay memory for mass^i
|
||||||
double *Rout, monitor *Monitor, MPI_Comm Comm_here);
|
double *Rout, monitor *Monitor, MPI_Comm Comm_here, bool refresh_mass_fields = true);
|
||||||
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
|
void surf_Wave(double rex, int lev, cgh *GH, var *Rpsi4, var *Ipsi4,
|
||||||
int spinw, int maxl, int NN, double *RP, double *IP,
|
int spinw, int maxl, int NN, double *RP, double *IP,
|
||||||
monitor *Monitor, MPI_Comm Comm_here);
|
monitor *Monitor, MPI_Comm Comm_here);
|
||||||
|
|||||||
@@ -25,3 +25,9 @@ void lopsided(const int ex[3],
|
|||||||
const double *f, double *f_rhs,
|
const double *f, double *f_rhs,
|
||||||
const double *Sfx, const double *Sfy, const double *Sfz,
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
int Symmetry, const double SoA[3]);
|
int Symmetry, const double SoA[3]);
|
||||||
|
|
||||||
|
void lopsided_kodis(const int ex[3],
|
||||||
|
const double *X, const double *Y, const double *Z,
|
||||||
|
const double *f, double *f_rhs,
|
||||||
|
const double *Sfx, const double *Sfy, const double *Sfz,
|
||||||
|
int Symmetry, const double SoA[3], double eps);
|
||||||
|
|||||||
@@ -144,6 +144,62 @@ def generate_macrodef_h():
|
|||||||
print( "#define REGLEV 0", file=file1 )
|
print( "#define REGLEV 0", file=file1 )
|
||||||
print( file=file1 )
|
print( file=file1 )
|
||||||
|
|
||||||
|
# Define fine-grained timing/debug macros.
|
||||||
|
# All of them default to OFF so production builds do not pay profiling overhead.
|
||||||
|
|
||||||
|
fine_timing = getattr(input_data, "Fine_Timing",
|
||||||
|
getattr(input_data, "Finegrained_Timing", "no"))
|
||||||
|
kernel_fine_timing = getattr(input_data, "Kernel_Fine_Timing",
|
||||||
|
getattr(input_data, "BSSN_Kernel_Fine_Timing", "no"))
|
||||||
|
stdin_abort_poll = getattr(input_data, "Enable_Stdin_Abort_Poll",
|
||||||
|
getattr(input_data, "Stdin_Abort_Poll", "no"))
|
||||||
|
timing_report_every = max(1, int(getattr(
|
||||||
|
input_data, "Timing_Every_Steps",
|
||||||
|
getattr(input_data, "Timing_Report_Every", 1))))
|
||||||
|
timing_top_hotspots = max(1, int(getattr(
|
||||||
|
input_data, "Timing_Top_Hotspots", 8)))
|
||||||
|
|
||||||
|
if ( fine_timing == "yes" ):
|
||||||
|
print( "#define BSSN_FINE_TIMING 1", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
elif ( fine_timing == "no" ):
|
||||||
|
print( "#define BSSN_FINE_TIMING 0", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
else:
|
||||||
|
print( "Fine_Timing setting error!!!" )
|
||||||
|
print()
|
||||||
|
print( "# Fine_Timing setting error!!!", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
|
||||||
|
print( f"#define BSSN_FINE_TIMING_EVERY {timing_report_every}", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
print( f"#define BSSN_FINE_TIMING_TOPN {timing_top_hotspots}", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
|
||||||
|
if ( kernel_fine_timing == "yes" ):
|
||||||
|
print( "#define BSSN_KERNEL_FINE_TIMING 1", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
elif ( kernel_fine_timing == "no" ):
|
||||||
|
print( "#define BSSN_KERNEL_FINE_TIMING 0", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
else:
|
||||||
|
print( "Kernel_Fine_Timing setting error!!!" )
|
||||||
|
print()
|
||||||
|
print( "# Kernel_Fine_Timing setting error!!!", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
|
||||||
|
if ( stdin_abort_poll == "yes" ):
|
||||||
|
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 1", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
elif ( stdin_abort_poll == "no" ):
|
||||||
|
print( "#define BSSN_ENABLE_STDIN_ABORT_POLL 0", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
else:
|
||||||
|
print( "Enable_Stdin_Abort_Poll setting error!!!" )
|
||||||
|
print()
|
||||||
|
print( "# Enable_Stdin_Abort_Poll setting error!!!", file=file1 )
|
||||||
|
print( file=file1 )
|
||||||
|
|
||||||
# Define macro USE_GPU
|
# Define macro USE_GPU
|
||||||
# use GPU or not
|
# use GPU or not
|
||||||
|
|
||||||
@@ -224,6 +280,21 @@ def generate_macrodef_h():
|
|||||||
print( "// 0: for every level;", file=file1 )
|
print( "// 0: for every level;", file=file1 )
|
||||||
print( "// 1: for all", file=file1 )
|
print( "// 1: for all", file=file1 )
|
||||||
print( "//", file=file1 )
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_FINE_TIMING", file=file1 )
|
||||||
|
print( "// enable fine-grained per-timestep timing monitor", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_FINE_TIMING_EVERY", file=file1 )
|
||||||
|
print( "// report timing every N coarse timesteps", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_FINE_TIMING_TOPN", file=file1 )
|
||||||
|
print( "// number of hottest timing buckets shown in stdout", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_KERNEL_FINE_TIMING", file=file1 )
|
||||||
|
print( "// enable split timing inside compute_rhs_bssn", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
|
print( "// define BSSN_ENABLE_STDIN_ABORT_POLL", file=file1 )
|
||||||
|
print( "// poll stdin and broadcast abort flag every coarse step", file=file1 )
|
||||||
|
print( "//", file=file1 )
|
||||||
print( "// define USE_GPU", file=file1 )
|
print( "// define USE_GPU", file=file1 )
|
||||||
print( "// use gpu or not", file=file1 )
|
print( "// use gpu or not", file=file1 )
|
||||||
print( "//", file=file1 )
|
print( "//", file=file1 )
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ def makefile_ABE():
|
|||||||
|
|
||||||
## Build command with CPU binding to nohz_full cores
|
## Build command with CPU binding to nohz_full cores
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=optimize ABE"
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=off ABE"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -1,97 +0,0 @@
|
|||||||
# AMSS-NCKU PGO Profile Analysis Report
|
|
||||||
|
|
||||||
## 1. Profiling Environment
|
|
||||||
|
|
||||||
| Item | Value |
|
|
||||||
|------|-------|
|
|
||||||
| Compiler | Intel oneAPI DPC++/C++ 2025.3.0 (icpx/ifx) |
|
|
||||||
| Instrumentation Flag | `-fprofile-instr-generate` |
|
|
||||||
| Optimization Level (instrumented) | `-O2 -xHost -fma` |
|
|
||||||
| MPI Processes | 1 (single process to avoid MPI+instrumentation deadlock) |
|
|
||||||
| Profile File | `default_9725750769337483397_0.profraw` (327 KB) |
|
|
||||||
| Merged Profile | `default.profdata` (394 KB) |
|
|
||||||
| llvm-profdata | `/home/intel/oneapi/compiler/2025.3/bin/compiler/llvm-profdata` |
|
|
||||||
|
|
||||||
## 2. Reduced Simulation Parameters (for profiling run)
|
|
||||||
|
|
||||||
| Parameter | Production Value | Profiling Value |
|
|
||||||
|-----------|-----------------|-----------------|
|
|
||||||
| MPI_processes | 64 | 1 |
|
|
||||||
| grid_level | 9 | 4 |
|
|
||||||
| static_grid_level | 5 | 3 |
|
|
||||||
| static_grid_number | 96 | 24 |
|
|
||||||
| moving_grid_number | 48 | 16 |
|
|
||||||
| largest_box_xyz_max | 320^3 | 160^3 |
|
|
||||||
| Final_Evolution_Time | 1000.0 | 10.0 |
|
|
||||||
| Evolution_Step_Number | 10,000,000 | 1,000 |
|
|
||||||
| Detector_Number | 12 | 2 |
|
|
||||||
|
|
||||||
## 3. Profile Summary
|
|
||||||
|
|
||||||
| Metric | Value |
|
|
||||||
|--------|-------|
|
|
||||||
| Total instrumented functions | 1,392 |
|
|
||||||
| Functions with non-zero counts | 117 (8.4%) |
|
|
||||||
| Functions with zero counts | 1,275 (91.6%) |
|
|
||||||
| Maximum function entry count | 386,459,248 |
|
|
||||||
| Maximum internal block count | 370,477,680 |
|
|
||||||
| Total block count | 4,198,023,118 |
|
|
||||||
|
|
||||||
## 4. Top 20 Hotspot Functions
|
|
||||||
|
|
||||||
| Rank | Total Count | Max Block Count | Function | Category |
|
|
||||||
|------|------------|-----------------|----------|----------|
|
|
||||||
| 1 | 1,241,601,732 | 370,477,680 | `polint_` | Interpolation |
|
|
||||||
| 2 | 755,994,435 | 230,156,640 | `prolong3_` | Grid prolongation |
|
|
||||||
| 3 | 667,964,095 | 3,697,792 | `compute_rhs_bssn_` | BSSN RHS evolution |
|
|
||||||
| 4 | 539,736,051 | 386,459,248 | `symmetry_bd_` | Symmetry boundary |
|
|
||||||
| 5 | 277,310,808 | 53,170,728 | `lopsided_` | Lopsided FD stencil |
|
|
||||||
| 6 | 155,534,488 | 94,535,040 | `decide3d_` | 3D grid decision |
|
|
||||||
| 7 | 119,267,712 | 19,266,048 | `rungekutta4_rout_` | RK4 time integrator |
|
|
||||||
| 8 | 91,574,616 | 48,824,160 | `kodis_` | Kreiss-Oliger dissipation |
|
|
||||||
| 9 | 67,555,389 | 43,243,680 | `fderivs_` | Finite differences |
|
|
||||||
| 10 | 55,296,000 | 42,246,144 | `misc::fact(int)` | Factorial utility |
|
|
||||||
| 11 | 43,191,071 | 27,663,328 | `fdderivs_` | 2nd-order FD derivatives |
|
|
||||||
| 12 | 36,233,965 | 22,429,440 | `restrict3_` | Grid restriction |
|
|
||||||
| 13 | 24,698,512 | 17,231,520 | `polin3_` | Polynomial interpolation |
|
|
||||||
| 14 | 22,962,942 | 20,968,768 | `copy_` | Data copy |
|
|
||||||
| 15 | 20,135,696 | 17,259,168 | `Ansorg::barycentric(...)` | Spectral interpolation |
|
|
||||||
| 16 | 14,650,224 | 7,224,768 | `Ansorg::barycentric_omega(...)` | Spectral weights |
|
|
||||||
| 17 | 13,242,296 | 2,871,920 | `global_interp_` | Global interpolation |
|
|
||||||
| 18 | 12,672,000 | 7,734,528 | `sommerfeld_rout_` | Sommerfeld boundary |
|
|
||||||
| 19 | 6,872,832 | 1,880,064 | `sommerfeld_routbam_` | Sommerfeld boundary (BAM) |
|
|
||||||
| 20 | 5,709,900 | 2,809,632 | `l2normhelper_` | L2 norm computation |
|
|
||||||
|
|
||||||
## 5. Hotspot Category Breakdown
|
|
||||||
|
|
||||||
Top 20 functions account for ~99% of total execution counts:
|
|
||||||
|
|
||||||
| Category | Functions | Combined Count | Share |
|
|
||||||
|----------|-----------|---------------|-------|
|
|
||||||
| Interpolation / Prolongation / Restriction | polint_, prolong3_, restrict3_, polin3_, global_interp_, Ansorg::* | ~2,093M | ~50% |
|
|
||||||
| BSSN RHS + FD stencils | compute_rhs_bssn_, lopsided_, fderivs_, fdderivs_ | ~1,056M | ~25% |
|
|
||||||
| Boundary conditions | symmetry_bd_, sommerfeld_rout_, sommerfeld_routbam_ | ~559M | ~13% |
|
|
||||||
| Time integration | rungekutta4_rout_ | ~119M | ~3% |
|
|
||||||
| Dissipation | kodis_ | ~92M | ~2% |
|
|
||||||
| Utilities | misc::fact, decide3d_, copy_, l2normhelper_ | ~256M | ~6% |
|
|
||||||
|
|
||||||
## 6. Conclusions
|
|
||||||
|
|
||||||
1. **Profile data is valid**: 1,392 functions instrumented, 117 exercised with ~4.2 billion total counts.
|
|
||||||
2. **Hotspot concentration is high**: Top 5 functions alone account for ~83% of all counts, which is ideal for PGO — the compiler has strong branch/layout optimization targets.
|
|
||||||
3. **Fortran numerical kernels dominate**: `polint_`, `prolong3_`, `compute_rhs_bssn_`, `symmetry_bd_`, `lopsided_` are all Fortran routines in the inner evolution loop. PGO will optimize their branch prediction and basic block layout.
|
|
||||||
4. **91.6% of functions have zero counts**: These are code paths for unused features (GPU, BSSN-EScalar, BSSN-EM, Z4C, etc.). PGO will deprioritize them, improving instruction cache utilization.
|
|
||||||
5. **Profile is representative**: Despite the reduced grid size, the code path coverage matches production — the same kernels (RHS, prolongation, restriction, boundary) are exercised. PGO branch probabilities from this profile will transfer well to full-scale runs.
|
|
||||||
|
|
||||||
## 7. PGO Phase 2 Usage
|
|
||||||
|
|
||||||
To apply the profile, use the following flags in `makefile.inc`:
|
|
||||||
|
|
||||||
```makefile
|
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=/home/amss/AMSS-NCKU/pgo_profile/default.profdata \
|
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
|
||||||
-fprofile-instr-use=/home/amss/AMSS-NCKU/pgo_profile/default.profdata \
|
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
|
||||||
```
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user