Add safe BSSN-EScalar kernel and transfer toggles
This commit is contained in:
@@ -26,6 +26,12 @@ using namespace std;
|
||||
#include "shellfunctions.h"
|
||||
#include "parameters.h"
|
||||
|
||||
// Pick the right-hand-side kernel invoked through BSSN_ESCALAR_RHS.
// f_compute_rhs_bssn_escalar_c is used only when BSSN_USE_ESCALAR_C_KERNEL
// evaluates to non-zero; an undefined or zero toggle selects
// f_compute_rhs_bssn_escalar instead.
#if !(BSSN_USE_ESCALAR_C_KERNEL)
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar
#else
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar_c
#endif
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
@@ -169,13 +175,7 @@ void bssnEScalar_class::Initialize()
|
||||
Setup_Black_Hole_position();
|
||||
}
|
||||
|
||||
// BSSN-EScalar currently uses the uncached communication fallback paths.
|
||||
sync_cache_pre = 0;
|
||||
sync_cache_cor = 0;
|
||||
sync_cache_rp_coarse = 0;
|
||||
sync_cache_rp_fine = 0;
|
||||
sync_cache_restrict = 0;
|
||||
sync_cache_outbd = 0;
|
||||
setup_transfer_caches();
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
@@ -347,6 +347,8 @@ void bssnEScalar_class::Read_Ansorg()
|
||||
}
|
||||
int order = 6;
|
||||
Ansorg read_ansorg("Ansorg.psid", order);
|
||||
if (myrank == 0)
|
||||
cout << "[debug] Read_Ansorg: Ansorg object ready" << endl;
|
||||
// set initial data
|
||||
for (int lev = 0; lev < GH->levels; lev++)
|
||||
{
|
||||
@@ -384,6 +386,8 @@ void bssnEScalar_class::Read_Ansorg()
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (myrank == 0)
|
||||
cout << "[debug] Read_Ansorg: finished level " << lev << " patch init" << endl;
|
||||
}
|
||||
#ifdef WithShell
|
||||
// ShellPatch part
|
||||
@@ -426,12 +430,16 @@ void bssnEScalar_class::Read_Ansorg()
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (myrank == 0)
|
||||
cout << "[debug] Read_Ansorg: finished shell init" << endl;
|
||||
#endif
|
||||
|
||||
delete[] Porg_here;
|
||||
delete[] pmom_local;
|
||||
delete[] spin_local;
|
||||
delete[] mass_local;
|
||||
if (myrank == 0)
|
||||
cout << "[debug] Read_Ansorg: finished local cleanup" << endl;
|
||||
// dump read_in initial data
|
||||
// for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT);
|
||||
}
|
||||
@@ -762,7 +770,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
|
||||
#endif
|
||||
|
||||
if (f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
@@ -1016,7 +1024,8 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
}
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
sync_predictor_start(lev, SynchList_pre, async_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -1035,6 +1044,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
sync_predictor_finish(lev, async_pre, SynchList_pre);
|
||||
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
@@ -1104,7 +1114,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
|
||||
#endif
|
||||
|
||||
if (f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
|
||||
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
|
||||
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
|
||||
@@ -1372,7 +1382,8 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
}
|
||||
#endif
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
sync_corrector_start(lev, SynchList_cor, async_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -1391,6 +1402,7 @@ void bssnEScalar_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
sync_corrector_finish(lev, async_cor, SynchList_cor);
|
||||
// for black hole position
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
@@ -1858,8 +1870,11 @@ void bssnEScalar_class::AnalysisStuff_EScalar(int lev, double dT_lev)
|
||||
|
||||
//================================================================================================
|
||||
|
||||
void bssnEScalar_class::Interp_Constraint()
|
||||
void bssnEScalar_class::Interp_Constraint(bool infg)
|
||||
{
|
||||
if (!infg)
|
||||
return;
|
||||
|
||||
// we do not support a_lev != 0 yet.
|
||||
if (a_lev > 0)
|
||||
return;
|
||||
@@ -1881,7 +1896,7 @@ void bssnEScalar_class::Interp_Constraint()
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
if (lev > 0)
|
||||
f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
@@ -2101,7 +2116,7 @@ void bssnEScalar_class::Constraint_Out()
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
if (lev > 0)
|
||||
f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
|
||||
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
|
||||
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
|
||||
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
|
||||
|
||||
@@ -51,7 +51,7 @@ public:
|
||||
void Compute_Psi4(int lev);
|
||||
void Step(int lev, int YN);
|
||||
void AnalysisStuff_EScalar(int lev, double dT_lev);
|
||||
void Interp_Constraint();
|
||||
void Interp_Constraint(bool infg);
|
||||
void Constraint_Out();
|
||||
|
||||
protected:
|
||||
|
||||
@@ -1008,21 +1008,7 @@ void bssn_class::Initialize()
|
||||
Setup_Black_Hole_position();
|
||||
}
|
||||
|
||||
// BSSN-EScalar uses the uncached communication fallback paths.
|
||||
sync_cache_pre = 0;
|
||||
sync_cache_cor = 0;
|
||||
sync_cache_rp_coarse = 0;
|
||||
sync_cache_rp_fine = 0;
|
||||
sync_cache_restrict = 0;
|
||||
sync_cache_outbd = 0;
|
||||
#if (ABEtype != 1)
|
||||
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
|
||||
#endif
|
||||
setup_transfer_caches();
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
@@ -1037,13 +1023,6 @@ void bssn_class::Initialize()
|
||||
|
||||
bssn_class::~bssn_class()
|
||||
{
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] begin" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
#ifdef With_AHF
|
||||
AHList->clearList();
|
||||
AHDList->clearList();
|
||||
@@ -1078,13 +1057,6 @@ bssn_class::~bssn_class()
|
||||
ConstraintList->clearList();
|
||||
|
||||
delete[] ConstraintRefreshLevels;
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] lists cleared" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
delete phio;
|
||||
delete trKo;
|
||||
@@ -1262,13 +1234,6 @@ bssn_class::~bssn_class()
|
||||
delete Cons_Gx;
|
||||
delete Cons_Gy;
|
||||
delete Cons_Gz;
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] core vars freed" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef Point_Psi4
|
||||
delete phix;
|
||||
@@ -1298,73 +1263,12 @@ bssn_class::~bssn_class()
|
||||
#endif
|
||||
|
||||
// Destroy sync caches before GH
|
||||
if (sync_cache_pre)
|
||||
{
|
||||
#if (ABEtype != 1)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_pre[i].destroy();
|
||||
#endif
|
||||
delete[] sync_cache_pre;
|
||||
}
|
||||
if (sync_cache_cor)
|
||||
{
|
||||
#if (ABEtype != 1)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_cor[i].destroy();
|
||||
#endif
|
||||
delete[] sync_cache_cor;
|
||||
}
|
||||
if (sync_cache_rp_coarse)
|
||||
{
|
||||
#if (ABEtype != 1)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_rp_coarse[i].destroy();
|
||||
#endif
|
||||
delete[] sync_cache_rp_coarse;
|
||||
}
|
||||
if (sync_cache_rp_fine)
|
||||
{
|
||||
#if (ABEtype != 1)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_rp_fine[i].destroy();
|
||||
#endif
|
||||
delete[] sync_cache_rp_fine;
|
||||
}
|
||||
if (sync_cache_restrict)
|
||||
{
|
||||
#if (ABEtype != 1)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_restrict[i].destroy();
|
||||
#endif
|
||||
delete[] sync_cache_restrict;
|
||||
}
|
||||
if (sync_cache_outbd)
|
||||
{
|
||||
#if (ABEtype != 1)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_outbd[i].destroy();
|
||||
#endif
|
||||
delete[] sync_cache_outbd;
|
||||
}
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] caches freed" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
destroy_transfer_caches();
|
||||
|
||||
delete GH;
|
||||
#ifdef WithShell
|
||||
delete SH;
|
||||
#endif
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] grids freed" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < BH_num; i++)
|
||||
{
|
||||
@@ -1384,13 +1288,6 @@ bssn_class::~bssn_class()
|
||||
delete[] Mass;
|
||||
delete[] Spin;
|
||||
delete[] Pmom;
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] puncture arrays freed" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
delete ErrorMonitor;
|
||||
delete Psi4Monitor;
|
||||
@@ -1399,22 +1296,8 @@ bssn_class::~bssn_class()
|
||||
delete ConVMonitor;
|
||||
delete TimingMonitor;
|
||||
delete Waveshell;
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] monitors freed" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
|
||||
delete CheckPoint;
|
||||
#if (ABEtype == 1)
|
||||
if (myrank == 0)
|
||||
{
|
||||
cout << "[dtor] checkpoint freed" << endl;
|
||||
cout.flush();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//================================================================================================
|
||||
@@ -2599,9 +2482,7 @@ void bssn_class::Evolve(int Steps)
|
||||
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
invalidate_transfer_caches();
|
||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid);
|
||||
#endif
|
||||
|
||||
@@ -2842,9 +2723,7 @@ void bssn_class::RecursiveStep(int lev)
|
||||
{
|
||||
if (ConstraintRefreshLevels)
|
||||
ConstraintRefreshLevels[lev] = 1;
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
invalidate_transfer_caches();
|
||||
}
|
||||
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
|
||||
#endif
|
||||
@@ -3025,9 +2904,7 @@ void bssn_class::ParallelStep()
|
||||
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
invalidate_transfer_caches();
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -3191,12 +3068,10 @@ void bssn_class::ParallelStep()
|
||||
if (lev + 1 >= GH->movls)
|
||||
{
|
||||
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
|
||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
|
||||
invalidate_transfer_caches();
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -3211,9 +3086,7 @@ void bssn_class::ParallelStep()
|
||||
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
invalidate_transfer_caches();
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -3232,9 +3105,7 @@ void bssn_class::ParallelStep()
|
||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
invalidate_transfer_caches();
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -3250,9 +3121,7 @@ void bssn_class::ParallelStep()
|
||||
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
|
||||
SynchList_cor, OldStateList, StateList, SynchList_pre,
|
||||
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
|
||||
#if (ABEtype != 1)
|
||||
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
|
||||
#endif
|
||||
invalidate_transfer_caches();
|
||||
|
||||
// a_stream.clear();
|
||||
// a_stream.str("");
|
||||
@@ -3783,11 +3652,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
|
||||
STEP_TIMER_DECL(timer_predictor_sync);
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
#endif
|
||||
sync_predictor_start(lev, SynchList_pre, async_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -3806,9 +3671,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if (ABEtype != 1)
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||
#endif
|
||||
sync_predictor_finish(lev, async_pre, SynchList_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
@@ -4154,11 +4017,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
|
||||
STEP_TIMER_DECL(timer_corrector_sync);
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
#endif
|
||||
sync_corrector_start(lev, SynchList_cor, async_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -4177,9 +4036,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if (ABEtype != 1)
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
sync_corrector_finish(lev, async_cor, SynchList_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
@@ -4668,11 +4525,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
#endif
|
||||
|
||||
Parallel::AsyncSyncState async_pre;
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
|
||||
#endif
|
||||
sync_predictor_start(lev, SynchList_pre, async_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -4691,9 +4544,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if (ABEtype != 1)
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
|
||||
#endif
|
||||
sync_predictor_finish(lev, async_pre, SynchList_pre);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
@@ -5022,11 +4873,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
#endif
|
||||
|
||||
Parallel::AsyncSyncState async_cor;
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
|
||||
#endif
|
||||
sync_corrector_start(lev, SynchList_cor, async_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
if (lev == 0)
|
||||
@@ -5045,9 +4892,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if (ABEtype != 1)
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
sync_corrector_finish(lev, async_cor, SynchList_cor);
|
||||
|
||||
#ifdef WithShell
|
||||
// Complete non-blocking error reduction and check
|
||||
@@ -5439,11 +5284,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
|
||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
|
||||
#endif
|
||||
sync_evolution(lev, SynchList_pre, sync_cache_pre);
|
||||
|
||||
// Complete non-blocking error reduction and check
|
||||
MPI_Wait(&err_req, MPI_STATUS_IGNORE);
|
||||
@@ -5644,11 +5485,7 @@ void bssn_class::Step(int lev, int YN)
|
||||
|
||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
|
||||
#endif
|
||||
sync_evolution(lev, SynchList_cor, sync_cache_cor);
|
||||
|
||||
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
|
||||
|
||||
@@ -6365,11 +6202,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
#endif
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
||||
#endif
|
||||
restrict_evolution(lev, SL, SynchList_pre);
|
||||
#elif (RPB == 1)
|
||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||
@@ -6382,11 +6215,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||
#endif
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
#endif
|
||||
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
|
||||
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
@@ -6397,21 +6226,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (MIXOUTB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#else
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
|
||||
#endif
|
||||
outbdlow2hi_evolution(lev, SynchList_pre, SL);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
|
||||
#endif
|
||||
@@ -6438,11 +6253,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
#endif
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||
#else
|
||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
|
||||
#endif
|
||||
restrict_evolution(lev, SL, SL);
|
||||
#elif (RPB == 1)
|
||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
|
||||
@@ -6455,11 +6266,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
|
||||
#endif
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
#endif
|
||||
sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
|
||||
|
||||
#if (PSTR == 1 || PSTR == 2)
|
||||
// a_stream.clear();
|
||||
@@ -6470,21 +6277,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (MIXOUTB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#else
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
|
||||
#endif
|
||||
outbdlow2hi_evolution(lev, SL, SL);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
|
||||
#endif
|
||||
@@ -6685,7 +6478,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SL, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]);
|
||||
sync_evolution(lev, SL, sync_cache_rp_fine);
|
||||
#endif
|
||||
}
|
||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||
@@ -6818,39 +6611,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
}
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
|
||||
#endif
|
||||
restrict_evolution(lev, SynchList_cor, SynchList_pre);
|
||||
#elif (RPB == 1)
|
||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
|
||||
#endif
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
#endif
|
||||
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (MIXOUTB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#else
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||
#endif
|
||||
outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
@@ -6864,39 +6635,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
if (myrank == 0)
|
||||
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
|
||||
#if (RPB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
||||
#else
|
||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
|
||||
#endif
|
||||
restrict_evolution(lev, SynchList_cor, StateList);
|
||||
#elif (RPB == 1)
|
||||
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
|
||||
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
|
||||
#endif
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
#endif
|
||||
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (MIXOUTB == 0)
|
||||
#if (ABEtype == 1)
|
||||
Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
#else
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||
#endif
|
||||
outbdlow2hi_evolution(lev, StateList, SynchList_cor);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
@@ -6906,11 +6655,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
|
||||
#endif
|
||||
}
|
||||
|
||||
#if (ABEtype == 1)
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
#else
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||
#endif
|
||||
sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
|
||||
}
|
||||
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
|
||||
}
|
||||
@@ -6942,7 +6687,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
||||
|
||||
#if (RPB == 0)
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||
outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
@@ -6955,7 +6700,7 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
||||
{
|
||||
#if (RPB == 0)
|
||||
#if (MIXOUTB == 0)
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
|
||||
outbdlow2hi_evolution(lev, StateList, SynchList_cor);
|
||||
#elif (MIXOUTB == 1)
|
||||
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
|
||||
#endif
|
||||
@@ -6974,10 +6719,10 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
|
||||
#else
|
||||
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
|
||||
#endif
|
||||
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
|
||||
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
|
||||
}
|
||||
|
||||
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
|
||||
sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
|
||||
}
|
||||
}
|
||||
#undef MIXOUTB
|
||||
@@ -7707,6 +7452,169 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool bssn_class::use_transfer_cache() const
|
||||
{
|
||||
#if BSSN_USE_TRANSFER_CACHE
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void bssn_class::setup_transfer_caches()
|
||||
{
|
||||
sync_cache_pre = 0;
|
||||
sync_cache_cor = 0;
|
||||
sync_cache_rp_coarse = 0;
|
||||
sync_cache_rp_fine = 0;
|
||||
sync_cache_restrict = 0;
|
||||
sync_cache_outbd = 0;
|
||||
|
||||
if (!use_transfer_cache() || !GH)
|
||||
return;
|
||||
|
||||
sync_cache_pre = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_cor = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
|
||||
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
|
||||
}
|
||||
|
||||
void bssn_class::invalidate_transfer_caches()
|
||||
{
|
||||
if (!use_transfer_cache() || !GH || !sync_cache_pre || !sync_cache_cor ||
|
||||
!sync_cache_rp_coarse || !sync_cache_rp_fine || !sync_cache_restrict || !sync_cache_outbd)
|
||||
return;
|
||||
|
||||
for (int il = 0; il < GH->levels; il++)
|
||||
{
|
||||
sync_cache_pre[il].invalidate();
|
||||
sync_cache_cor[il].invalidate();
|
||||
sync_cache_rp_coarse[il].invalidate();
|
||||
sync_cache_rp_fine[il].invalidate();
|
||||
sync_cache_restrict[il].invalidate();
|
||||
sync_cache_outbd[il].invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
void bssn_class::destroy_transfer_caches()
|
||||
{
|
||||
if (sync_cache_pre)
|
||||
{
|
||||
if (use_transfer_cache() && GH)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_pre[i].destroy();
|
||||
delete[] sync_cache_pre;
|
||||
sync_cache_pre = 0;
|
||||
}
|
||||
if (sync_cache_cor)
|
||||
{
|
||||
if (use_transfer_cache() && GH)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_cor[i].destroy();
|
||||
delete[] sync_cache_cor;
|
||||
sync_cache_cor = 0;
|
||||
}
|
||||
if (sync_cache_rp_coarse)
|
||||
{
|
||||
if (use_transfer_cache() && GH)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_rp_coarse[i].destroy();
|
||||
delete[] sync_cache_rp_coarse;
|
||||
sync_cache_rp_coarse = 0;
|
||||
}
|
||||
if (sync_cache_rp_fine)
|
||||
{
|
||||
if (use_transfer_cache() && GH)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_rp_fine[i].destroy();
|
||||
delete[] sync_cache_rp_fine;
|
||||
sync_cache_rp_fine = 0;
|
||||
}
|
||||
if (sync_cache_restrict)
|
||||
{
|
||||
if (use_transfer_cache() && GH)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_restrict[i].destroy();
|
||||
delete[] sync_cache_restrict;
|
||||
sync_cache_restrict = 0;
|
||||
}
|
||||
if (sync_cache_outbd)
|
||||
{
|
||||
if (use_transfer_cache() && GH)
|
||||
for (int i = 0; i < GH->levels; i++)
|
||||
sync_cache_outbd[i].destroy();
|
||||
delete[] sync_cache_outbd;
|
||||
sync_cache_outbd = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void bssn_class::sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
|
||||
{
|
||||
if (use_transfer_cache())
|
||||
Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_pre[lev], async_state);
|
||||
else
|
||||
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
|
||||
}
|
||||
|
||||
void bssn_class::sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
|
||||
{
|
||||
if (use_transfer_cache())
|
||||
Parallel::Sync_finish(sync_cache_pre[lev], async_state, VarList, Symmetry);
|
||||
}
|
||||
|
||||
void bssn_class::sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
|
||||
{
|
||||
if (use_transfer_cache())
|
||||
Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_cor[lev], async_state);
|
||||
else
|
||||
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
|
||||
}
|
||||
|
||||
void bssn_class::sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
|
||||
{
|
||||
if (use_transfer_cache())
|
||||
Parallel::Sync_finish(sync_cache_cor[lev], async_state, VarList, Symmetry);
|
||||
}
|
||||
|
||||
void bssn_class::sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array)
|
||||
{
|
||||
if (use_transfer_cache() && cache_array)
|
||||
Parallel::Sync_cached(GH->PatL[lev], VarList, Symmetry, cache_array[lev]);
|
||||
else
|
||||
Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
|
||||
}
|
||||
|
||||
void bssn_class::restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
|
||||
{
|
||||
if (use_transfer_cache())
|
||||
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_restrict[lev]);
|
||||
else
|
||||
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry);
|
||||
}
|
||||
|
||||
void bssn_class::outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
|
||||
{
|
||||
if (use_transfer_cache())
|
||||
{
|
||||
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_outbd[lev]);
|
||||
return;
|
||||
}
|
||||
|
||||
MyList<Patch> *Ppc = GH->PatL[lev - 1];
|
||||
while (Ppc)
|
||||
{
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, src_var_list, dst_var_list, Symmetry);
|
||||
Pp = Pp->next;
|
||||
}
|
||||
Ppc = Ppc->next;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -33,6 +33,14 @@ using namespace std;
|
||||
|
||||
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
|
||||
|
||||
// Fallback values for the build-time feature switches. Each one applies only
// when the macro has not already been defined before this point (the Makefile
// passes -DBSSN_USE_TRANSFER_CACHE=... and -DBSSN_USE_ESCALAR_C_KERNEL=...).
// NOTE(review): both fallbacks are 1, whereas makefile.inc defaults
// USE_TRANSFER_CACHE and USE_CXX_ESCALAR_KERNEL to 0 — confirm the
// difference is intended for builds that bypass the Makefile.

// Non-zero selects the cached Sync/Restrict/OutBd transfer paths.
#if !defined(BSSN_USE_TRANSFER_CACHE)
#define BSSN_USE_TRANSFER_CACHE 1
#endif

// Non-zero selects the C BSSN-EScalar RHS kernel.
#if !defined(BSSN_USE_ESCALAR_C_KERNEL)
#define BSSN_USE_ESCALAR_C_KERNEL 1
#endif
|
||||
|
||||
class bssn_class
|
||||
{
|
||||
public:
|
||||
@@ -171,6 +179,17 @@ public:
|
||||
void testOutBd();
|
||||
|
||||
bool check_Stdin_Abort();
|
||||
bool use_transfer_cache() const;
|
||||
void setup_transfer_caches();
|
||||
void invalidate_transfer_caches();
|
||||
void destroy_transfer_caches();
|
||||
void sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
|
||||
void sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
|
||||
void sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
|
||||
void sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
|
||||
void sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array = 0);
|
||||
void restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
|
||||
void outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
|
||||
|
||||
virtual void Setup_Initial_Data_Cao();
|
||||
virtual void Setup_Initial_Data_Lousto();
|
||||
|
||||
@@ -2,11 +2,19 @@
|
||||
|
||||
include makefile.inc
|
||||
|
||||
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
|
||||
ifeq ($(USE_CXX_KERNELS),0)
|
||||
$(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar_rhs_c.C reuses the C BSSN kernel)
|
||||
endif
|
||||
endif
|
||||
|
||||
## polint(ordn=6) kernel selector:
|
||||
## 1 (default): barycentric fast path
|
||||
## 0 : fallback to Neville path
|
||||
POLINT6_USE_BARY ?= 1
|
||||
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
|
||||
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(USE_TRANSFER_CACHE)
|
||||
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(USE_CXX_ESCALAR_KERNEL)
|
||||
|
||||
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
|
||||
## make -> opt (PGO-guided, maximum performance)
|
||||
@@ -16,7 +24,8 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
||||
ifeq ($(PGO_MODE),instrument)
|
||||
## Phase 1: instrumentation — omit -fp-model fast=2 for faster build and numerical stability (note: -ipo is still passed in the flags below)
|
||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
else
|
||||
@@ -26,7 +35,8 @@ else
|
||||
|
||||
|
||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
|
||||
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
|
||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
|
||||
endif
|
||||
@@ -87,7 +97,10 @@ ifeq ($(USE_CXX_KERNELS),0)
|
||||
CFILES =
|
||||
else
|
||||
# C++ mode (default): C rewrite of bssn rhs and helper kernels; the bssn-escalar rhs wrapper is added only when USE_CXX_ESCALAR_KERNEL=1
|
||||
CFILES = bssn_rhs_c.o bssn_escalar_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
|
||||
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
|
||||
CFILES += bssn_escalar_rhs_c.o
|
||||
endif
|
||||
endif
|
||||
|
||||
## RK4 kernel switch (independent from USE_CXX_KERNELS)
|
||||
|
||||
@@ -48,6 +48,17 @@ endif
|
||||
## 0 : fall back to original Fortran kernels
|
||||
USE_CXX_KERNELS ?= 1
|
||||
|
||||
## BSSN-EScalar RHS switch
|
||||
## 1 : use BSSN-EScalar C wrapper on the normal patch path
|
||||
## 0 (default) : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
|
||||
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
|
||||
USE_CXX_ESCALAR_KERNEL ?= 0
|
||||
|
||||
## Cached transfer switch
|
||||
## 1 : enable cached Sync/Restrict/OutBd transfer on evolution hot paths
|
||||
## 0 (default) : keep the original uncached transfer path for precision-safe runs
|
||||
USE_TRANSFER_CACHE ?= 0
|
||||
|
||||
## RK4 kernel implementation switch
|
||||
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
|
||||
## 0 : use original Fortran rungekutta4_rout.o
|
||||
|
||||
Reference in New Issue
Block a user