Add safe BSSN-EScalar kernel and transfer toggles

This commit is contained in:
2026-04-25 01:41:55 +08:00
parent 0f1d0de1e7
commit 0cf58176d9
6 changed files with 379 additions and 413 deletions

View File

@@ -23,8 +23,14 @@ using namespace std;
#include "rungekutta4_rout.h" #include "rungekutta4_rout.h"
#include "sommerfeld_rout.h" #include "sommerfeld_rout.h"
#include "getnp4.h" #include "getnp4.h"
#include "shellfunctions.h" #include "shellfunctions.h"
#include "parameters.h" #include "parameters.h"
#if BSSN_USE_ESCALAR_C_KERNEL
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar_c
#else
#define BSSN_ESCALAR_RHS f_compute_rhs_bssn_escalar
#endif
#ifdef With_AHF #ifdef With_AHF
#include "derivatives.h" #include "derivatives.h"
@@ -169,13 +175,7 @@ void bssnEScalar_class::Initialize()
Setup_Black_Hole_position(); Setup_Black_Hole_position();
} }
// BSSN-EScalar currently uses the uncached communication fallback paths. setup_transfer_caches();
sync_cache_pre = 0;
sync_cache_cor = 0;
sync_cache_rp_coarse = 0;
sync_cache_rp_fine = 0;
sync_cache_restrict = 0;
sync_cache_outbd = 0;
} }
//================================================================================================ //================================================================================================
@@ -345,11 +345,13 @@ void bssnEScalar_class::Read_Ansorg()
} }
inf.close(); inf.close();
} }
int order = 6; int order = 6;
Ansorg read_ansorg("Ansorg.psid", order); Ansorg read_ansorg("Ansorg.psid", order);
// set initial data if (myrank == 0)
for (int lev = 0; lev < GH->levels; lev++) cout << "[debug] Read_Ansorg: Ansorg object ready" << endl;
{ // set initial data
for (int lev = 0; lev < GH->levels; lev++)
{
MyList<Patch> *Pp = GH->PatL[lev]; MyList<Patch> *Pp = GH->PatL[lev];
while (Pp) while (Pp)
{ {
@@ -381,12 +383,14 @@ void bssnEScalar_class::Read_Ansorg()
if (BL == Pp->data->ble) if (BL == Pp->data->ble)
break; break;
BL = BL->next; BL = BL->next;
} }
Pp = Pp->next; Pp = Pp->next;
} }
} if (myrank == 0)
#ifdef WithShell cout << "[debug] Read_Ansorg: finished level " << lev << " patch init" << endl;
// ShellPatch part }
#ifdef WithShell
// ShellPatch part
MyList<ss_patch> *Pp = SH->PatL; MyList<ss_patch> *Pp = SH->PatL;
while (Pp) while (Pp)
{ {
@@ -423,15 +427,19 @@ void bssnEScalar_class::Read_Ansorg()
if (BL == Pp->data->ble) if (BL == Pp->data->ble)
break; break;
BL = BL->next; BL = BL->next;
} }
Pp = Pp->next; Pp = Pp->next;
} }
#endif if (myrank == 0)
cout << "[debug] Read_Ansorg: finished shell init" << endl;
#endif
delete[] Porg_here; delete[] Porg_here;
delete[] pmom_local; delete[] pmom_local;
delete[] spin_local; delete[] spin_local;
delete[] mass_local; delete[] mass_local;
if (myrank == 0)
cout << "[debug] Read_Ansorg: finished local cleanup" << endl;
// dump read_in initial data // dump read_in initial data
// for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT); // for(int lev=0;lev<GH->levels;lev++) Parallel::Dump_Data(GH->PatL[lev],StateList,0,PhysTime,dT);
} }
@@ -762,7 +770,7 @@ void bssnEScalar_class::Step(int lev, int YN)
cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]); cg->fgfs[Ayy0->sgfn], cg->fgfs[Ayz0->sgfn], cg->fgfs[Azz0->sgfn]);
#endif #endif
if (f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -1016,11 +1024,12 @@ void bssnEScalar_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry); Parallel::AsyncSyncState async_pre;
sync_predictor_start(lev, SynchList_pre, async_pre);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{ {
clock_t prev_clock, curr_clock; clock_t prev_clock, curr_clock;
if (myrank == 0) if (myrank == 0)
curr_clock = clock(); curr_clock = clock();
@@ -1032,9 +1041,10 @@ void bssnEScalar_class::Step(int lev, int YN)
cout << " Shell stuff synchronization used " cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
} }
} }
#endif #endif
sync_predictor_finish(lev, async_pre, SynchList_pre);
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
@@ -1104,7 +1114,7 @@ void bssnEScalar_class::Step(int lev, int YN)
cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]); cg->fgfs[Ayy->sgfn], cg->fgfs[Ayz->sgfn], cg->fgfs[Azz->sgfn]);
#endif #endif
if (f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], if (BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn], cg->fgfs[phi->sgfn], cg->fgfs[trK->sgfn],
cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn], cg->fgfs[gxx->sgfn], cg->fgfs[gxy->sgfn], cg->fgfs[gxz->sgfn],
cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn], cg->fgfs[gyy->sgfn], cg->fgfs[gyz->sgfn], cg->fgfs[gzz->sgfn],
@@ -1372,11 +1382,12 @@ void bssnEScalar_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry); Parallel::AsyncSyncState async_cor;
sync_corrector_start(lev, SynchList_cor, async_cor);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
{ {
clock_t prev_clock, curr_clock; clock_t prev_clock, curr_clock;
if (myrank == 0) if (myrank == 0)
curr_clock = clock(); curr_clock = clock();
@@ -1388,9 +1399,10 @@ void bssnEScalar_class::Step(int lev, int YN)
cout << " Shell stuff synchronization used " cout << " Shell stuff synchronization used "
<< (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC) << (double)(curr_clock - prev_clock) / ((double)CLOCKS_PER_SEC)
<< " seconds! " << endl; << " seconds! " << endl;
} }
} }
#endif #endif
sync_corrector_finish(lev, async_cor, SynchList_cor);
// for black hole position // for black hole position
if (BH_num > 0 && lev == GH->levels - 1) if (BH_num > 0 && lev == GH->levels - 1)
{ {
@@ -1858,11 +1870,14 @@ void bssnEScalar_class::AnalysisStuff_EScalar(int lev, double dT_lev)
//================================================================================================ //================================================================================================
void bssnEScalar_class::Interp_Constraint() void bssnEScalar_class::Interp_Constraint(bool infg)
{ {
// we do not support a_lev != 0 yet. if (!infg)
if (a_lev > 0) return;
return;
// we do not support a_lev != 0 yet.
if (a_lev > 0)
return;
for (int lev = 0; lev < GH->levels; lev++) for (int lev = 0; lev < GH->levels; lev++)
{ {
@@ -1881,7 +1896,7 @@ void bssnEScalar_class::Interp_Constraint()
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
if (lev > 0) if (lev > 0)
f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],
@@ -2101,7 +2116,7 @@ void bssnEScalar_class::Constraint_Out()
if (myrank == cg->rank) if (myrank == cg->rank)
{ {
if (lev > 0) if (lev > 0)
f_compute_rhs_bssn_escalar_c(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2], BSSN_ESCALAR_RHS(cg->shape, TRK4, cg->X[0], cg->X[1], cg->X[2],
cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn], cg->fgfs[phi0->sgfn], cg->fgfs[trK0->sgfn],
cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn], cg->fgfs[gxx0->sgfn], cg->fgfs[gxy0->sgfn], cg->fgfs[gxz0->sgfn],
cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn], cg->fgfs[gyy0->sgfn], cg->fgfs[gyz0->sgfn], cg->fgfs[gzz0->sgfn],

View File

@@ -51,7 +51,7 @@ public:
void Compute_Psi4(int lev); void Compute_Psi4(int lev);
void Step(int lev, int YN); void Step(int lev, int YN);
void AnalysisStuff_EScalar(int lev, double dT_lev); void AnalysisStuff_EScalar(int lev, double dT_lev);
void Interp_Constraint(); void Interp_Constraint(bool infg);
void Constraint_Out(); void Constraint_Out();
protected: protected:

View File

@@ -283,7 +283,7 @@ namespace rhs_kernel_timing_report
bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei, bssn_class::bssn_class(double Couranti, double StartTimei, double TotalTimei,
double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei, double DumpTimei, double d2DumpTimei, double CheckTimei, double AnasTimei,
int Symmetryi, int checkruni, char *checkfilenamei, int Symmetryi, int checkruni, char *checkfilenamei,
double numepssi, double numepsbi, double numepshi, double numepssi, double numepsbi, double numepshi,
int a_levi, int maxli, int decni, double maxrexi, double drexi) int a_levi, int maxli, int decni, double maxrexi, double drexi)
: Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei), : Courant(Couranti), StartTime(StartTimei), TotalTime(TotalTimei),
DumpTime(DumpTimei), d2DumpTime(d2DumpTimei), CheckTime(CheckTimei), AnasTime(AnasTimei), DumpTime(DumpTimei), d2DumpTime(d2DumpTimei), CheckTime(CheckTimei), AnasTime(AnasTimei),
@@ -1008,21 +1008,7 @@ void bssn_class::Initialize()
Setup_Black_Hole_position(); Setup_Black_Hole_position();
} }
// BSSN-EScalar uses the uncached communication fallback paths. setup_transfer_caches();
sync_cache_pre = 0;
sync_cache_cor = 0;
sync_cache_rp_coarse = 0;
sync_cache_rp_fine = 0;
sync_cache_restrict = 0;
sync_cache_outbd = 0;
#if (ABEtype != 1)
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
#endif
} }
//================================================================================================ //================================================================================================
@@ -1037,15 +1023,8 @@ void bssn_class::Initialize()
bssn_class::~bssn_class() bssn_class::~bssn_class()
{ {
#if (ABEtype == 1)
if (myrank == 0)
{
cout << "[dtor] begin" << endl;
cout.flush();
}
#endif
#ifdef With_AHF #ifdef With_AHF
AHList->clearList(); AHList->clearList();
AHDList->clearList(); AHDList->clearList();
GaugeList->clearList(); GaugeList->clearList();
if (lastahdumpid) if (lastahdumpid)
@@ -1078,13 +1057,6 @@ bssn_class::~bssn_class()
ConstraintList->clearList(); ConstraintList->clearList();
delete[] ConstraintRefreshLevels; delete[] ConstraintRefreshLevels;
#if (ABEtype == 1)
if (myrank == 0)
{
cout << "[dtor] lists cleared" << endl;
cout.flush();
}
#endif
delete phio; delete phio;
delete trKo; delete trKo;
@@ -1257,20 +1229,13 @@ bssn_class::~bssn_class()
delete Cons_Ham; delete Cons_Ham;
delete Cons_Px; delete Cons_Px;
delete Cons_Py; delete Cons_Py;
delete Cons_Pz; delete Cons_Pz;
delete Cons_Gx; delete Cons_Gx;
delete Cons_Gy; delete Cons_Gy;
delete Cons_Gz; delete Cons_Gz;
#if (ABEtype == 1)
if (myrank == 0) #ifdef Point_Psi4
{
cout << "[dtor] core vars freed" << endl;
cout.flush();
}
#endif
#ifdef Point_Psi4
delete phix; delete phix;
delete phiy; delete phiy;
delete phiz; delete phiz;
@@ -1296,78 +1261,17 @@ bssn_class::~bssn_class()
delete Azzy; delete Azzy;
delete Azzz; delete Azzz;
#endif #endif
// Destroy sync caches before GH // Destroy sync caches before GH
if (sync_cache_pre) destroy_transfer_caches();
{
#if (ABEtype != 1)
for (int i = 0; i < GH->levels; i++)
sync_cache_pre[i].destroy();
#endif
delete[] sync_cache_pre;
}
if (sync_cache_cor)
{
#if (ABEtype != 1)
for (int i = 0; i < GH->levels; i++)
sync_cache_cor[i].destroy();
#endif
delete[] sync_cache_cor;
}
if (sync_cache_rp_coarse)
{
#if (ABEtype != 1)
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_coarse[i].destroy();
#endif
delete[] sync_cache_rp_coarse;
}
if (sync_cache_rp_fine)
{
#if (ABEtype != 1)
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_fine[i].destroy();
#endif
delete[] sync_cache_rp_fine;
}
if (sync_cache_restrict)
{
#if (ABEtype != 1)
for (int i = 0; i < GH->levels; i++)
sync_cache_restrict[i].destroy();
#endif
delete[] sync_cache_restrict;
}
if (sync_cache_outbd)
{
#if (ABEtype != 1)
for (int i = 0; i < GH->levels; i++)
sync_cache_outbd[i].destroy();
#endif
delete[] sync_cache_outbd;
}
#if (ABEtype == 1)
if (myrank == 0)
{
cout << "[dtor] caches freed" << endl;
cout.flush();
}
#endif
delete GH; delete GH;
#ifdef WithShell #ifdef WithShell
delete SH; delete SH;
#endif #endif
#if (ABEtype == 1)
if (myrank == 0) for (int i = 0; i < BH_num; i++)
{ {
cout << "[dtor] grids freed" << endl;
cout.flush();
}
#endif
for (int i = 0; i < BH_num; i++)
{
delete[] Porg0[i]; delete[] Porg0[i];
delete[] Porgbr[i]; delete[] Porgbr[i];
delete[] Porg[i]; delete[] Porg[i];
@@ -1380,17 +1284,10 @@ bssn_class::~bssn_class()
delete[] Porg; delete[] Porg;
delete[] Porg1; delete[] Porg1;
delete[] Porg_rhs; delete[] Porg_rhs;
delete[] Mass; delete[] Mass;
delete[] Spin; delete[] Spin;
delete[] Pmom; delete[] Pmom;
#if (ABEtype == 1)
if (myrank == 0)
{
cout << "[dtor] puncture arrays freed" << endl;
cout.flush();
}
#endif
delete ErrorMonitor; delete ErrorMonitor;
delete Psi4Monitor; delete Psi4Monitor;
@@ -1399,22 +1296,8 @@ bssn_class::~bssn_class()
delete ConVMonitor; delete ConVMonitor;
delete TimingMonitor; delete TimingMonitor;
delete Waveshell; delete Waveshell;
#if (ABEtype == 1)
if (myrank == 0)
{
cout << "[dtor] monitors freed" << endl;
cout.flush();
}
#endif
delete CheckPoint; delete CheckPoint;
#if (ABEtype == 1)
if (myrank == 0)
{
cout << "[dtor] checkpoint freed" << endl;
cout.flush();
}
#endif
} }
//================================================================================================ //================================================================================================
@@ -2599,9 +2482,7 @@ void bssn_class::Evolve(int Steps)
GH->Regrid(Symmetry, BH_num, Porgbr, Porg0, GH->Regrid(Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor); fgt(PhysTime - dT_mon, StartTime, dT_mon / 2), ErrorMonitor);
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
STEP_TIMER_ADD(TB_REGRID, timer_regrid); STEP_TIMER_ADD(TB_REGRID, timer_regrid);
#endif #endif
@@ -2842,9 +2723,7 @@ void bssn_class::RecursiveStep(int lev)
{ {
if (ConstraintRefreshLevels) if (ConstraintRefreshLevels)
ConstraintRefreshLevels[lev] = 1; ConstraintRefreshLevels[lev] = 1;
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
} }
STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel); STEP_TIMER_ADD(TB_REGRID, timer_regrid_onelevel);
#endif #endif
@@ -3022,13 +2901,11 @@ void bssn_class::ParallelStep()
delete[] tporg; delete[] tporg;
delete[] tporgo; delete[] tporgo;
#if (REGLEV == 0) #if (REGLEV == 0)
if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(GH->mylev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif #endif
#endif
} }
//================================================================================================ //================================================================================================
@@ -3191,12 +3068,10 @@ void bssn_class::ParallelStep()
if (lev + 1 >= GH->movls) if (lev + 1 >= GH->movls)
{ {
// GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0, // GH->Regrid_Onelevel_aux(lev,Symmetry,BH_num,Porgbr,Porg0,
if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev + 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor)) fgt(PhysTime - dT_levp1, StartTime, dT_levp1 / 2), ErrorMonitor))
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -3208,12 +3083,10 @@ void bssn_class::ParallelStep()
// for this level // for this level
if (YN == 1) if (YN == 1)
{ {
if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_lev / 2), ErrorMonitor))
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -3229,12 +3102,10 @@ void bssn_class::ParallelStep()
if (YN == 1) if (YN == 1)
{ {
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -3247,12 +3118,10 @@ void bssn_class::ParallelStep()
if (i % 4 == 3) if (i % 4 == 3)
{ {
// GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0, // GH->Regrid_Onelevel_aux(lev-2,Symmetry,BH_num,Porgbr,Porg0,
if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0, if (GH->Regrid_Onelevel(lev - 1, Symmetry, BH_num, Porgbr, Porg0,
SynchList_cor, OldStateList, StateList, SynchList_pre, SynchList_cor, OldStateList, StateList, SynchList_pre,
fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor)) fgt(PhysTime - dT_lev, StartTime, dT_levm1 / 2), ErrorMonitor))
#if (ABEtype != 1) invalidate_transfer_caches();
for (int il = 0; il < GH->levels; il++) { sync_cache_pre[il].invalidate(); sync_cache_cor[il].invalidate(); sync_cache_rp_coarse[il].invalidate(); sync_cache_rp_fine[il].invalidate(); sync_cache_restrict[il].invalidate(); sync_cache_outbd[il].invalidate(); }
#endif
// a_stream.clear(); // a_stream.clear();
// a_stream.str(""); // a_stream.str("");
@@ -3783,11 +3652,7 @@ void bssn_class::Step(int lev, int YN)
STEP_TIMER_DECL(timer_predictor_sync); STEP_TIMER_DECL(timer_predictor_sync);
Parallel::AsyncSyncState async_pre; Parallel::AsyncSyncState async_pre;
#if (ABEtype == 1) sync_predictor_start(lev, SynchList_pre, async_pre);
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#endif
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -3806,9 +3671,7 @@ void bssn_class::Step(int lev, int YN)
} }
} }
#endif #endif
#if (ABEtype != 1) sync_predictor_finish(lev, async_pre, SynchList_pre);
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
#ifdef WithShell #ifdef WithShell
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check
@@ -4154,11 +4017,7 @@ void bssn_class::Step(int lev, int YN)
STEP_TIMER_DECL(timer_corrector_sync); STEP_TIMER_DECL(timer_corrector_sync);
Parallel::AsyncSyncState async_cor; Parallel::AsyncSyncState async_cor;
#if (ABEtype == 1) sync_corrector_start(lev, SynchList_cor, async_cor);
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#endif
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -4177,9 +4036,7 @@ void bssn_class::Step(int lev, int YN)
} }
} }
#endif #endif
#if (ABEtype != 1) sync_corrector_finish(lev, async_cor, SynchList_cor);
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
#ifdef WithShell #ifdef WithShell
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check
@@ -4664,15 +4521,11 @@ void bssn_class::Step(int lev, int YN)
{ {
int erh = ERROR; int erh = ERROR;
MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req); MPI_Iallreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &err_req);
} }
#endif
Parallel::AsyncSyncState async_pre;
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev], async_pre);
#endif #endif
Parallel::AsyncSyncState async_pre;
sync_predictor_start(lev, SynchList_pre, async_pre);
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -4691,9 +4544,7 @@ void bssn_class::Step(int lev, int YN)
} }
} }
#endif #endif
#if (ABEtype != 1) sync_predictor_finish(lev, async_pre, SynchList_pre);
Parallel::Sync_finish(sync_cache_pre[lev], async_pre, SynchList_pre, Symmetry);
#endif
#ifdef WithShell #ifdef WithShell
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check
@@ -5021,12 +4872,8 @@ void bssn_class::Step(int lev, int YN)
} }
#endif #endif
Parallel::AsyncSyncState async_cor; Parallel::AsyncSyncState async_cor;
#if (ABEtype == 1) sync_corrector_start(lev, SynchList_cor, async_cor);
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_start(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev], async_cor);
#endif
#ifdef WithShell #ifdef WithShell
if (lev == 0) if (lev == 0)
@@ -5045,9 +4892,7 @@ void bssn_class::Step(int lev, int YN)
} }
} }
#endif #endif
#if (ABEtype != 1) sync_corrector_finish(lev, async_cor, SynchList_cor);
Parallel::Sync_finish(sync_cache_cor[lev], async_cor, SynchList_cor, Symmetry);
#endif
#ifdef WithShell #ifdef WithShell
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check
@@ -5439,11 +5284,7 @@ void bssn_class::Step(int lev, int YN)
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync"); // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Predictor sync");
#if (ABEtype == 1) sync_evolution(lev, SynchList_pre, sync_cache_pre);
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_pre, Symmetry, sync_cache_pre[lev]);
#endif
// Complete non-blocking error reduction and check // Complete non-blocking error reduction and check
MPI_Wait(&err_req, MPI_STATUS_IGNORE); MPI_Wait(&err_req, MPI_STATUS_IGNORE);
@@ -5644,11 +5485,7 @@ void bssn_class::Step(int lev, int YN)
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync"); // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"before Corrector sync");
#if (ABEtype == 1) sync_evolution(lev, SynchList_cor, sync_cache_cor);
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_cor[lev]);
#endif
// misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync"); // misc::tillherecheck(GH->Commlev[lev],GH->start_rank[lev],"after Corrector sync");
@@ -6365,15 +6202,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif #endif
#if (RPB == 0) #if (RPB == 0)
#if (ABEtype == 1) restrict_evolution(lev, SL, SynchList_pre);
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SynchList_pre, GH->rsul[lev], Symmetry);
#endif #endif
#if (PSTR == 1 || PSTR == 2) #if (PSTR == 1 || PSTR == 2)
// a_stream.clear(); // a_stream.clear();
@@ -6382,11 +6215,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif #endif
#if (ABEtype == 1) sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (PSTR == 1 || PSTR == 2) #if (PSTR == 1 || PSTR == 2)
// a_stream.clear(); // a_stream.clear();
@@ -6397,21 +6226,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
#if (ABEtype == 1) outbdlow2hi_evolution(lev, SynchList_pre, SL);
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SL, Symmetry);
#endif #endif
@@ -6438,15 +6253,11 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#endif #endif
#if (RPB == 0) #if (RPB == 0)
#if (ABEtype == 1) restrict_evolution(lev, SL, SL);
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SL,SL,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, GH->rsul[lev], Symmetry);
#endif #endif
#if (PSTR == 1 || PSTR == 2) #if (PSTR == 1 || PSTR == 2)
// a_stream.clear(); // a_stream.clear();
@@ -6455,11 +6266,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
// misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str()); // misc::tillherecheck(GH->Commlev[GH->mylev],GH->start_rank[GH->mylev],a_stream.str());
#endif #endif
#if (ABEtype == 1) sync_evolution(lev - 1, SL, sync_cache_rp_coarse);
Parallel::Sync(GH->PatL[lev - 1], SL, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], SL, Symmetry, sync_cache_rp_coarse[lev]);
#endif
#if (PSTR == 1 || PSTR == 2) #if (PSTR == 1 || PSTR == 2)
// a_stream.clear(); // a_stream.clear();
@@ -6470,21 +6277,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB,
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
#if (ABEtype == 1) outbdlow2hi_evolution(lev, SL, SL);
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SL, SL, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SL, SL, Symmetry);
#endif #endif
@@ -6685,7 +6478,7 @@ void bssn_class::RestrictProlong_aux(int lev, int YN, bool BB,
#if (ABEtype == 1) #if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev], SL, Symmetry); Parallel::Sync(GH->PatL[lev], SL, Symmetry);
#else #else
Parallel::Sync_cached(GH->PatL[lev], SL, Symmetry, sync_cache_rp_fine[lev]); sync_evolution(lev, SL, sync_cache_rp_fine);
#endif #endif
} }
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong); STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
@@ -6818,39 +6611,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
} }
#if (RPB == 0) #if (RPB == 0)
#if (ABEtype == 1) restrict_evolution(lev, SynchList_cor, SynchList_pre);
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,SynchList_pre,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, SynchList_pre, GH->rsul[lev], Symmetry);
#endif
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev - 1], SynchList_pre, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], SynchList_pre, Symmetry, sync_cache_rp_coarse[lev]);
#endif #endif
sync_evolution(lev - 1, SynchList_pre, sync_cache_rp_coarse);
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
#if (ABEtype == 1) outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, SynchList_pre, SynchList_cor, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif #endif
@@ -6864,39 +6635,17 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
if (myrank == 0) if (myrank == 0)
cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl; cout << "===: " << GH->Lt[lev - 1] << "," << GH->Lt[lev] + dT_lev << endl;
#if (RPB == 0) #if (RPB == 0)
#if (ABEtype == 1) restrict_evolution(lev, SynchList_cor, StateList);
Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#else
Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry, sync_cache_restrict[lev]);
#endif
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry); // Parallel::Restrict_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_cor,StateList,Symmetry);
Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry); Parallel::Restrict_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, GH->rsul[lev], Symmetry);
#endif
#if (ABEtype == 1)
Parallel::Sync(GH->PatL[lev - 1], StateList, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]);
#endif #endif
sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
#if (ABEtype == 1) outbdlow2hi_evolution(lev, StateList, SynchList_cor);
Ppc = GH->PatL[lev - 1];
while (Ppc)
{
Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, StateList, SynchList_cor, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
#else
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]);
#endif
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif #endif
@@ -6906,11 +6655,7 @@ void bssn_class::RestrictProlong(int lev, int YN, bool BB)
#endif #endif
} }
#if (ABEtype == 1) sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
#else
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]);
#endif
} }
STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong); STEP_TIMER_ADD(TB_RESTRICT_PROLONG, timer_restrict_prolong);
} }
@@ -6940,12 +6685,12 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
Pp = Pp->next; Pp = Pp->next;
} }
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry, sync_cache_outbd[lev]); outbdlow2hi_evolution(lev, SynchList_pre, SynchList_cor);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, Symmetry);
#endif #endif
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],SynchList_pre,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], SynchList_pre, SynchList_cor, GH->bdsul[lev], Symmetry);
@@ -6953,12 +6698,12 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
} }
else // no time refinement levels and for all same time levels else // no time refinement levels and for all same time levels
{ {
#if (RPB == 0) #if (RPB == 0)
#if (MIXOUTB == 0) #if (MIXOUTB == 0)
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry, sync_cache_outbd[lev]); outbdlow2hi_evolution(lev, StateList, SynchList_cor);
#elif (MIXOUTB == 1) #elif (MIXOUTB == 1)
Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry); Parallel::OutBdLow2Himix(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, Symmetry);
#endif #endif
#elif (RPB == 1) #elif (RPB == 1)
// Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry); // Parallel::OutBdLow2Hi_bam(GH->PatL[lev-1],GH->PatL[lev],StateList,SynchList_cor,Symmetry);
Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry); Parallel::OutBdLow2Hi_bam(GH->PatL[lev - 1], GH->PatL[lev], StateList, SynchList_cor, GH->bdsul[lev], Symmetry);
@@ -6974,12 +6719,12 @@ void bssn_class::ProlongRestrict(int lev, int YN, bool BB)
#else #else
Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry); Parallel::Restrict_after(GH->PatL[lev - 1], GH->PatL[lev], SynchList_cor, StateList, Symmetry);
#endif #endif
Parallel::Sync_cached(GH->PatL[lev - 1], StateList, Symmetry, sync_cache_rp_coarse[lev]); sync_evolution(lev - 1, StateList, sync_cache_rp_coarse);
} }
Parallel::Sync_cached(GH->PatL[lev], SynchList_cor, Symmetry, sync_cache_rp_fine[lev]); sync_evolution(lev, SynchList_cor, sync_cache_rp_fine);
} }
} }
#undef MIXOUTB #undef MIXOUTB
//================================================================================================ //================================================================================================
@@ -7707,6 +7452,169 @@ void bssn_class::compute_Porg_rhs(double **BH_PS, double **BH_RHS, var *forx, va
} }
} }
} }
// Reports whether the cached transfer path was selected at compile time
// via the BSSN_USE_TRANSFER_CACHE preprocessor switch.
bool bssn_class::use_transfer_cache() const
{
#if BSSN_USE_TRANSFER_CACHE
  const bool cache_enabled = true;
#else
  const bool cache_enabled = false;
#endif
  return cache_enabled;
}
void bssn_class::setup_transfer_caches()
{
sync_cache_pre = 0;
sync_cache_cor = 0;
sync_cache_rp_coarse = 0;
sync_cache_rp_fine = 0;
sync_cache_restrict = 0;
sync_cache_outbd = 0;
if (!use_transfer_cache() || !GH)
return;
sync_cache_pre = new Parallel::SyncCache[GH->levels];
sync_cache_cor = new Parallel::SyncCache[GH->levels];
sync_cache_rp_coarse = new Parallel::SyncCache[GH->levels];
sync_cache_rp_fine = new Parallel::SyncCache[GH->levels];
sync_cache_restrict = new Parallel::SyncCache[GH->levels];
sync_cache_outbd = new Parallel::SyncCache[GH->levels];
}
// Calls invalidate() on the per-level entry of every transfer cache array.
// Does nothing unless the cached path is compiled in, GH exists, and all six
// cache arrays are non-null.
void bssn_class::invalidate_transfer_caches()
{
  if (!use_transfer_cache() || !GH)
    return;

  const bool all_allocated = sync_cache_pre && sync_cache_cor &&
                             sync_cache_rp_coarse && sync_cache_rp_fine &&
                             sync_cache_restrict && sync_cache_outbd;
  if (!all_allocated)
    return;

  for (int level = 0; level < GH->levels; level++)
  {
    sync_cache_pre[level].invalidate();
    sync_cache_cor[level].invalidate();
    sync_cache_rp_coarse[level].invalidate();
    sync_cache_rp_fine[level].invalidate();
    sync_cache_restrict[level].invalidate();
    sync_cache_outbd[level].invalidate();
  }
}
void bssn_class::destroy_transfer_caches()
{
if (sync_cache_pre)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_pre[i].destroy();
delete[] sync_cache_pre;
sync_cache_pre = 0;
}
if (sync_cache_cor)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_cor[i].destroy();
delete[] sync_cache_cor;
sync_cache_cor = 0;
}
if (sync_cache_rp_coarse)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_coarse[i].destroy();
delete[] sync_cache_rp_coarse;
sync_cache_rp_coarse = 0;
}
if (sync_cache_rp_fine)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_rp_fine[i].destroy();
delete[] sync_cache_rp_fine;
sync_cache_rp_fine = 0;
}
if (sync_cache_restrict)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_restrict[i].destroy();
delete[] sync_cache_restrict;
sync_cache_restrict = 0;
}
if (sync_cache_outbd)
{
if (use_transfer_cache() && GH)
for (int i = 0; i < GH->levels; i++)
sync_cache_outbd[i].destroy();
delete[] sync_cache_outbd;
sync_cache_outbd = 0;
}
}
// Starts the predictor-step synchronisation of VarList on level `lev`.
//
// Cached path: Parallel::Sync_start with sync_cache_pre[lev] and async_state.
// Fallback   : Parallel::Sync; async_state is not touched.
//
// The fallback is taken when the toggle is compiled out OR when the predictor
// cache array was never allocated (setup_transfer_caches leaves it null when
// there is no grid hierarchy), mirroring the null guard in sync_evolution so
// a null array is never indexed.
void bssn_class::sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
{
  if (use_transfer_cache() && sync_cache_pre)
    Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_pre[lev], async_state);
  else
    Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
}
// Completes a predictor-step synchronisation begun by sync_predictor_start.
//
// Cached path: Parallel::Sync_finish with sync_cache_pre[lev] and async_state.
// Otherwise  : nothing to do, because the start side already ran
//              Parallel::Sync in that case.
//
// The cache array is null-checked so that an unallocated sync_cache_pre is
// never indexed.
void bssn_class::sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
{
  if (use_transfer_cache() && sync_cache_pre)
    Parallel::Sync_finish(sync_cache_pre[lev], async_state, VarList, Symmetry);
}
// Starts the corrector-step synchronisation of VarList on level `lev`.
//
// Cached path: Parallel::Sync_start with sync_cache_cor[lev] and async_state.
// Fallback   : Parallel::Sync; async_state is not touched.
//
// The fallback is taken when the toggle is compiled out OR when the corrector
// cache array was never allocated (setup_transfer_caches leaves it null when
// there is no grid hierarchy), mirroring the null guard in sync_evolution so
// a null array is never indexed.
void bssn_class::sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state)
{
  if (use_transfer_cache() && sync_cache_cor)
    Parallel::Sync_start(GH->PatL[lev], VarList, Symmetry, sync_cache_cor[lev], async_state);
  else
    Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
}
// Completes a corrector-step synchronisation begun by sync_corrector_start.
//
// Cached path: Parallel::Sync_finish with sync_cache_cor[lev] and async_state.
// Otherwise  : nothing to do, because the start side already ran
//              Parallel::Sync in that case.
//
// The cache array is null-checked so that an unallocated sync_cache_cor is
// never indexed.
void bssn_class::sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList)
{
  if (use_transfer_cache() && sync_cache_cor)
    Parallel::Sync_finish(sync_cache_cor[lev], async_state, VarList, Symmetry);
}
// Synchronises VarList on level `lev`.
// Uses Parallel::Sync_cached with cache_array[lev] when the cached path is
// compiled in and a cache array was supplied; otherwise uses Parallel::Sync.
void bssn_class::sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array)
{
  const bool cached = use_transfer_cache() && cache_array;
  if (!cached)
  {
    Parallel::Sync(GH->PatL[lev], VarList, Symmetry);
    return;
  }

  Parallel::Sync_cached(GH->PatL[lev], VarList, Symmetry, cache_array[lev]);
}
// Restricts src_var_list on level `lev` into dst_var_list on level `lev - 1`.
//
// Cached path: Parallel::Restrict_cached with sync_cache_restrict[lev].
// Fallback   : Parallel::Restrict.
//
// The fallback is taken when the toggle is compiled out OR when the restrict
// cache array was never allocated (setup_transfer_caches leaves it null when
// there is no grid hierarchy), mirroring the null guard in sync_evolution so
// a null array is never indexed.
void bssn_class::restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
{
  if (use_transfer_cache() && sync_cache_restrict)
    Parallel::Restrict_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_restrict[lev]);
  else
    Parallel::Restrict(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry);
}
void bssn_class::outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list)
{
if (use_transfer_cache())
{
Parallel::OutBdLow2Hi_cached(GH->PatL[lev - 1], GH->PatL[lev], src_var_list, dst_var_list, Symmetry, sync_cache_outbd[lev]);
return;
}
MyList<Patch> *Ppc = GH->PatL[lev - 1];
while (Ppc)
{
MyList<Patch> *Pp = GH->PatL[lev];
while (Pp)
{
Parallel::OutBdLow2Hi(Ppc->data, Pp->data, src_var_list, dst_var_list, Symmetry);
Pp = Pp->next;
}
Ppc = Ppc->next;
}
}
#endif #endif
//================================================================================================ //================================================================================================

View File

@@ -31,11 +31,19 @@ using namespace std;
#include "surface_integral.h" #include "surface_integral.h"
#include "checkpoint.h" #include "checkpoint.h"
extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN); extern void setpbh(int iBHN, double **iPBH, double *iMass, int rBHN);
class bssn_class #ifndef BSSN_USE_TRANSFER_CACHE
{ #define BSSN_USE_TRANSFER_CACHE 1
public: #endif
#ifndef BSSN_USE_ESCALAR_C_KERNEL
#define BSSN_USE_ESCALAR_C_KERNEL 1
#endif
class bssn_class
{
public:
int ngfs; int ngfs;
int nprocs, myrank; int nprocs, myrank;
cgh *GH; cgh *GH;
@@ -167,14 +175,25 @@ public:
void Setup_KerrSchild(); void Setup_KerrSchild();
void Enforce_algcon(int lev, int fg); void Enforce_algcon(int lev, int fg);
void testRestrict(); void testRestrict();
void testOutBd(); void testOutBd();
bool check_Stdin_Abort(); bool check_Stdin_Abort();
bool use_transfer_cache() const;
virtual void Setup_Initial_Data_Cao(); void setup_transfer_caches();
virtual void Setup_Initial_Data_Lousto(); void invalidate_transfer_caches();
virtual void Initialize(); void destroy_transfer_caches();
void sync_predictor_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
void sync_predictor_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
void sync_corrector_start(int lev, MyList<var> *VarList, Parallel::AsyncSyncState &async_state);
void sync_corrector_finish(int lev, Parallel::AsyncSyncState &async_state, MyList<var> *VarList);
void sync_evolution(int lev, MyList<var> *VarList, Parallel::SyncCache *cache_array = 0);
void restrict_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
void outbdlow2hi_evolution(int lev, MyList<var> *src_var_list, MyList<var> *dst_var_list);
virtual void Setup_Initial_Data_Cao();
virtual void Setup_Initial_Data_Lousto();
virtual void Initialize();
virtual void Read_Ansorg(); virtual void Read_Ansorg();
virtual void Read_Pablo() {}; virtual void Read_Pablo() {};
virtual void Compute_Psi4(int lev); virtual void Compute_Psi4(int lev);

View File

@@ -2,11 +2,19 @@
include makefile.inc include makefile.inc
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
ifeq ($(USE_CXX_KERNELS),0)
$(error USE_CXX_ESCALAR_KERNEL=1 requires USE_CXX_KERNELS=1 because bssn_escalar_rhs_c.C reuses the C BSSN kernel)
endif
endif
## polint(ordn=6) kernel selector: ## polint(ordn=6) kernel selector:
## 1 (default): barycentric fast path ## 1 (default): barycentric fast path
## 0 : fallback to Neville path ## 0 : fallback to Neville path
POLINT6_USE_BARY ?= 1 POLINT6_USE_BARY ?= 1
POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY) POLINT6_FLAG = -DPOLINT6_USE_BARYCENTRIC=$(POLINT6_USE_BARY)
TRANSFER_CACHE_FLAG = -DBSSN_USE_TRANSFER_CACHE=$(USE_TRANSFER_CACHE)
ESCALAR_KERNEL_FLAG = -DBSSN_USE_ESCALAR_C_KERNEL=$(USE_CXX_ESCALAR_KERNEL)
## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt) ## ABE build flags selected by PGO_MODE (set in makefile.inc, default: opt)
## make -> opt (PGO-guided, maximum performance) ## make -> opt (PGO-guided, maximum performance)
@@ -16,7 +24,8 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
ifeq ($(PGO_MODE),instrument) ifeq ($(PGO_MODE),instrument)
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability ## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \ CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
else else
@@ -26,7 +35,8 @@ else
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) \
$(TRANSFER_CACHE_FLAG) $(ESCALAR_KERNEL_FLAG)
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
-align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG) -align array64byte -fpp -I${MKLROOT}/include $(POLINT6_FLAG)
endif endif
@@ -87,7 +97,10 @@ ifeq ($(USE_CXX_KERNELS),0)
CFILES = CFILES =
else else
# C++ mode (default): C rewrite of bssn/bssn-escalar rhs and helper kernels # C++ mode (default): C rewrite of bssn/bssn-escalar rhs and helper kernels
CFILES = bssn_rhs_c.o bssn_escalar_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o
ifeq ($(USE_CXX_ESCALAR_KERNEL),1)
CFILES += bssn_escalar_rhs_c.o
endif
endif endif
## RK4 kernel switch (independent from USE_CXX_KERNELS) ## RK4 kernel switch (independent from USE_CXX_KERNELS)

View File

@@ -48,6 +48,17 @@ endif
## 0 : fall back to original Fortran kernels ## 0 : fall back to original Fortran kernels
USE_CXX_KERNELS ?= 1 USE_CXX_KERNELS ?= 1
## BSSN-EScalar RHS switch
## 1 : use BSSN-EScalar C wrapper on the normal patch path
## 0 (default) : keep the original Fortran BSSN-EScalar RHS for precision-safe runs
## Note: this requires USE_CXX_KERNELS=1 because the wrapper reuses the C BSSN kernel.
USE_CXX_ESCALAR_KERNEL ?= 0
## Cached transfer switch
## 1 : enable cached Sync/Restrict/OutBd transfer on evolution hot paths
## 0 (default) : keep the original uncached transfer path for precision-safe runs
USE_TRANSFER_CACHE ?= 0
## RK4 kernel implementation switch ## RK4 kernel implementation switch
## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments) ## 1 (default) : use C/C++ rewrite of rungekutta4_rout (for optimization experiments)
## 0 : use original Fortran rungekutta4_rout.o ## 0 : use original Fortran rungekutta4_rout.o