Fix BSSN-EM runtime crash

This commit is contained in:
2026-05-07 16:47:55 +08:00
parent 5525465cad
commit fea2dcc0d5
7 changed files with 103 additions and 98 deletions

View File

@@ -198,16 +198,16 @@ int main(int argc, char *argv[])
if (myrank == 0) if (myrank == 0)
{ {
string out_dir; string out_dir;
char filename[50]; string filename;
map<string, string>::iterator iter; map<string, string>::iterator iter;
iter = parameters::str_par.find("output dir"); iter = parameters::str_par.find("output dir");
if (iter != parameters::str_par.end()) if (iter != parameters::str_par.end())
{ {
out_dir = iter->second; out_dir = iter->second;
} }
sprintf(filename, "%s/setting.par", out_dir.c_str()); filename = out_dir + "/setting.par";
ofstream setfile; ofstream setfile;
setfile.open(filename, ios::trunc); setfile.open(filename.c_str(), ios::trunc);
if (!setfile.good()) if (!setfile.good())
{ {

View File

@@ -3,6 +3,7 @@
#include <sstream> #include <sstream>
#include <cstdio> #include <cstdio>
#include <map> #include <map>
#include <string>
using namespace std; using namespace std;
#else #else
#include <stdio.h> #include <stdio.h>
@@ -3258,11 +3259,12 @@ void Z4c_class::Interp_Constraint()
} }
ofstream outfile; ofstream outfile;
char filename[50]; char suffix[64];
sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); sprintf(suffix, "/interp_constraint_%05d.dat", int(PhysTime / dT + 0.5));
string filename = ErrorMonitor->out_dir + suffix;
// 0.5 for round off // 0.5 for round off
outfile.open(filename); outfile.open(filename.c_str());
outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl; outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl;
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {

View File

@@ -4,6 +4,7 @@
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <map> #include <map>
#include <string>
using namespace std; using namespace std;
#else #else
#include <stdio.h> #include <stdio.h>
@@ -330,13 +331,15 @@ bool bssn_em_cuda_keep_resident_after_step(int lev, int trfls_in, int analysis_l
static int keep_all_levels = -1; static int keep_all_levels = -1;
if (keep_all_levels < 0) if (keep_all_levels < 0)
{ {
const char *env = getenv("AMSS_CUDA_KEEP_ALL_LEVELS"); const char *env = getenv("AMSS_CUDA_EM_KEEP_ALL_LEVELS");
keep_all_levels = (env && atoi(env) != 0) ? 1 : 0; keep_all_levels = (env && atoi(env) != 0) ? 1 : 0;
} }
static int enabled = -1; static int enabled = -1;
if (enabled < 0) if (enabled < 0)
{ {
const char *env = getenv("AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP"); const char *env = getenv("AMSS_CUDA_EM_KEEP_RESIDENT_AFTER_STEP");
if (!env)
env = getenv("AMSS_CUDA_KEEP_RESIDENT_AFTER_STEP");
enabled = (env && atoi(env) != 0) ? 1 : 0; enabled = (env && atoi(env) != 0) ? 1 : 0;
} }
if (!enabled) if (!enabled)
@@ -2334,17 +2337,7 @@ void bssnEM_class::Step(int lev, int YN)
em_t0 = MPI_Wtime(); em_t0 = MPI_Wtime();
const bool needs_resident_download = const bool needs_resident_download =
!bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev); !bssn_em_cuda_keep_resident_after_step(lev, trfls, a_lev);
const bool skip_zero_resident_download = if (needs_resident_download)
needs_resident_download &&
bssn_em_zero_resident_download_fastpath_enabled() &&
bssn_em_resident_zero_fastpath_ready(GH->PatL[lev],
#ifdef WithShell
0,
#else
0,
#endif
myrank);
if (needs_resident_download && !skip_zero_resident_download)
bssn_em_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true); bssn_em_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
if (em_step_timing) if (em_step_timing)
em_t_resident += MPI_Wtime() - em_t0; em_t_resident += MPI_Wtime() - em_t0;
@@ -2952,11 +2945,12 @@ void bssnEM_class::Interp_Constraint()
} }
ofstream outfile; ofstream outfile;
char filename[50]; char suffix[64];
sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); sprintf(suffix, "/interp_constraint_%05d.dat", int(PhysTime / dT + 0.5));
string filename = ErrorMonitor->out_dir + suffix;
// 0.5 for round off // 0.5 for round off
outfile.open(filename); outfile.open(filename.c_str());
outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl; outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl;
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {

View File

@@ -3,6 +3,7 @@
#include <sstream> #include <sstream>
#include <cstdio> #include <cstdio>
#include <map> #include <map>
#include <string>
using namespace std; using namespace std;
#else #else
#include <stdio.h> #include <stdio.h>
@@ -2523,11 +2524,12 @@ void bssnEScalar_class::Interp_Constraint()
} }
ofstream outfile; ofstream outfile;
char filename[50]; char suffix[64];
sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); sprintf(suffix, "/interp_constraint_%05d.dat", int(PhysTime / dT + 0.5));
string filename = ErrorMonitor->out_dir + suffix;
// 0.5 for round off // 0.5 for round off
outfile.open(filename); outfile.open(filename.c_str());
outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, fR_Res, ...." << endl; outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, fR_Res, ...." << endl;
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {

View File

@@ -9665,11 +9665,12 @@ void bssn_class::Interp_Constraint(bool infg)
if (myrank == 0) if (myrank == 0)
{ {
ofstream outfile; ofstream outfile;
char filename[50]; char suffix[64];
sprintf(filename, "%s/interp_constraint_%05d.dat", ErrorMonitor->out_dir.c_str(), int(PhysTime / dT + 0.5)); sprintf(suffix, "/interp_constraint_%05d.dat", int(PhysTime / dT + 0.5));
string filename = ErrorMonitor->out_dir + suffix;
// 0.5 for round off // 0.5 for round off
outfile.open(filename); outfile.open(filename.c_str());
outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl; outfile << "# corrdinate, H_Res, Px_Res, Py_Res, Pz_Res, Gx_Res, Gy_Res, Gz_Res, ...." << endl;
for (int i = 0; i < n; i++) for (int i = 0; i < n; i++)
{ {

View File

@@ -76,8 +76,11 @@ checkpoint::checkpoint(bool checked, const char fname[], int myrank) : filename(
I_Print = (myrank == 0); I_Print = (myrank == 0);
int i = strlen(fname); size_t filename_len = out_dir.size() + strlen(fname) + 32;
filename = new char[i+30]; #ifdef CHECKDETAIL
filename_len += 32;
#endif
filename = new char[filename_len];
// cout << filename << endl; // cout << filename << endl;
// cout << i << endl; // cout << i << endl;
@@ -103,7 +106,7 @@ checkpoint::checkpoint(bool checked, const char fname[], int myrank) : filename(
checkpoint::~checkpoint() checkpoint::~checkpoint()
{ {
CheckList->clearList(); CheckList->clearList();
if (I_Print) if (filename)
delete[] filename; delete[] filename;
} }
@@ -136,7 +139,7 @@ void checkpoint::writecheck_cgh(double time, cgh *GH)
if (I_Print) if (I_Print)
{ {
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_cgh.CHK", filename); sprintf(fname, "%s_cgh.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -195,7 +198,7 @@ void checkpoint::readcheck_cgh(double &time, cgh *GH, int myrank, int nprocs, in
int DIM = dim; int DIM = dim;
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_cgh.CHK", filename); sprintf(fname, "%s_cgh.CHK", filename);
infile.open(fname); infile.open(fname);
@@ -297,7 +300,7 @@ void checkpoint::writecheck_sh(double time, ShellPatch *SH)
if (I_Print) if (I_Print)
{ {
char fname[50]; char fname[4096];
sprintf(fname, "%s_sh.CHK", filename); sprintf(fname, "%s_sh.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -335,7 +338,7 @@ void checkpoint::readcheck_sh(ShellPatch *SH, int myrank)
int DIM = dim; int DIM = dim;
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_sh.CHK", filename); sprintf(fname, "%s_sh.CHK", filename);
infile.open(fname); infile.open(fname);
@@ -390,7 +393,7 @@ void checkpoint::write_Black_Hole_position(int BH_num_input, int BH_num, double
if (I_Print) if (I_Print)
{ {
char fname[50]; char fname[4096];
sprintf(fname, "%s_BHp.CHK", filename); sprintf(fname, "%s_BHp.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -417,7 +420,7 @@ void checkpoint::read_Black_Hole_position(int &BH_num_input, int &BH_num, double
{ {
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_BHp.CHK", filename); sprintf(fname, "%s_BHp.CHK", filename);
infile.open(fname); infile.open(fname);
@@ -461,7 +464,7 @@ void checkpoint::write_bssn(double LastDump, double Last2dDump, double LastAnas)
if (I_Print) if (I_Print)
{ {
char fname[50]; char fname[4096];
sprintf(fname, "%s_bssn.CHK", filename); sprintf(fname, "%s_bssn.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -481,7 +484,7 @@ void checkpoint::read_bssn(double &LastDump, double &Last2dDump, double &LastAna
{ {
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_bssn.CHK", filename); sprintf(fname, "%s_bssn.CHK", filename);
infile.open(fname); infile.open(fname);
@@ -506,7 +509,7 @@ void checkpoint::write_bssn(double LastDump, double Last2dDump, double LastAnas)
ofstream outfile; ofstream outfile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_bssn.CHK", filename); sprintf(fname, "%s_bssn.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -527,7 +530,7 @@ void checkpoint::read_bssn(double &LastDump, double &Last2dDump, double &LastAna
{ {
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_bssn.CHK", filename); sprintf(fname, "%s_bssn.CHK", filename);
infile.open(fname); infile.open(fname);
@@ -551,7 +554,7 @@ void checkpoint::write_Black_Hole_position(int BH_num_input, int BH_num, double
ofstream outfile; ofstream outfile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_BHp.CHK", filename); sprintf(fname, "%s_BHp.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -581,7 +584,7 @@ void checkpoint::read_Black_Hole_position(int &BH_num_input, int &BH_num, double
{ {
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_BHp.CHK", filename); sprintf(fname, "%s_BHp.CHK", filename);
infile.open(fname); infile.open(fname);
@@ -628,7 +631,7 @@ void checkpoint::writecheck_cgh(double time, cgh *GH)
ofstream outfile; ofstream outfile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_cgh.CHK", filename); sprintf(fname, "%s_cgh.CHK", filename);
outfile.open(fname, ios::out | ios::trunc); outfile.open(fname, ios::out | ios::trunc);
@@ -738,7 +741,7 @@ void checkpoint::readcheck_cgh(double &time, cgh *GH, int myrank, int nprocs, in
int DIM = dim; int DIM = dim;
ifstream infile; ifstream infile;
// char fname[50]; // char fname[50];
char fname[50+50]; char fname[4096];
sprintf(fname, "%s_cgh.CHK", filename); sprintf(fname, "%s_cgh.CHK", filename);
infile.open(fname); infile.open(fname);

View File

@@ -1,6 +1,7 @@
#ifdef newc #ifdef newc
#include <cstdio> #include <cstdio>
#include <sstream>
using namespace std; using namespace std;
#else #else
#include <stdio.h> #include <stdio.h>
@@ -77,16 +78,17 @@ monitor::monitor(const char fname[], int myrank, string head)
parameters::str_par.insert(map<string, string>::value_type("output dir", out_dir)); parameters::str_par.insert(map<string, string>::value_type("output dir", out_dir));
} }
// considering checkpoint run // considering checkpoint run
char filename[50]; string filename = out_dir + "/" + fname;
sprintf(filename, "%s/%s", out_dir.c_str(), fname);
int i = 1; int i = 1;
while ((access(filename, F_OK)) != -1) while ((access(filename.c_str(), F_OK)) != -1)
{ {
sprintf(filename, "%s/%d_%s", out_dir.c_str(), i, fname); stringstream ss;
ss << out_dir << "/" << i << "_" << fname;
filename = ss.str();
i++; i++;
} }
outfile.open(filename, ios::trunc); outfile.open(filename.c_str(), ios::trunc);
time_t tnow; time_t tnow;
time(&tnow); time(&tnow);
@@ -107,16 +109,17 @@ monitor::monitor(const char fname[], int myrank, const int out_rank, string head
if (I_Print) if (I_Print)
{ {
// considering checkpoint run // considering checkpoint run
char filename[50]; string filename = out_dir + "/" + fname;
sprintf(filename, "%s/%s", out_dir.c_str(), fname);
int i = 1; int i = 1;
while ((access(filename, F_OK)) != -1) while ((access(filename.c_str(), F_OK)) != -1)
{ {
sprintf(filename, "%s/%d_%s", out_dir.c_str(), i, fname); stringstream ss;
ss << out_dir << "/" << i << "_" << fname;
filename = ss.str();
i++; i++;
} }
outfile.open(filename, ios::trunc); outfile.open(filename.c_str(), ios::trunc);
time_t tnow; time_t tnow;
time(&tnow); time(&tnow);