Compare commits
7 Commits
chb-replac
...
cjy-oneapi
| Author | SHA1 | Date | |
|---|---|---|---|
| e0b5e012df | |||
|
|
6b2464b80c | ||
| 9c33e16571 | |||
| 45b7a43576 | |||
| dfb79e3e11 | |||
| d2c2214fa1 | |||
| e157ea3a23 |
@@ -270,6 +270,12 @@ if not os.path.exists( ABE_file ):
|
|||||||
## Copy the executable ABE (or ABEGPU) into the run directory
|
## Copy the executable ABE (or ABEGPU) into the run directory
|
||||||
shutil.copy2(ABE_file, output_directory)
|
shutil.copy2(ABE_file, output_directory)
|
||||||
|
|
||||||
|
## Copy interp load balance profile if present (for optimize pass)
|
||||||
|
interp_lb_profile = os.path.join(AMSS_NCKU_source_copy, "interp_lb_profile.bin")
|
||||||
|
if os.path.exists(interp_lb_profile):
|
||||||
|
shutil.copy2(interp_lb_profile, output_directory)
|
||||||
|
print( " Copied interp_lb_profile.bin to run directory " )
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
|
|
||||||
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
|
## If the initial-data method is TwoPuncture, copy the TwoPunctureABE executable to the run directory
|
||||||
|
|||||||
@@ -13,6 +13,9 @@ using namespace std;
|
|||||||
#include "MPatch.h"
|
#include "MPatch.h"
|
||||||
#include "Parallel.h"
|
#include "Parallel.h"
|
||||||
#include "fmisc.h"
|
#include "fmisc.h"
|
||||||
|
#ifdef INTERP_LB_PROFILE
|
||||||
|
#include "interp_lb_profile.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
Patch::Patch(int DIM, int *shapei, double *bboxi, int levi, bool buflog, int Symmetry) : lev(levi)
|
||||||
{
|
{
|
||||||
@@ -507,6 +510,9 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
// Targeted point-to-point overload: each owner sends each point only to
|
// Targeted point-to-point overload: each owner sends each point only to
|
||||||
// the one rank that needs it for integration (consumer), reducing
|
// the one rank that needs it for integration (consumer), reducing
|
||||||
// communication volume by ~nprocs times compared to the Bcast version.
|
// communication volume by ~nprocs times compared to the Bcast version.
|
||||||
|
#ifdef INTERP_LB_PROFILE
|
||||||
|
double t_interp_start = MPI_Wtime();
|
||||||
|
#endif
|
||||||
int myrank, nprocs;
|
int myrank, nprocs;
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||||
@@ -608,6 +614,11 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef INTERP_LB_PROFILE
|
||||||
|
double t_interp_end = MPI_Wtime();
|
||||||
|
double t_interp_local = t_interp_end - t_interp_start;
|
||||||
|
#endif
|
||||||
|
|
||||||
// --- Error check for unfound points ---
|
// --- Error check for unfound points ---
|
||||||
for (int j = 0; j < NN; j++)
|
for (int j = 0; j < NN; j++)
|
||||||
{
|
{
|
||||||
@@ -764,6 +775,31 @@ void Patch::Interp_Points(MyList<var> *VarList,
|
|||||||
delete[] recv_count;
|
delete[] recv_count;
|
||||||
delete[] consumer_rank;
|
delete[] consumer_rank;
|
||||||
delete[] owner_rank;
|
delete[] owner_rank;
|
||||||
|
|
||||||
|
#ifdef INTERP_LB_PROFILE
|
||||||
|
{
|
||||||
|
static bool profile_written = false;
|
||||||
|
if (!profile_written) {
|
||||||
|
double *all_times = nullptr;
|
||||||
|
if (myrank == 0) all_times = new double[nprocs];
|
||||||
|
MPI_Gather(&t_interp_local, 1, MPI_DOUBLE,
|
||||||
|
all_times, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
||||||
|
if (myrank == 0) {
|
||||||
|
int heavy[64];
|
||||||
|
int nh = InterpLBProfile::identify_heavy_ranks(
|
||||||
|
all_times, nprocs, 2.5, heavy, 64);
|
||||||
|
InterpLBProfile::write_profile(
|
||||||
|
"interp_lb_profile.bin", nprocs,
|
||||||
|
all_times, heavy, nh, 2.5);
|
||||||
|
printf("[InterpLB] Profile written: %d heavy ranks\n", nh);
|
||||||
|
for (int i = 0; i < nh; i++)
|
||||||
|
printf(" Heavy rank %d: %.6f s\n", heavy[i], all_times[heavy[i]]);
|
||||||
|
delete[] all_times;
|
||||||
|
}
|
||||||
|
profile_written = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
void Patch::Interp_Points(MyList<var> *VarList,
|
void Patch::Interp_Points(MyList<var> *VarList,
|
||||||
int NN, double **XX,
|
int NN, double **XX,
|
||||||
|
|||||||
@@ -462,7 +462,7 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev); // delete through KillBlocks
|
ng = ng0 = new Block(dim, shape_here, bbox_here, n_rank++, ingfsi, fngfsi, PP->lev);
|
||||||
// ng->checkBlock();
|
// ng->checkBlock();
|
||||||
if (BlL)
|
if (BlL)
|
||||||
BlL->insert(ng);
|
BlL->insert(ng);
|
||||||
@@ -500,6 +500,384 @@ MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int i
|
|||||||
|
|
||||||
return BlL;
|
return BlL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef INTERP_LB_OPTIMIZE
|
||||||
|
#include "interp_lb_profile_data.h"
|
||||||
|
|
||||||
|
#ifdef USE_GPU_DIVIDE
// Look up the double parameter `key` of group "ABE".
// A value already cached in parameters::dou_par is returned directly.  Otherwise
// the input parameter file named by parameters::str_par["inputpar"] is scanned
// and the result is cached in parameters::dou_par before being returned.
static double interp_lb_abe_double_par(const char *key)
{
  map<string, double>::iterator cached = parameters::dou_par.find(key);
  if (cached != parameters::dou_par.end())
    return cached->second;

  int myrank;
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

  map<string, string>::iterator par_file = parameters::str_par.find("inputpar");
  if (par_file == parameters::str_par.end())
  {
    cout << "Error inputpar" << endl;
    // Fatal configuration error: abort the job with a failure status.  The
    // previous exit(0) reported success to the launcher.
    MPI_Abort(MPI_COMM_WORLD, 1);
    exit(1);
  }

  // A std::string holds paths of any length; the previous char[50] + strcpy
  // overflowed for long parameter-file paths.
  const string pname = par_file->second;

  ifstream inf(pname.c_str(), ifstream::in);
  if (!inf.good())
  {
    // Every rank that cannot read the file stops; only rank 0 reports it.
    if (myrank == 0)
      cout << "Can not open parameter file " << pname << endl;
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  const int LEN = 256;
  char pline[LEN];
  string str, sgrp, skey, sval;
  int sind;
  double value = 0.0; // defined fallback; previously left uninitialized
  bool found = false;

  for (int i = 1; inf.good(); i++)
  {
    inf.getline(pline, LEN);
    str = pline;
    int status = misc::parse_parts(str, sgrp, skey, sval, sind);
    if (status == -1)
    {
      cout << "error reading parameter file " << pname << " in line " << i << endl;
      MPI_Abort(MPI_COMM_WORLD, 1);
    }
    else if (status == 0)
      continue;
    if (sgrp == "ABE" && skey == key)
    {
      value = atof(sval.c_str());
      found = true;
    }
  }
  inf.close();

  if (!found && myrank == 0)
    cout << "Parallel::distribute_optimize: parameter ABE::" << key
         << " not found in " << pname << ", using 0" << endl;

  parameters::dou_par.insert(map<string, double>::value_type(key, value));
  return value;
}
#endif

// Build the Block list for all patches in PatchLIST, using the interpolation
// load-balance tables from interp_lb_profile_data.h.
//
//   PatchLIST       patches to decompose (expected to share one level)
//   cpusize         number of processes the decomposition targets
//   ingfsi, fngfsi  forwarded unchanged to the Block constructor
//   periodic        true: ghost zones extend past the patch boundary;
//                   false: ghost zones are clamped to the patch index range
//   nodes           number of pieces to aim for; 0 selects the default
//                   (cpusize, or cpusize / 2 when USE_GPU_DIVIDE is defined)
//
// Each patch is cut into nxyz[0] * nxyz[1] * nxyz[2] pieces by partition3().
// Pieces are numbered consecutively across patches.  When cpusize equals
// INTERP_LB_NPROCS, a piece whose number appears in interp_lb_splits is cut
// into two Blocks by splitHotspotBlock(), and all other pieces are created by
// createMappedBlock().  Otherwise the profile tables are ignored and each
// piece is owned by the rank equal to its number.
//
// Returns the head of the Block list (0 when PatchLIST is empty).  PP->blb and
// PP->ble of every patch are set to the list nodes of its first and last Block.
MyList<Block> *Parallel::distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
                                             bool periodic, int nodes)
{
#ifdef USE_GPU_DIVIDE
  // The values are not used below; the lookups are kept because they cache
  // "cpu part" / "gpu part" in parameters::dou_par.
  const double cpu_part = interp_lb_abe_double_par("cpu part");
  const double gpu_part = interp_lb_abe_double_par("gpu part");
  (void)cpu_part;
  (void)gpu_part;
  if (nodes == 0)
    nodes = cpusize / 2;
#else
  if (nodes == 0)
    nodes = cpusize;
#endif

  if (dim != 3)
  {
    cout << "distrivute: now we only support 3-dimension" << endl;
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  // Nothing to distribute; the code below dereferences the first list node.
  if (!PatchLIST)
    return 0;

  MyList<Block> *BlL = 0;
  int split_size, min_size, block_size = 0;
  int min_width = 2 * Mymax(ghost_width, buffer_width);
  int nxyz[dim], mmin_width[dim], min_shape[dim];

  // Smallest extent per direction over all patches, plus a level sanity check.
  MyList<Patch> *PLi = PatchLIST;
  for (int i = 0; i < dim; i++)
    min_shape[i] = PLi->data->shape[i];
  int lev = PLi->data->lev;
  for (PLi = PLi->next; PLi; PLi = PLi->next)
  {
    Patch *PP = PLi->data;
    for (int i = 0; i < dim; i++)
      min_shape[i] = Mymin(min_shape[i], PP->shape[i]);
    if (lev != PLi->data->lev)
      cout << "Parallel::distribute CAUSTION: meet Patches for different level: " << lev << " and " << PLi->data->lev << endl;
  }

  for (int i = 0; i < dim; i++)
    mmin_width[i] = Mymin(min_width, min_shape[i]);
  min_size = mmin_width[0];
  for (int i = 1; i < dim; i++)
    min_size = min_size * mmin_width[i];

  // Total number of grid points over all patches.
  for (PLi = PatchLIST; PLi; PLi = PLi->next)
  {
    Patch *PP = PLi->data;
    int bs = PP->shape[0];
    for (int i = 1; i < dim; i++)
      bs = bs * PP->shape[i];
    block_size = block_size + bs;
  }
  split_size = Mymax(min_size, block_size / nodes);
  split_size = Mymax(1, split_size);

  // The generated tables describe one process count only.  The split table was
  // already gated on this; the remap table is now gated the same way, so a run
  // with a different process count never picks up ranks from a stale profile.
  const bool profile_applies = (cpusize == INTERP_LB_NPROCS);

  int reacpu = 0;
  // NOTE(review): this counter is never wrapped at cpusize, so the default
  // owner rank (= piece number) relies on the level having at most cpusize
  // pieces in total -- confirm against the non-optimized distribute().
  int current_block_id = 0;

  for (PLi = PatchLIST; PLi; PLi = PLi->next)
  {
    Patch *PP = PLi->data;
    Block *ng = nullptr; // last Block created for this patch
    bool first_block_in_patch = true;
    reacpu += partition3(nxyz, split_size, mmin_width, nodes, PP->shape);

    for (int i = 0; i < nxyz[0]; i++)
      for (int j = 0; j < nxyz[1]; j++)
        for (int k = 0; k < nxyz[2]; k++)
        {
          int ibbox_here[6], shape_here[3];
          double bbox_here[6], dd;
          Block *current_ng_start = nullptr;

          // Index range of this piece inside the patch, without ghost zones.
          const int raw_lo[3] = {(PP->shape[0] * i) / nxyz[0],
                                 (PP->shape[1] * j) / nxyz[1],
                                 (PP->shape[2] * k) / nxyz[2]};
          const int raw_hi[3] = {(PP->shape[0] * (i + 1)) / nxyz[0] - 1,
                                 (PP->shape[1] * (j + 1)) / nxyz[1] - 1,
                                 (PP->shape[2] * (k + 1)) / nxyz[2] - 1};

          // Is this piece listed as a hotspot to be split over two ranks?
          bool is_heavy = false;
          int r_l = -1, r_r = -1;
          if (profile_applies)
          {
            for (int si = 0; si < INTERP_LB_NUM_HEAVY; si++)
            {
              if (current_block_id == interp_lb_splits[si][0])
              {
                is_heavy = true;
                r_l = interp_lb_splits[si][1];
                r_r = interp_lb_splits[si][2];
                break;
              }
            }
          }

          if (is_heavy)
          {
            Block *split_first_block = nullptr;
            Block *split_last_block = nullptr;
            splitHotspotBlock(BlL, dim, raw_lo[0], raw_hi[0], raw_lo[1], raw_hi[1], raw_lo[2], raw_hi[2],
                              PP, r_l, r_r, ingfsi, fngfsi, periodic,
                              split_first_block, split_last_block);

            current_ng_start = split_first_block;
            ng = split_last_block;
          }
          else
          {
            // Add ghost zones; clamp to the patch unless periodic.
            for (int d = 0; d < 3; d++)
            {
              if (periodic)
              {
                ibbox_here[d] = raw_lo[d] - ghost_width;
                ibbox_here[d + 3] = raw_hi[d] + ghost_width;
              }
              else
              {
                ibbox_here[d] = Mymax(0, raw_lo[d] - ghost_width);
                ibbox_here[d + 3] = Mymin(PP->shape[d] - 1, raw_hi[d] + ghost_width);
              }
              shape_here[d] = ibbox_here[d + 3] - ibbox_here[d] + 1;
            }

#ifdef Vertex
#ifdef Cell
#error Both Cell and Vertex are defined
#endif
            dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
            bbox_here[0] = PP->bbox[0] + ibbox_here[0] * dd;
            bbox_here[3] = PP->bbox[0] + ibbox_here[3] * dd;
            dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
            bbox_here[1] = PP->bbox[1] + ibbox_here[1] * dd;
            bbox_here[4] = PP->bbox[1] + ibbox_here[4] * dd;
            dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
            bbox_here[2] = PP->bbox[2] + ibbox_here[2] * dd;
            bbox_here[5] = PP->bbox[2] + ibbox_here[5] * dd;
#else
#ifdef Cell
            dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
            bbox_here[0] = PP->bbox[0] + (ibbox_here[0]) * dd;
            bbox_here[3] = PP->bbox[0] + (ibbox_here[3] + 1) * dd;
            dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
            bbox_here[1] = PP->bbox[1] + (ibbox_here[1]) * dd;
            bbox_here[4] = PP->bbox[1] + (ibbox_here[4] + 1) * dd;
            dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
            bbox_here[2] = PP->bbox[2] + (ibbox_here[2]) * dd;
            bbox_here[5] = PP->bbox[2] + (ibbox_here[5] + 1) * dd;
#else
#error Not define Vertex nor Cell
#endif
#endif
            if (profile_applies)
            {
              ng = createMappedBlock(BlL, dim, shape_here, bbox_here,
                                     current_block_id, ingfsi, fngfsi, PP->lev);
            }
            else
            {
              // No matching profile: the piece is owned by the rank equal to
              // its number, with no remapping.
              ng = new Block(dim, shape_here, bbox_here, current_block_id, ingfsi, fngfsi, PP->lev);
              if (BlL)
                BlL->insert(ng);
              else
                BlL = new MyList<Block>(ng);
            }
            current_ng_start = ng;
          }

          if (first_block_in_patch)
          {
            // Record the list node that holds the patch's first Block.
            MyList<Block> *Bp_start = BlL;
            while (Bp_start && Bp_start->data != current_ng_start)
              Bp_start = Bp_start->next;
            PP->blb = Bp_start;
            first_block_in_patch = false;
          }

          current_block_id++;
        }

    // Record the list node that holds the patch's last Block.
    MyList<Block> *Bp_end = BlL;
    while (Bp_end && Bp_end->data != ng)
      Bp_end = Bp_end->next;
    PP->ble = Bp_end;
  }

  if (reacpu < nodes * 2 / 3)
  {
    int myrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    if (myrank == 0)
      cout << "Parallel::distribute CAUSTION: level#" << lev << " uses essencially " << reacpu << " processors vs " << nodes << " nodes run, your scientific computation scale is not as large as you estimate." << endl;
  }

  return BlL;
}
|
||||||
|
|
||||||
|
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||||||
|
int ib0_orig, int ib3_orig,
|
||||||
|
int jb1_orig, int jb4_orig,
|
||||||
|
int kb2_orig, int kb5_orig,
|
||||||
|
Patch* PP, int r_left, int r_right,
|
||||||
|
int ingfsi, int fngfsi, bool periodic,
|
||||||
|
Block* &split_first_block, Block* &split_last_block)
|
||||||
|
{
|
||||||
|
int mid = (ib0_orig + ib3_orig) / 2;
|
||||||
|
|
||||||
|
int indices_L[6] = {ib0_orig, jb1_orig, kb2_orig, mid, jb4_orig, kb5_orig};
|
||||||
|
int indices_R[6] = {mid + 1, jb1_orig, kb2_orig, ib3_orig, jb4_orig, kb5_orig};
|
||||||
|
|
||||||
|
auto createSubBlock = [&](int* ib_raw, int target_rank) {
|
||||||
|
int ib_final[6];
|
||||||
|
int sh_here[3];
|
||||||
|
double bb_here[6], dd;
|
||||||
|
|
||||||
|
if (periodic) {
|
||||||
|
ib_final[0] = ib_raw[0] - ghost_width;
|
||||||
|
ib_final[3] = ib_raw[3] + ghost_width;
|
||||||
|
ib_final[1] = ib_raw[1] - ghost_width;
|
||||||
|
ib_final[4] = ib_raw[4] + ghost_width;
|
||||||
|
ib_final[2] = ib_raw[2] - ghost_width;
|
||||||
|
ib_final[5] = ib_raw[5] + ghost_width;
|
||||||
|
} else {
|
||||||
|
ib_final[0] = Mymax(0, ib_raw[0] - ghost_width);
|
||||||
|
ib_final[3] = Mymin(PP->shape[0] - 1, ib_raw[3] + ghost_width);
|
||||||
|
ib_final[1] = Mymax(0, ib_raw[1] - ghost_width);
|
||||||
|
ib_final[4] = Mymin(PP->shape[1] - 1, ib_raw[4] + ghost_width);
|
||||||
|
ib_final[2] = Mymax(0, ib_raw[2] - ghost_width);
|
||||||
|
ib_final[5] = Mymin(PP->shape[2] - 1, ib_raw[5] + ghost_width);
|
||||||
|
}
|
||||||
|
|
||||||
|
sh_here[0] = ib_final[3] - ib_final[0] + 1;
|
||||||
|
sh_here[1] = ib_final[4] - ib_final[1] + 1;
|
||||||
|
sh_here[2] = ib_final[5] - ib_final[2] + 1;
|
||||||
|
|
||||||
|
#ifdef Vertex
|
||||||
|
dd = (PP->bbox[3] - PP->bbox[0]) / (PP->shape[0] - 1);
|
||||||
|
bb_here[0] = PP->bbox[0] + ib_final[0] * dd;
|
||||||
|
bb_here[3] = PP->bbox[0] + ib_final[3] * dd;
|
||||||
|
dd = (PP->bbox[4] - PP->bbox[1]) / (PP->shape[1] - 1);
|
||||||
|
bb_here[1] = PP->bbox[1] + ib_final[1] * dd;
|
||||||
|
bb_here[4] = PP->bbox[1] + ib_final[4] * dd;
|
||||||
|
dd = (PP->bbox[5] - PP->bbox[2]) / (PP->shape[2] - 1);
|
||||||
|
bb_here[2] = PP->bbox[2] + ib_final[2] * dd;
|
||||||
|
bb_here[5] = PP->bbox[2] + ib_final[5] * dd;
|
||||||
|
#else
|
||||||
|
#ifdef Cell
|
||||||
|
dd = (PP->bbox[3] - PP->bbox[0]) / PP->shape[0];
|
||||||
|
bb_here[0] = PP->bbox[0] + ib_final[0] * dd;
|
||||||
|
bb_here[3] = PP->bbox[0] + (ib_final[3] + 1) * dd;
|
||||||
|
dd = (PP->bbox[4] - PP->bbox[1]) / PP->shape[1];
|
||||||
|
bb_here[1] = PP->bbox[1] + ib_final[1] * dd;
|
||||||
|
bb_here[4] = PP->bbox[1] + (ib_final[4] + 1) * dd;
|
||||||
|
dd = (PP->bbox[5] - PP->bbox[2]) / PP->shape[2];
|
||||||
|
bb_here[2] = PP->bbox[2] + ib_final[2] * dd;
|
||||||
|
bb_here[5] = PP->bbox[2] + (ib_final[5] + 1) * dd;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Block* Bg = new Block(dim, sh_here, bb_here, target_rank, ingfsi, fngfsi, PP->lev);
|
||||||
|
if (BlL) BlL->insert(Bg);
|
||||||
|
else BlL = new MyList<Block>(Bg);
|
||||||
|
|
||||||
|
return Bg;
|
||||||
|
};
|
||||||
|
|
||||||
|
split_first_block = createSubBlock(indices_L, r_left);
|
||||||
|
split_last_block = createSubBlock(indices_R, r_right);
|
||||||
|
return split_last_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create one Block and append it to BlL (the list is created when BlL is null).
// The owner rank is block_id unless INTERP_LB_NPROCS is positive and
// interp_lb_remaps has an entry {block_id, rank}; the first matching entry wins.
// shape, bbox, ingfsi, fngfsi and lev are forwarded to the Block constructor;
// _dim is not used (the Block is built with `dim`).  Returns the new Block.
Block* Parallel::createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
                                   int block_id, int ingfsi, int fngfsi, int lev)
{
  int owner = block_id;

  if (INTERP_LB_NPROCS > 0)
  {
    // Advance to the first remap entry for this block id, if there is one.
    int entry = 0;
    while (entry < interp_lb_num_remaps && interp_lb_remaps[entry][0] != block_id)
      ++entry;
    if (entry < interp_lb_num_remaps)
      owner = interp_lb_remaps[entry][1];
  }

  Block* created = new Block(dim, shape, bbox, owner, ingfsi, fngfsi, lev);
  if (!BlL)
    BlL = new MyList<Block>(created);
  else
    BlL->insert(created);

  return created;
}
|
||||||
|
#else
|
||||||
|
// When INTERP_LB_OPTIMIZE is not defined, distribute_optimize falls back to distribute
|
||||||
|
MyList<Block> *Parallel::distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||||
|
bool periodic, int nodes)
|
||||||
|
{
|
||||||
|
return distribute(PatchLIST, cpusize, ingfsi, fngfsi, periodic, nodes);
|
||||||
|
}
|
||||||
|
// Stub for builds without INTERP_LB_OPTIMIZE: creates nothing and returns
// nullptr.  Both out-parameters are set to nullptr so a caller never reads an
// unassigned pointer after the call; BlL is left untouched.
Block* Parallel::splitHotspotBlock(MyList<Block>* &BlL, int _dim,
                                   int ib0_orig, int ib3_orig,
                                   int jb1_orig, int jb4_orig,
                                   int kb2_orig, int kb5_orig,
                                   Patch* PP, int r_left, int r_right,
                                   int ingfsi, int fngfsi, bool periodic,
                                   Block* &split_first_block, Block* &split_last_block)
{
  split_first_block = nullptr;
  split_last_block = nullptr;
  return nullptr;
}
|
||||||
|
// Stub for builds without INTERP_LB_OPTIMIZE: no Block is created, BlL is not
// modified, and the result is always nullptr.
Block* Parallel::createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
                                   int block_id, int ingfsi, int fngfsi, int lev)
{
  Block* none = nullptr;
  return none;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
#elif (PSTR == 1 || PSTR == 2 || PSTR == 3)
|
||||||
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
MyList<Block> *Parallel::distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfsi,
|
||||||
bool periodic, int start_rank, int end_rank, int nodes)
|
bool periodic, int start_rank, int end_rank, int nodes)
|
||||||
|
|||||||
@@ -32,6 +32,16 @@ namespace Parallel
|
|||||||
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
int partition2(int *nxy, int split_size, int *min_width, int cpusize, int *shape); // special for 2 diemnsions
|
||||||
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
int partition3(int *nxyz, int split_size, int *min_width, int cpusize, int *shape);
|
||||||
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
MyList<Block> *distribute(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0); // produce corresponding Blocks
|
||||||
|
MyList<Block> *distribute_optimize(MyList<Patch> *PatchLIST, int cpusize, int ingfsi, int fngfs, bool periodic, int nodes = 0);
|
||||||
|
Block* splitHotspotBlock(MyList<Block>* &BlL, int _dim,
|
||||||
|
int ib0_orig, int ib3_orig,
|
||||||
|
int jb1_orig, int jb4_orig,
|
||||||
|
int kb2_orig, int kb5_orig,
|
||||||
|
Patch* PP, int r_left, int r_right,
|
||||||
|
int ingfsi, int fngfsi, bool periodic,
|
||||||
|
Block* &split_first_block, Block* &split_last_block);
|
||||||
|
Block* createMappedBlock(MyList<Block>* &BlL, int _dim, int* shape, double* bbox,
|
||||||
|
int block_id, int ingfsi, int fngfsi, int lev);
|
||||||
void KillBlocks(MyList<Patch> *PatchLIST);
|
void KillBlocks(MyList<Patch> *PatchLIST);
|
||||||
|
|
||||||
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
void setfunction(MyList<Block> *BlL, var *vn, double func(double x, double y, double z));
|
||||||
|
|||||||
@@ -70,10 +70,34 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
const double FF = 0.75, eta = 2.0;
|
const double FF = 0.75, eta = 2.0;
|
||||||
const double F1o3 = 1.0/3.0, F2o3 = 2.0/3.0, F3o2 = 1.5, F1o6 = 1.0/6.0;
|
const double F1o3 = 1.0/3.0, F2o3 = 2.0/3.0, F3o2 = 1.5, F1o6 = 1.0/6.0;
|
||||||
const double F16 = 16.0, F8 = 8.0;
|
const double F16 = 16.0, F8 = 8.0;
|
||||||
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5)
|
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7)
|
||||||
double reta[all];
|
double reta[all];
|
||||||
/* 使用时:reta[idx],其中 idx = i + nx*(j + ny*k) (Fortran列主序) */
|
/* 使用时:reta[idx],其中 idx = i + nx*(j + ny*k) (Fortran列主序) */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (GAUGE == 6 || GAUGE == 7)
|
||||||
|
int BHN = 0;
|
||||||
|
double Porg[9] = {0.0};
|
||||||
|
double Mass[3] = {0.0};
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#ifdef fortran1
|
||||||
|
void getpbh(int &, double *, double *);
|
||||||
|
#elif defined(fortran2)
|
||||||
|
void GETPBH(int &, double *, double *);
|
||||||
|
#else
|
||||||
|
void getpbh_(int &, double *, double *);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef fortran1
|
||||||
|
getpbh(BHN, Porg, Mass);
|
||||||
|
#elif defined(fortran2)
|
||||||
|
GETPBH(BHN, Porg, Mass);
|
||||||
|
#else
|
||||||
|
getpbh_(BHN, Porg, Mass);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
PI = acos(-1.0);
|
PI = acos(-1.0);
|
||||||
dX = X[1] - X[0];
|
dX = X[1] - X[0];
|
||||||
dY = Y[1] - Y[0];
|
dY = Y[1] - Y[0];
|
||||||
@@ -966,11 +990,67 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 1ms //
|
// 1ms //
|
||||||
for(int i=0;i<all;i+=1){
|
#if (GAUGE == 6 || GAUGE == 7)
|
||||||
|
if (BHN == 2) {
|
||||||
|
const double M = Mass[0] + Mass[1];
|
||||||
|
const double A = TWO / M;
|
||||||
|
const double w1 = 1.2e1;
|
||||||
|
const double w2 = w1;
|
||||||
|
const double C1 = ONE / Mass[0] - A;
|
||||||
|
const double C2 = ONE / Mass[1] - A;
|
||||||
|
const double denom =
|
||||||
|
(Porg[0] - Porg[3]) * (Porg[0] - Porg[3]) +
|
||||||
|
(Porg[1] - Porg[4]) * (Porg[1] - Porg[4]) +
|
||||||
|
(Porg[2] - Porg[5]) * (Porg[2] - Porg[5]);
|
||||||
|
|
||||||
|
for (int k0 = 0; k0 < nz; ++k0) {
|
||||||
|
for (int j0 = 0; j0 < ny; ++j0) {
|
||||||
|
for (int i0 = 0; i0 < nx; ++i0) {
|
||||||
|
const size_t p = idx_ex(i0, j0, k0, ex);
|
||||||
|
const double dx1 = Porg[0] - X[i0];
|
||||||
|
const double dy1 = Porg[1] - Y[j0];
|
||||||
|
const double dz1 = Porg[2] - Z[k0];
|
||||||
|
const double dx2 = Porg[3] - X[i0];
|
||||||
|
const double dy2 = Porg[4] - Y[j0];
|
||||||
|
const double dz2 = Porg[5] - Z[k0];
|
||||||
|
const double r1 = (dx1 * dx1 + dy1 * dy1 + dz1 * dz1) / denom;
|
||||||
|
const double r2 = (dx2 * dx2 + dy2 * dy2 + dz2 * dz2) / denom;
|
||||||
|
#if (GAUGE == 6)
|
||||||
|
reta[p] = A + C1 / (ONE + w1 * r1) + C2 / (ONE + w2 * r2);
|
||||||
|
#else
|
||||||
|
reta[p] = A + C1 * exp(-w1 * r1) + C2 * exp(-w2 * r2);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
printf("not support BH_num in Jason's form %d %d\n", (GAUGE == 6) ? 1 : 2, BHN);
|
||||||
|
for (int i = 0; i < all; ++i) reta[i] = ZEO;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (int i = 0; i < all; i += 1) {
|
||||||
|
#if (GAUGE == 0)
|
||||||
betax_rhs[i] = FF * dtSfx[i];
|
betax_rhs[i] = FF * dtSfx[i];
|
||||||
betay_rhs[i] = FF * dtSfy[i];
|
betay_rhs[i] = FF * dtSfy[i];
|
||||||
betaz_rhs[i] = FF * dtSfz[i];
|
betaz_rhs[i] = FF * dtSfz[i];
|
||||||
#if (GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5)
|
|
||||||
|
dtSfx_rhs[i] = Gamx_rhs[i] - eta * dtSfx[i];
|
||||||
|
dtSfy_rhs[i] = Gamy_rhs[i] - eta * dtSfy[i];
|
||||||
|
dtSfz_rhs[i] = Gamz_rhs[i] - eta * dtSfz[i];
|
||||||
|
#elif (GAUGE == 1)
|
||||||
|
betax_rhs[i] = Gamx[i] - eta * betax[i];
|
||||||
|
betay_rhs[i] = Gamy[i] - eta * betay[i];
|
||||||
|
betaz_rhs[i] = Gamz[i] - eta * betaz[i];
|
||||||
|
|
||||||
|
dtSfx_rhs[i] = ZEO;
|
||||||
|
dtSfy_rhs[i] = ZEO;
|
||||||
|
dtSfz_rhs[i] = ZEO;
|
||||||
|
#elif (GAUGE == 2 || GAUGE == 3)
|
||||||
|
betax_rhs[i] = FF * dtSfx[i];
|
||||||
|
betay_rhs[i] = FF * dtSfy[i];
|
||||||
|
betaz_rhs[i] = FF * dtSfz[i];
|
||||||
|
|
||||||
reta[i] =
|
reta[i] =
|
||||||
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
|
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
|
||||||
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
|
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
|
||||||
@@ -979,16 +1059,46 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
|
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
|
||||||
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
|
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
|
||||||
|
|
||||||
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (1.0 - sqrt(chin1[i])), 2.0 );
|
#if (GAUGE == 2)
|
||||||
|
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
|
||||||
|
#else
|
||||||
|
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - chin1[i]), 2.0 );
|
||||||
|
#endif
|
||||||
|
|
||||||
dtSfx_rhs[i] = Gamx_rhs[i] - reta[i] * dtSfx[i];
|
dtSfx_rhs[i] = Gamx_rhs[i] - reta[i] * dtSfx[i];
|
||||||
dtSfy_rhs[i] = Gamy_rhs[i] - reta[i] * dtSfy[i];
|
dtSfy_rhs[i] = Gamy_rhs[i] - reta[i] * dtSfy[i];
|
||||||
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
||||||
#else
|
#elif (GAUGE == 4 || GAUGE == 5)
|
||||||
dtSfx_rhs[i] = Gamx_rhs[i] - eta * dtSfx[i];
|
reta[i] =
|
||||||
dtSfy_rhs[i] = Gamy_rhs[i] - eta * dtSfy[i];
|
gupxx[i] * dtSfx_rhs[i] * dtSfx_rhs[i]
|
||||||
dtSfz_rhs[i] = Gamz_rhs[i] - eta * dtSfz[i];
|
+ gupyy[i] * dtSfy_rhs[i] * dtSfy_rhs[i]
|
||||||
#endif
|
+ gupzz[i] * dtSfz_rhs[i] * dtSfz_rhs[i]
|
||||||
|
+ TWO * ( gupxy[i] * dtSfx_rhs[i] * dtSfy_rhs[i]
|
||||||
|
+ gupxz[i] * dtSfx_rhs[i] * dtSfz_rhs[i]
|
||||||
|
+ gupyz[i] * dtSfy_rhs[i] * dtSfz_rhs[i] );
|
||||||
|
|
||||||
|
#if (GAUGE == 4)
|
||||||
|
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - sqrt(chin1[i])), 2.0 );
|
||||||
|
#else
|
||||||
|
reta[i] = 1.31 / 2.0 * sqrt( reta[i] / chin1[i] ) / pow( (ONE - chin1[i]), 2.0 );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
betax_rhs[i] = FF * Gamx[i] - reta[i] * betax[i];
|
||||||
|
betay_rhs[i] = FF * Gamy[i] - reta[i] * betay[i];
|
||||||
|
betaz_rhs[i] = FF * Gamz[i] - reta[i] * betaz[i];
|
||||||
|
|
||||||
|
dtSfx_rhs[i] = ZEO;
|
||||||
|
dtSfy_rhs[i] = ZEO;
|
||||||
|
dtSfz_rhs[i] = ZEO;
|
||||||
|
#elif (GAUGE == 6 || GAUGE == 7)
|
||||||
|
betax_rhs[i] = FF * dtSfx[i];
|
||||||
|
betay_rhs[i] = FF * dtSfy[i];
|
||||||
|
betaz_rhs[i] = FF * dtSfz[i];
|
||||||
|
|
||||||
|
dtSfx_rhs[i] = Gamx_rhs[i] - reta[i] * dtSfx[i];
|
||||||
|
dtSfy_rhs[i] = Gamy_rhs[i] - reta[i] * dtSfy[i];
|
||||||
|
dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i];
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
// 26ms //
|
// 26ms //
|
||||||
lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS);
|
lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS);
|
||||||
|
|||||||
@@ -130,7 +130,11 @@ void cgh::compose_cgh(int nprocs)
|
|||||||
for (int lev = 0; lev < levels; lev++)
|
for (int lev = 0; lev < levels; lev++)
|
||||||
{
|
{
|
||||||
checkPatchList(PatL[lev], false);
|
checkPatchList(PatL[lev], false);
|
||||||
|
#ifdef INTERP_LB_OPTIMIZE
|
||||||
|
Parallel::distribute_optimize(PatL[lev], nprocs, ingfs, fngfs, false);
|
||||||
|
#else
|
||||||
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
|
Parallel::distribute(PatL[lev], nprocs, ingfs, fngfs, false);
|
||||||
|
#endif
|
||||||
#if (RPB == 1)
|
#if (RPB == 1)
|
||||||
// we need distributed box of PatL[lev] and PatL[lev-1]
|
// we need distributed box of PatL[lev] and PatL[lev-1]
|
||||||
if (lev > 0)
|
if (lev > 0)
|
||||||
|
|||||||
@@ -73,6 +73,10 @@ void fdderivs(const int ex[3],
|
|||||||
|
|
||||||
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
/* 输出清零:fxx,fyy,fzz,fxy,fxz,fyz = 0 */
|
||||||
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
for (size_t p = 0; p < all; ++p) {
|
||||||
|
fxx[p] = ZEO; fyy[p] = ZEO; fzz[p] = ZEO;
|
||||||
|
fxy[p] = ZEO; fxz[p] = ZEO; fyz[p] = ZEO;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fortran:
|
* Fortran:
|
||||||
|
|||||||
@@ -74,6 +74,11 @@ void fderivs(const int ex[3],
|
|||||||
|
|
||||||
// fx = fy = fz = 0
|
// fx = fy = fz = 0
|
||||||
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
const size_t all = (size_t)ex1 * (size_t)ex2 * (size_t)ex3;
|
||||||
|
for (size_t p = 0; p < all; ++p) {
|
||||||
|
fx[p] = ZEO;
|
||||||
|
fy[p] = ZEO;
|
||||||
|
fz[p] = ZEO;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fortran loops:
|
* Fortran loops:
|
||||||
|
|||||||
107
AMSS_NCKU_source/interp_lb_profile.C
Normal file
107
AMSS_NCKU_source/interp_lb_profile.C
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
#include "interp_lb_profile.h"
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
|
||||||
|
|
||||||
|
namespace InterpLBProfile {
|
||||||
|
|
||||||
|
bool write_profile(const char *filepath, int nprocs,
|
||||||
|
const double *rank_times,
|
||||||
|
const int *heavy_ranks, int num_heavy,
|
||||||
|
double threshold_ratio)
|
||||||
|
{
|
||||||
|
FILE *fp = fopen(filepath, "wb");
|
||||||
|
if (!fp) return false;
|
||||||
|
|
||||||
|
ProfileHeader hdr;
|
||||||
|
hdr.magic = MAGIC;
|
||||||
|
hdr.version = VERSION;
|
||||||
|
hdr.nprocs = nprocs;
|
||||||
|
hdr.num_heavy = num_heavy;
|
||||||
|
hdr.threshold_ratio = threshold_ratio;
|
||||||
|
|
||||||
|
fwrite(&hdr, sizeof(hdr), 1, fp);
|
||||||
|
fwrite(rank_times, sizeof(double), nprocs, fp);
|
||||||
|
fwrite(heavy_ranks, sizeof(int), num_heavy, fp);
|
||||||
|
fclose(fp);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool read_profile(const char *filepath, int current_nprocs,
|
||||||
|
int *heavy_ranks, int &num_heavy,
|
||||||
|
double *rank_times, MPI_Comm comm)
|
||||||
|
{
|
||||||
|
int myrank;
|
||||||
|
MPI_Comm_rank(comm, &myrank);
|
||||||
|
|
||||||
|
int valid = 0;
|
||||||
|
ProfileHeader hdr;
|
||||||
|
memset(&hdr, 0, sizeof(hdr));
|
||||||
|
|
||||||
|
if (myrank == 0) {
|
||||||
|
FILE *fp = fopen(filepath, "rb");
|
||||||
|
if (fp) {
|
||||||
|
if (fread(&hdr, sizeof(hdr), 1, fp) == 1 &&
|
||||||
|
hdr.magic == MAGIC && hdr.version == VERSION &&
|
||||||
|
hdr.nprocs == current_nprocs)
|
||||||
|
{
|
||||||
|
if (fread(rank_times, sizeof(double), current_nprocs, fp)
|
||||||
|
== (size_t)current_nprocs &&
|
||||||
|
fread(heavy_ranks, sizeof(int), hdr.num_heavy, fp)
|
||||||
|
== (size_t)hdr.num_heavy)
|
||||||
|
{
|
||||||
|
num_heavy = hdr.num_heavy;
|
||||||
|
valid = 1;
|
||||||
|
}
|
||||||
|
} else if (fp) {
|
||||||
|
printf("[InterpLB] Profile rejected: magic=0x%X version=%u "
|
||||||
|
"nprocs=%d (current=%d)\n",
|
||||||
|
hdr.magic, hdr.version, hdr.nprocs, current_nprocs);
|
||||||
|
}
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Bcast(&valid, 1, MPI_INT, 0, comm);
|
||||||
|
if (!valid) return false;
|
||||||
|
|
||||||
|
MPI_Bcast(&num_heavy, 1, MPI_INT, 0, comm);
|
||||||
|
MPI_Bcast(heavy_ranks, num_heavy, MPI_INT, 0, comm);
|
||||||
|
MPI_Bcast(rank_times, current_nprocs, MPI_DOUBLE, 0, comm);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Select the ranks whose time exceeds `threshold_ratio` times the mean time.
 *
 * The selected ranks are written to `heavy_ranks` ordered by decreasing
 * time; ranks with equal time are ordered by increasing rank id so the
 * result is reproducible. At most `max_heavy` ranks are written.
 *
 * @param rank_times       per-rank timings, `nprocs` values
 * @param nprocs           number of entries in `rank_times`
 * @param threshold_ratio  a rank is heavy if time > threshold_ratio * mean
 * @param heavy_ranks      out: buffer with room for `max_heavy` ints
 * @param max_heavy        capacity of `heavy_ranks`
 * @return number of ranks written; 0 if nothing qualifies or if the
 *         arguments are unusable (null pointers, nprocs <= 0, max_heavy <= 0).
 */
int identify_heavy_ranks(const double *rank_times, int nprocs,
                         double threshold_ratio,
                         int *heavy_ranks, int max_heavy)
{
    // Nothing can be selected; also avoids dividing by zero below.
    if (!rank_times || !heavy_ranks || nprocs <= 0 || max_heavy <= 0) return 0;

    double sum = 0;
    for (int i = 0; i < nprocs; ++i) sum += rank_times[i];
    const double mean = sum / nprocs;
    const double threshold = threshold_ratio * mean;

    struct RankTime { int rank; double time; };
    std::vector<RankTime> candidates;
    candidates.reserve(static_cast<size_t>(nprocs));
    for (int i = 0; i < nprocs; ++i) {
        if (rank_times[i] > threshold)
            candidates.push_back({i, rank_times[i]});
    }

    const int ncand = static_cast<int>(candidates.size());
    const int count = (ncand < max_heavy) ? ncand : max_heavy;

    // Only the first `count` entries are needed in sorted order.
    std::partial_sort(candidates.begin(), candidates.begin() + count, candidates.end(),
                      [](const RankTime &a, const RankTime &b) {
                          if (a.time != b.time) return a.time > b.time;
                          return a.rank < b.rank; // deterministic tie-break
                      });

    for (int i = 0; i < count; ++i)
        heavy_ranks[i] = candidates[i].rank;

    return count;
}
|
||||||
|
|
||||||
|
} // namespace InterpLBProfile
|
||||||
BIN
AMSS_NCKU_source/interp_lb_profile.bin
Normal file
BIN
AMSS_NCKU_source/interp_lb_profile.bin
Normal file
Binary file not shown.
38
AMSS_NCKU_source/interp_lb_profile.h
Normal file
38
AMSS_NCKU_source/interp_lb_profile.h
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
#ifndef INTERP_LB_PROFILE_H
|
||||||
|
#define INTERP_LB_PROFILE_H
|
||||||
|
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
namespace InterpLBProfile {
|
||||||
|
|
||||||
|
static const unsigned int MAGIC = 0x494C4250; // "ILBP"
|
||||||
|
static const unsigned int VERSION = 1;
|
||||||
|
|
||||||
|
struct ProfileHeader {
|
||||||
|
unsigned int magic;
|
||||||
|
unsigned int version;
|
||||||
|
int nprocs;
|
||||||
|
int num_heavy;
|
||||||
|
double threshold_ratio;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Write profile file (rank 0 only)
|
||||||
|
bool write_profile(const char *filepath, int nprocs,
|
||||||
|
const double *rank_times,
|
||||||
|
const int *heavy_ranks, int num_heavy,
|
||||||
|
double threshold_ratio);
|
||||||
|
|
||||||
|
// Read profile file (rank 0 reads, then broadcasts to all)
|
||||||
|
// Returns true if file found and valid for current nprocs
|
||||||
|
bool read_profile(const char *filepath, int current_nprocs,
|
||||||
|
int *heavy_ranks, int &num_heavy,
|
||||||
|
double *rank_times, MPI_Comm comm);
|
||||||
|
|
||||||
|
// Identify heavy ranks: those with time > threshold_ratio * mean
|
||||||
|
int identify_heavy_ranks(const double *rank_times, int nprocs,
|
||||||
|
double threshold_ratio,
|
||||||
|
int *heavy_ranks, int max_heavy);
|
||||||
|
|
||||||
|
} // namespace InterpLBProfile
|
||||||
|
|
||||||
|
#endif /* INTERP_LB_PROFILE_H */
|
||||||
27
AMSS_NCKU_source/interp_lb_profile_data.h
Normal file
27
AMSS_NCKU_source/interp_lb_profile_data.h
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
/* Auto-generated from interp_lb_profile.bin — do not edit */
|
||||||
|
#ifndef INTERP_LB_PROFILE_DATA_H
|
||||||
|
#define INTERP_LB_PROFILE_DATA_H
|
||||||
|
|
||||||
|
#define INTERP_LB_NPROCS 64
|
||||||
|
#define INTERP_LB_NUM_HEAVY 4
|
||||||
|
|
||||||
|
static const int interp_lb_heavy_blocks[4] = {27, 35, 28, 36};
|
||||||
|
|
||||||
|
/* Split table: {block_id, r_left, r_right} */
|
||||||
|
static const int interp_lb_splits[4][3] = {
|
||||||
|
{27, 26, 27},
|
||||||
|
{35, 34, 35},
|
||||||
|
{28, 28, 29},
|
||||||
|
{36, 36, 37},
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Rank remap for displaced neighbor blocks */
|
||||||
|
static const int interp_lb_num_remaps = 4;
|
||||||
|
static const int interp_lb_remaps[][2] = {
|
||||||
|
{26, 25},
|
||||||
|
{29, 30},
|
||||||
|
{34, 33},
|
||||||
|
{37, 38},
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* INTERP_LB_PROFILE_DATA_H */
|
||||||
@@ -10,14 +10,14 @@ PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/default.profdata
|
|||||||
ifeq ($(PGO_MODE),instrument)
|
ifeq ($(PGO_MODE),instrument)
|
||||||
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
## Phase 1: instrumentation — omit -ipo/-fp-model fast=2 for faster build and numerical stability
|
||||||
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
CXXAPPFLAGS = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||||
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
-align array64byte -fpp -I${MKLROOT}/include
|
||||||
else
|
else
|
||||||
## opt (default): maximum performance with PGO profile data
|
## opt (default): maximum performance with PGO profile data
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
-fprofile-instr-use=$(PROFDATA) \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS)
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
-fprofile-instr-use=$(PROFDATA) \
|
-fprofile-instr-use=$(PROFDATA) \
|
||||||
-align array64byte -fpp -I${MKLROOT}/include
|
-align array64byte -fpp -I${MKLROOT}/include
|
||||||
@@ -53,8 +53,14 @@ kodiss_c.o: kodiss_c.C
|
|||||||
lopsided_c.o: lopsided_c.C
|
lopsided_c.o: lopsided_c.C
|
||||||
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
|
|
||||||
## TwoPunctureABE uses fixed optimal flags, independent of CXXAPPFLAGS (which may be PGO-instrumented)
|
interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h
|
||||||
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo -Dfortran3 -Dnewc -I${MKLROOT}/include
|
${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@
|
||||||
|
|
||||||
|
## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS
|
||||||
|
TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata
|
||||||
|
TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \
|
||||||
|
-fprofile-instr-use=$(TP_PROFDATA) \
|
||||||
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
|
|
||||||
TwoPunctures.o: TwoPunctures.C
|
TwoPunctures.o: TwoPunctures.C
|
||||||
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
${CXX} $(TP_OPTFLAGS) -qopenmp -c $< -o $@
|
||||||
@@ -64,15 +70,21 @@ TwoPunctureABE.o: TwoPunctureABE.C
|
|||||||
|
|
||||||
# Input files
|
# Input files
|
||||||
|
|
||||||
# C rewrite files
|
## Kernel implementation switch (set USE_CXX_KERNELS=0 to fall back to Fortran)
|
||||||
|
ifeq ($(USE_CXX_KERNELS),0)
|
||||||
|
# Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below
|
||||||
|
CFILES =
|
||||||
|
else
|
||||||
|
# C++ mode (default): C rewrite of bssn_rhs and helper kernels
|
||||||
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o
|
CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o
|
||||||
|
endif
|
||||||
|
|
||||||
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||||
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
cgh.o bssn_class.o surface_integral.o ShellPatch.o\
|
||||||
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
bssnEScalar_class.o perf.o Z4c_class.o NullShellPatch.o\
|
||||||
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
bssnEM_class.o cpbc_util.o z4c_rhs_point.o checkpoint.o\
|
||||||
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
Parallel_bam.o scalar_class.o transpbh.o NullShellPatch2.o\
|
||||||
NullShellPatch2_Evo.o writefile_f.o
|
NullShellPatch2_Evo.o writefile_f.o interp_lb_profile.o
|
||||||
|
|
||||||
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\
|
||||||
cgh.o surface_integral.o ShellPatch.o\
|
cgh.o surface_integral.o ShellPatch.o\
|
||||||
@@ -82,7 +94,7 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o
|
|||||||
NullShellPatch2_Evo.o \
|
NullShellPatch2_Evo.o \
|
||||||
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
bssn_gpu_class.o bssn_step_gpu.o bssn_macro.o writefile_f.o
|
||||||
|
|
||||||
F90FILES = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
F90FILES_BASE = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||||
rungekutta4_rout.o diff_new.o kodiss.o kodiss_sh.o\
|
rungekutta4_rout.o diff_new.o kodiss.o kodiss_sh.o\
|
||||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||||
@@ -95,6 +107,14 @@ F90FILES = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
|||||||
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
|
scalar_rhs.o initial_scalar.o NullEvol2.o initial_null2.o\
|
||||||
NullNews2.o tool_f.o
|
NullNews2.o tool_f.o
|
||||||
|
|
||||||
|
ifeq ($(USE_CXX_KERNELS),0)
|
||||||
|
# Fortran mode: include original bssn_rhs.o
|
||||||
|
F90FILES = $(F90FILES_BASE) bssn_rhs.o
|
||||||
|
else
|
||||||
|
# C++ mode (default): bssn_rhs.o replaced by C++ kernel
|
||||||
|
F90FILES = $(F90FILES_BASE)
|
||||||
|
endif
|
||||||
|
|
||||||
F77FILES = zbesh.o
|
F77FILES = zbesh.o
|
||||||
|
|
||||||
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
|
AHFDOBJS = expansion.o expansion_Jacobian.o patch.o coords.o patch_info.o patch_interp.o patch_system.o \
|
||||||
|
|||||||
@@ -14,6 +14,25 @@ LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore
|
|||||||
## opt : (default) maximum performance with PGO profile-guided optimization
|
## opt : (default) maximum performance with PGO profile-guided optimization
|
||||||
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
## instrument : PGO Phase 1 instrumentation to collect fresh profile data
|
||||||
PGO_MODE ?= opt
|
PGO_MODE ?= opt
|
||||||
|
|
||||||
|
## Interp_Points load balance profiling mode
|
||||||
|
## off : (default) no load balance instrumentation
|
||||||
|
## profile : Pass 1 — instrument Interp_Points to collect timing profile
|
||||||
|
## optimize : Pass 2 — read profile and apply block rebalancing
|
||||||
|
INTERP_LB_MODE ?= off
|
||||||
|
|
||||||
|
ifeq ($(INTERP_LB_MODE),profile)
|
||||||
|
INTERP_LB_FLAGS = -DINTERP_LB_PROFILE
|
||||||
|
else ifeq ($(INTERP_LB_MODE),optimize)
|
||||||
|
INTERP_LB_FLAGS = -DINTERP_LB_OPTIMIZE
|
||||||
|
else
|
||||||
|
INTERP_LB_FLAGS =
|
||||||
|
endif
|
||||||
|
|
||||||
|
## Kernel implementation switch
|
||||||
|
## 1 (default) : use C++ rewrite of bssn_rhs and helper kernels (faster)
|
||||||
|
## 0 : fall back to original Fortran kernels
|
||||||
|
USE_CXX_KERNELS ?= 1
|
||||||
f90 = ifx
|
f90 = ifx
|
||||||
f77 = ifx
|
f77 = ifx
|
||||||
CXX = icpx
|
CXX = icpx
|
||||||
|
|||||||
72
generate_interp_lb_header.py
Normal file
72
generate_interp_lb_header.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python3
"""Convert interp_lb_profile.bin to a C header for compile-time embedding."""
import struct, sys

# Must match MAGIC / VERSION in interp_lb_profile.h.
PROFILE_MAGIC = 0x494C4250  # "ILBP"
PROFILE_VERSION = 1


def read_profile(path):
    """Read a binary profile and return (nprocs, threshold, times, heavy).

    The file holds a 24-byte header (magic, version, nprocs, num_heavy,
    threshold_ratio) followed by nprocs doubles and num_heavy ints, all in
    native byte order.

    Raises ValueError if the header is not a supported profile and
    struct.error if the file is truncated.
    """
    with open(path, 'rb') as f:
        magic, version, nprocs, num_heavy = struct.unpack('IIii', f.read(16))
        if magic != PROFILE_MAGIC or version != PROFILE_VERSION:
            raise ValueError(
                f"not a supported profile (magic=0x{magic:X}, version={version})")
        # A negative size passed to f.read() would read the rest of the file.
        if nprocs < 0 or num_heavy < 0:
            raise ValueError(
                f"invalid counts in header (nprocs={nprocs}, num_heavy={num_heavy})")
        threshold = struct.unpack('d', f.read(8))[0]
        times = list(struct.unpack(f'{nprocs}d', f.read(nprocs * 8)))
        heavy = list(struct.unpack(f'{num_heavy}i', f.read(num_heavy * 4)))
    return nprocs, threshold, times, heavy


def compute_splits(times, heavy):
    """Return one (block_id, r_left, r_right) tuple per heavy rank.

    Each heavy block is shared with its lighter direct neighbor; when both
    neighbors have the same time the previous rank is chosen. A heavy rank
    at either end of the range uses its only neighbor.

    Raises ValueError if a heavy rank is outside [0, len(times)) or has no
    neighbor at all.
    """
    nprocs = len(times)
    splits = []
    for hr in heavy:
        # Negative values would otherwise wrap around via Python indexing.
        if not 0 <= hr < nprocs:
            raise ValueError(f"heavy rank {hr} is outside [0, {nprocs})")
        has_prev = hr > 0
        has_next = hr < nprocs - 1
        if not has_prev and not has_next:
            raise ValueError(f"heavy rank {hr} has no neighbor to share with")
        if has_prev and (not has_next or times[hr - 1] <= times[hr + 1]):
            splits.append((hr, hr - 1, hr))  # (block_id, r_left, r_right)
        else:
            splits.append((hr, hr, hr + 1))
    return splits


def compute_remaps(splits, nprocs):
    """Return {block: rank} for neighbor blocks displaced by the splits.

    The block whose rank now hosts half of a heavy block is moved one rank
    further away from the heavy block, unless that rank is itself heavy or
    does not exist, in which case the rank on the other side is used.
    """
    heavy_blocks = {s[0] for s in splits}
    remaps = {}
    for hr, r_l, r_r in splits:
        if r_l != hr:
            # We took r_l's slot, so remap block r_l to its other neighbor
            displaced = r_l
            if displaced > 0 and displaced - 1 not in heavy_blocks:
                remaps[displaced] = displaced - 1
            elif displaced < nprocs - 1:
                remaps[displaced] = displaced + 1
        else:
            displaced = r_r
            if displaced < nprocs - 1 and displaced + 1 not in heavy_blocks:
                remaps[displaced] = displaced + 1
            elif displaced > 0:
                remaps[displaced] = displaced - 1
    return remaps


def render_header(nprocs, heavy, splits, remaps):
    """Return the text of the generated C header."""
    num_heavy = len(heavy)
    parts = [
        "/* Auto-generated from interp_lb_profile.bin — do not edit */\n",
        "#ifndef INTERP_LB_PROFILE_DATA_H\n",
        "#define INTERP_LB_PROFILE_DATA_H\n\n",
        f"#define INTERP_LB_NPROCS {nprocs}\n",
        f"#define INTERP_LB_NUM_HEAVY {num_heavy}\n\n",
        f"static const int interp_lb_heavy_blocks[{num_heavy}] = {{",
        ", ".join(str(h) for h in heavy),
        "};\n\n",
        "/* Split table: {block_id, r_left, r_right} */\n",
        f"static const int interp_lb_splits[{num_heavy}][3] = {{\n",
    ]
    for bid, rl, rr in splits:
        parts.append(f" {{{bid}, {rl}, {rr}}},\n")
    parts.append("};\n\n")
    parts.append("/* Rank remap for displaced neighbor blocks */\n")
    parts.append(f"static const int interp_lb_num_remaps = {len(remaps)};\n")
    parts.append("static const int interp_lb_remaps[][2] = {\n")
    for src, dst in sorted(remaps.items()):
        parts.append(f" {{{src}, {dst}}},\n")
    if not remaps:
        # Keep the array non-empty so the declaration stays valid.
        parts.append(" {-1, -1},\n")
    parts.append("};\n\n")
    parts.append("#endif /* INTERP_LB_PROFILE_DATA_H */\n")
    return "".join(parts)


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        print(f"Usage: {argv[0]} <profile.bin> <output.h>")
        return 1

    try:
        nprocs, _threshold, times, heavy = read_profile(argv[1])
        splits = compute_splits(times, heavy)
    except (OSError, ValueError, struct.error) as exc:
        print(f"{argv[0]}: cannot use {argv[1]}: {exc}", file=sys.stderr)
        return 1

    remaps = compute_remaps(splits, nprocs)

    # The banner contains a non-ASCII dash, so the encoding is fixed.
    with open(argv[2], 'w', encoding='utf-8') as out:
        out.write(render_header(nprocs, heavy, splits, remaps))

    print(f"Generated {argv[2]}:")
    print(f" {len(heavy)} heavy blocks to split: {heavy}")
    for bid, rl, rr in splits:
        print(f" block {bid}: split -> rank {rl} (left), rank {rr} (right)")
    for src, dst in sorted(remaps.items()):
        print(f" block {src}: remap -> rank {dst}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||||
@@ -69,7 +69,7 @@ def makefile_ABE():
|
|||||||
|
|
||||||
## Build command with CPU binding to nohz_full cores
|
## Build command with CPU binding to nohz_full cores
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABE"
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} INTERP_LB_MODE=optimize ABE"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
makefile_command = f"{NUMACTL_CPU_BIND} make -j{BUILD_JOBS} ABEGPU"
|
||||||
else:
|
else:
|
||||||
|
|||||||
BIN
pgo_profile/TwoPunctureABE.profdata
Normal file
BIN
pgo_profile/TwoPunctureABE.profdata
Normal file
Binary file not shown.
Binary file not shown.
BIN
pgo_profile/default.profdata-f
Normal file
BIN
pgo_profile/default.profdata-f
Normal file
Binary file not shown.
BIN
pgo_profile/default_9726420327935033477_0.profraw
Normal file
BIN
pgo_profile/default_9726420327935033477_0.profraw
Normal file
Binary file not shown.
Reference in New Issue
Block a user