[WIP] Add CUDA support for Z4C
Rewritten done by Codex. This still has errors, do not pick this one now.
This commit is contained in:
@@ -26,12 +26,20 @@ using namespace std;
|
||||
#include "shellfunctions.h"
|
||||
#include "cpbc.h"
|
||||
#include "kodiss.h"
|
||||
#include "parameters.h"
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
#include "parameters.h"
|
||||
|
||||
#ifndef USE_CUDA_Z4C
|
||||
#define USE_CUDA_Z4C 0
|
||||
#endif
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#include "z4c_rhs_cuda.h"
|
||||
#endif
|
||||
|
||||
#ifdef With_AHF
|
||||
#include "derivatives.h"
|
||||
#include "myglobal.h"
|
||||
#endif
|
||||
|
||||
//================================================================================================
|
||||
|
||||
@@ -167,12 +175,554 @@ Z4c_class::~Z4c_class()
|
||||
|
||||
#define MRBD 0 // 0: fix BD for meshrefinement level; 1: sommerfeld_bam for them; 2: sommerfeld_yo for them
|
||||
|
||||
#ifndef CPBC
|
||||
// for sommerfeld boundary
|
||||
|
||||
void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifndef CPBC
|
||||
// for sommerfeld boundary
|
||||
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#ifdef WithShell
|
||||
#error "USE_CUDA_Z4C resident path currently supports Cartesian non-shell Z4C only"
|
||||
#endif
|
||||
#if (MRBD == 2)
|
||||
#error "USE_CUDA_Z4C resident path does not support MRBD == 2"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
static const int k_z4c_cuda_bh_state_indices[3] = {18, 19, 20};
|
||||
|
||||
bool fill_z4c_cuda_views(Block *cg, MyList<var> *vars,
|
||||
double **host_views,
|
||||
double *propspeeds = 0,
|
||||
double *soa_flat = 0)
|
||||
{
|
||||
int idx = 0;
|
||||
while (vars && idx < Z4C_CUDA_STATE_COUNT)
|
||||
{
|
||||
host_views[idx] = cg->fgfs[vars->data->sgfn];
|
||||
if (propspeeds)
|
||||
propspeeds[idx] = vars->data->propspeed;
|
||||
if (soa_flat)
|
||||
{
|
||||
soa_flat[3 * idx + 0] = vars->data->SoA[0];
|
||||
soa_flat[3 * idx + 1] = vars->data->SoA[1];
|
||||
soa_flat[3 * idx + 2] = vars->data->SoA[2];
|
||||
}
|
||||
vars = vars->next;
|
||||
++idx;
|
||||
}
|
||||
return idx == Z4C_CUDA_STATE_COUNT && vars == 0;
|
||||
}
|
||||
|
||||
void z4c_cuda_download_level_state(MyList<Patch> *PatL, MyList<var> *vars, int myrank, bool release_ctx)
|
||||
{
|
||||
MyList<Patch> *Pp = PatL;
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank && z4c_cuda_has_resident_state(cg))
|
||||
{
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, vars, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on resident state download" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (z4c_cuda_download_resident_state(cg, cg->shape, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C resident state download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (release_ctx)
|
||||
z4c_cuda_release_step_ctx(cg);
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
}
|
||||
|
||||
bool z4c_cuda_patch_contains_point(Patch *patch, const double *point)
|
||||
{
|
||||
if (!patch)
|
||||
return false;
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
const double h = patch->getdX(d);
|
||||
const double lo = patch->bbox[d] + patch->lli[d] * h;
|
||||
const double hi = patch->bbox[dim + d] - patch->uui[d] * h;
|
||||
if (point[d] < lo || point[d] > hi)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool z4c_cuda_point_in_block(Patch *patch, Block *block,
|
||||
const double *point, const double *DH)
|
||||
{
|
||||
if (!patch || !block)
|
||||
return false;
|
||||
for (int d = 0; d < dim; d++)
|
||||
{
|
||||
double llb;
|
||||
double uub;
|
||||
#ifdef Vertex
|
||||
#ifdef Cell
|
||||
#error Both Cell and Vertex are defined
|
||||
#endif
|
||||
llb = (feq(block->bbox[d], patch->bbox[d], DH[d] / 2))
|
||||
? block->bbox[d] + patch->lli[d] * DH[d]
|
||||
: block->bbox[d] + (ghost_width - 0.5) * DH[d];
|
||||
uub = (feq(block->bbox[dim + d], patch->bbox[dim + d], DH[d] / 2))
|
||||
? block->bbox[dim + d] - patch->uui[d] * DH[d]
|
||||
: block->bbox[dim + d] - (ghost_width - 0.5) * DH[d];
|
||||
#else
|
||||
#ifdef Cell
|
||||
llb = (feq(block->bbox[d], patch->bbox[d], DH[d] / 2))
|
||||
? block->bbox[d] + patch->lli[d] * DH[d]
|
||||
: block->bbox[d] + ghost_width * DH[d];
|
||||
uub = (feq(block->bbox[dim + d], patch->bbox[dim + d], DH[d] / 2))
|
||||
? block->bbox[dim + d] - patch->uui[d] * DH[d]
|
||||
: block->bbox[dim + d] - ghost_width * DH[d];
|
||||
#else
|
||||
#error Not define Vertex nor Cell
|
||||
#endif
|
||||
#endif
|
||||
if (point[d] - llb < -DH[d] / 2 || point[d] - uub > DH[d] / 2)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int z4c_cuda_interp_tile_start(const double *coords, int n, double x, double dx, int ordn)
|
||||
{
|
||||
if (!coords || n <= ordn)
|
||||
return 0;
|
||||
int cxi = int((x - coords[0]) / dx + 0.4) + 1;
|
||||
int start = cxi - ordn / 2;
|
||||
if (start < 0)
|
||||
start = 0;
|
||||
const int max_start = n - ordn;
|
||||
if (start > max_start)
|
||||
start = max_start;
|
||||
return start;
|
||||
}
|
||||
|
||||
bool z4c_cuda_interp_bh_point_resident(MyList<Patch> *PatL,
|
||||
int myrank,
|
||||
const double *point,
|
||||
var *forx, var *fory, var *forz,
|
||||
int Symmetry,
|
||||
double *shellf)
|
||||
{
|
||||
const int ordn = 2 * ghost_width;
|
||||
int owner_rank = -1;
|
||||
|
||||
shellf[0] = shellf[1] = shellf[2] = 0.0;
|
||||
|
||||
MyList<Patch> *PL = PatL;
|
||||
while (PL)
|
||||
{
|
||||
Patch *patch = PL->data;
|
||||
if (!z4c_cuda_patch_contains_point(patch, point))
|
||||
{
|
||||
PL = PL->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
double DH[dim];
|
||||
for (int d = 0; d < dim; d++)
|
||||
DH[d] = patch->getdX(d);
|
||||
|
||||
MyList<Block> *BP = patch->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *block = BP->data;
|
||||
if (z4c_cuda_point_in_block(patch, block, point, DH))
|
||||
{
|
||||
owner_rank = block->rank;
|
||||
if (myrank == owner_rank)
|
||||
{
|
||||
int interp_ordn = ordn;
|
||||
int interp_sym = Symmetry;
|
||||
double x = point[0];
|
||||
double y = point[1];
|
||||
double z = point[2];
|
||||
|
||||
if (z4c_cuda_has_resident_state(block) &&
|
||||
block->shape[0] >= ordn && block->shape[1] >= ordn && block->shape[2] >= ordn)
|
||||
{
|
||||
const int sx = ordn;
|
||||
const int sy = ordn;
|
||||
const int sz = ordn;
|
||||
const int region_all = sx * sy * sz;
|
||||
const int i0 = z4c_cuda_interp_tile_start(block->X[0], block->shape[0], x, DH[0], ordn);
|
||||
const int j0 = z4c_cuda_interp_tile_start(block->X[1], block->shape[1], y, DH[1], ordn);
|
||||
const int k0 = z4c_cuda_interp_tile_start(block->X[2], block->shape[2], z, DH[2], ordn);
|
||||
double *packed_fields = new double[3 * region_all];
|
||||
var *vars[3] = {forx, fory, forz};
|
||||
for (int f = 0; f < 3; f++)
|
||||
{
|
||||
if (z4c_cuda_pack_state_region_to_host_buffer(block,
|
||||
k_z4c_cuda_bh_state_indices[f],
|
||||
packed_fields + f * region_all,
|
||||
block->shape,
|
||||
i0, j0, k0,
|
||||
sx, sy, sz) != 0)
|
||||
{
|
||||
delete[] packed_fields;
|
||||
cout << "CUDA Z4C BH tile download failed" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int tile_shape[3] = {sx, sy, sz};
|
||||
f_global_interp(tile_shape,
|
||||
block->X[0] + i0,
|
||||
block->X[1] + j0,
|
||||
block->X[2] + k0,
|
||||
packed_fields + f * region_all,
|
||||
shellf[f],
|
||||
x, y, z,
|
||||
interp_ordn,
|
||||
vars[f]->SoA,
|
||||
interp_sym);
|
||||
}
|
||||
delete[] packed_fields;
|
||||
}
|
||||
else
|
||||
{
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forx->sgfn], shellf[0],
|
||||
x, y, z, interp_ordn, forx->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[fory->sgfn], shellf[1],
|
||||
x, y, z, interp_ordn, fory->SoA, interp_sym);
|
||||
f_global_interp(block->shape, block->X[0], block->X[1], block->X[2],
|
||||
block->fgfs[forz->sgfn], shellf[2],
|
||||
x, y, z, interp_ordn, forz->SoA, interp_sym);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (BP == patch->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
|
||||
if (owner_rank >= 0)
|
||||
break;
|
||||
PL = PL->next;
|
||||
}
|
||||
|
||||
if (owner_rank < 0)
|
||||
return false;
|
||||
|
||||
MPI_Bcast(shellf, 3, MPI_DOUBLE, owner_rank, MPI_COMM_WORLD);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool z4c_cuda_compute_porg_rhs_resident(cgh *GH,
|
||||
int ilev,
|
||||
int myrank,
|
||||
int BH_num,
|
||||
double **BH_PS,
|
||||
double **BH_RHS,
|
||||
var *forx, var *fory, var *forz,
|
||||
int Symmetry)
|
||||
{
|
||||
for (int n = 0; n < BH_num; n++)
|
||||
{
|
||||
double shellf[3] = {0.0, 0.0, 0.0};
|
||||
int lev = ilev;
|
||||
while (lev >= 0 &&
|
||||
!z4c_cuda_interp_bh_point_resident(GH->PatL[lev], myrank, BH_PS[n],
|
||||
forx, fory, forz, Symmetry, shellf))
|
||||
{
|
||||
--lev;
|
||||
}
|
||||
if (lev < 0)
|
||||
return false;
|
||||
BH_RHS[n][0] = -shellf[0];
|
||||
BH_RHS[n][1] = -shellf[1];
|
||||
BH_RHS[n][2] = -shellf[2];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifdef With_AHF
|
||||
AH_Step_Find(lev, dT_lev);
|
||||
#endif
|
||||
bool BB = fgt(PhysTime, StartTime, dT_lev / 2);
|
||||
double ndeps = numepss;
|
||||
if (lev < GH->movls)
|
||||
ndeps = numepsb;
|
||||
double TRK4 = PhysTime;
|
||||
int iter_count = 0;
|
||||
int pre = 0, cor = 1;
|
||||
int ERROR = 0;
|
||||
|
||||
MyList<Patch> *Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
double *state_in[Z4C_CUDA_STATE_COUNT];
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
double propspeed[Z4C_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, StateList, state_in, propspeed, soa_flat) ||
|
||||
!fill_z4c_cuda_views(cg, SynchList_pre, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on predictor step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (MRBD == 0)
|
||||
#if (SommerType == 0)
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#elif (MRBD == 1)
|
||||
apply_bam_bc = 1;
|
||||
#endif
|
||||
int keep_resident_state = 1;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = 1;
|
||||
#endif
|
||||
if (z4c_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, pre,
|
||||
keep_resident_state, apply_enforce_ga, chitiny))
|
||||
{
|
||||
cout << "CUDA Z4C predictor substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed in predictor at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_pre, Symmetry);
|
||||
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
compute_Porg_rhs(Porg0, Porg_rhs, Sfx0, Sfy0, Sfz0, lev);
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg[ithBH][0], Porg_rhs[ithBH][0], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg[ithBH][1], Porg_rhs[ithBH][1], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg[ithBH][2], Porg_rhs[ithBH][2], iter_count);
|
||||
if (Symmetry > 0)
|
||||
Porg[ithBH][2] = fabs(Porg[ithBH][2]);
|
||||
if (Symmetry == 2)
|
||||
{
|
||||
Porg[ithBH][0] = fabs(Porg[ithBH][0]);
|
||||
Porg[ithBH][1] = fabs(Porg[ithBH][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((lev == a_lev) && (LastAnas + dT_lev >= AnasTime))
|
||||
z4c_cuda_download_level_state(GH->PatL[lev], SynchList_pre, myrank, false);
|
||||
if (lev == a_lev)
|
||||
AnalysisStuff(lev, dT_lev);
|
||||
|
||||
for (iter_count = 1; iter_count < 4; iter_count++)
|
||||
{
|
||||
if (iter_count == 1 || iter_count == 3)
|
||||
TRK4 += dT_lev / 2;
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
if (myrank == cg->rank)
|
||||
{
|
||||
double *state_in[Z4C_CUDA_STATE_COUNT];
|
||||
double *state_out[Z4C_CUDA_STATE_COUNT];
|
||||
double propspeed[Z4C_CUDA_STATE_COUNT];
|
||||
double soa_flat[3 * Z4C_CUDA_STATE_COUNT];
|
||||
if (!fill_z4c_cuda_views(cg, SynchList_pre, state_in, propspeed, soa_flat) ||
|
||||
!fill_z4c_cuda_views(cg, SynchList_cor, state_out))
|
||||
{
|
||||
cout << "CUDA Z4C state list mismatch on corrector step" << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int apply_bam_bc = 0;
|
||||
#if (MRBD == 0)
|
||||
#if (SommerType == 0)
|
||||
apply_bam_bc = (lev == 0) ? 1 : 0;
|
||||
#endif
|
||||
#elif (MRBD == 1)
|
||||
apply_bam_bc = 1;
|
||||
#endif
|
||||
int keep_resident_state = 1;
|
||||
int apply_enforce_ga = 0;
|
||||
#if (AGM == 0)
|
||||
apply_enforce_ga = 1;
|
||||
#elif (AGM == 1)
|
||||
apply_enforce_ga = (iter_count == 3) ? 1 : 0;
|
||||
#endif
|
||||
if (z4c_cuda_rk4_substep(cg,
|
||||
cg->shape, cg->X[0], cg->X[1], cg->X[2],
|
||||
state_in, state_out,
|
||||
propspeed, soa_flat, Pp->data->bbox,
|
||||
dT_lev, TRK4, iter_count, apply_bam_bc,
|
||||
Symmetry, lev, ndeps, cor,
|
||||
keep_resident_state, apply_enforce_ga, chitiny))
|
||||
{
|
||||
cout << "CUDA Z4C corrector substep failed in domain: ("
|
||||
<< cg->bbox[0] << ":" << cg->bbox[3] << ","
|
||||
<< cg->bbox[1] << ":" << cg->bbox[4] << ","
|
||||
<< cg->bbox[2] << ":" << cg->bbox[5] << ")" << endl;
|
||||
ERROR = 1;
|
||||
}
|
||||
}
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
|
||||
{
|
||||
int erh = ERROR;
|
||||
MPI_Allreduce(&erh, &ERROR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
|
||||
}
|
||||
if (ERROR)
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed in RK4 substep#" << iter_count
|
||||
<< " at t = " << PhysTime
|
||||
<< ", lev = " << lev << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
Parallel::Sync(GH->PatL[lev], SynchList_cor, Symmetry);
|
||||
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
if (!z4c_cuda_compute_porg_rhs_resident(GH, lev, myrank, BH_num,
|
||||
Porg, Porg1,
|
||||
Sfx, Sfy, Sfz, Symmetry))
|
||||
{
|
||||
if (myrank == 0 && ErrorMonitor->outfile)
|
||||
ErrorMonitor->outfile << "CUDA Z4C failed to interpolate black-hole shift at t = "
|
||||
<< PhysTime << endl;
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][0], Porg1[ithBH][0], Porg_rhs[ithBH][0], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][1], Porg1[ithBH][1], Porg_rhs[ithBH][1], iter_count);
|
||||
f_rungekutta4_scalar(dT_lev, Porg0[ithBH][2], Porg1[ithBH][2], Porg_rhs[ithBH][2], iter_count);
|
||||
if (Symmetry > 0)
|
||||
Porg1[ithBH][2] = fabs(Porg1[ithBH][2]);
|
||||
if (Symmetry == 2)
|
||||
{
|
||||
Porg1[ithBH][0] = fabs(Porg1[ithBH][0]);
|
||||
Porg1[ithBH][1] = fabs(Porg1[ithBH][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (iter_count < 3)
|
||||
{
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
cg->swapList(SynchList_pre, SynchList_cor, myrank);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
Porg[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
z4c_cuda_download_level_state(GH->PatL[lev], SynchList_cor, myrank, true);
|
||||
|
||||
#if (RPS == 0)
|
||||
RestrictProlong(lev, YN, BB);
|
||||
#endif
|
||||
|
||||
Pp = GH->PatL[lev];
|
||||
while (Pp)
|
||||
{
|
||||
MyList<Block> *BP = Pp->data->blb;
|
||||
while (BP)
|
||||
{
|
||||
Block *cg = BP->data;
|
||||
cg->swapList(StateList, SynchList_cor, myrank);
|
||||
cg->swapList(OldStateList, SynchList_cor, myrank);
|
||||
if (BP == Pp->data->ble)
|
||||
break;
|
||||
BP = BP->next;
|
||||
}
|
||||
Pp = Pp->next;
|
||||
}
|
||||
if (BH_num > 0 && lev == GH->levels - 1)
|
||||
{
|
||||
for (int ithBH = 0; ithBH < BH_num; ithBH++)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#else
|
||||
double dT_lev = dT * pow(0.5, Mymax(lev, trfls));
|
||||
#ifdef With_AHF
|
||||
AH_Step_Find(lev, dT_lev);
|
||||
#endif
|
||||
@@ -1039,15 +1589,19 @@ void Z4c_class::Step(int lev, int YN)
|
||||
{
|
||||
Porg0[ithBH][0] = Porg1[ithBH][0];
|
||||
Porg0[ithBH][1] = Porg1[ithBH][1];
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
// for constraint preserving boundary (CPBC)
|
||||
#ifndef WithShell
|
||||
#error "CPBC only supports Shell"
|
||||
#endif
|
||||
Porg0[ithBH][2] = Porg1[ithBH][2];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
// for constraint preserving boundary (CPBC)
|
||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||
#error "USE_CUDA_Z4C resident path does not support CPBC"
|
||||
#endif
|
||||
#ifndef WithShell
|
||||
#error "CPBC only supports Shell"
|
||||
#endif
|
||||
|
||||
// 0: extroplate rhs, 1: extroplate variable
|
||||
// 2: extroplate variable but before RHS calculation
|
||||
|
||||
Reference in New Issue
Block a user