merge lopsided+kodis
This commit is contained in:
@@ -912,23 +912,12 @@ static void gpu_fdderivs(double *d_f,
|
|||||||
kern_fdderivs<<<grid(all), BLK>>>(fh, d_fxx, d_fxy, d_fxz, d_fyy, d_fyz, d_fzz);
|
kern_fdderivs<<<grid(all), BLK>>>(fh, d_fxx, d_fxy, d_fxz, d_fyy, d_fyz, d_fzz);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* symmetry_bd on GPU for ord=3, then launch lopsided kernel */
|
/* Combined ord=3 advection + KO dissipation.
|
||||||
static void gpu_lopsided(double *d_f, double *d_f_rhs,
|
* When advection and KO use the same source field, symmetry packing is shared.
|
||||||
|
* If they differ (e.g. gxx advection + dxx KO), only KO repacks.
|
||||||
|
*/
|
||||||
|
static void gpu_lopsided_kodis(double *d_f_adv, double *d_f_ko, double *d_f_rhs,
|
||||||
double *d_Sfx, double *d_Sfy, double *d_Sfz,
|
double *d_Sfx, double *d_Sfy, double *d_Sfz,
|
||||||
double SoA0, double SoA1, double SoA2, int all)
|
|
||||||
{
|
|
||||||
double *fh = g_buf.d_fh3;
|
|
||||||
const size_t nx = (size_t)g_buf.prev_nx;
|
|
||||||
const size_t ny = (size_t)g_buf.prev_ny;
|
|
||||||
const size_t nz = (size_t)g_buf.prev_nz;
|
|
||||||
const size_t w_pack = (nx + 3ull) * (ny + 3ull) * (nz + 3ull);
|
|
||||||
|
|
||||||
kern_symbd_pack_ord3<<<grid(w_pack), BLK>>>(d_f, fh, SoA0, SoA1, SoA2);
|
|
||||||
kern_lopsided<<<grid(all), BLK>>>(fh, d_f_rhs, d_Sfx, d_Sfy, d_Sfz);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* symmetry_bd on GPU for ord=3, then launch kodis kernel */
|
|
||||||
static void gpu_kodis(double *d_f, double *d_f_rhs,
|
|
||||||
double SoA0, double SoA1, double SoA2,
|
double SoA0, double SoA1, double SoA2,
|
||||||
double eps_val, int all)
|
double eps_val, int all)
|
||||||
{
|
{
|
||||||
@@ -938,9 +927,16 @@ static void gpu_kodis(double *d_f, double *d_f_rhs,
|
|||||||
const size_t nz = (size_t)g_buf.prev_nz;
|
const size_t nz = (size_t)g_buf.prev_nz;
|
||||||
const size_t w_pack = (nx + 3ull) * (ny + 3ull) * (nz + 3ull);
|
const size_t w_pack = (nx + 3ull) * (ny + 3ull) * (nz + 3ull);
|
||||||
|
|
||||||
kern_symbd_pack_ord3<<<grid(w_pack), BLK>>>(d_f, fh, SoA0, SoA1, SoA2);
|
kern_symbd_pack_ord3<<<grid(w_pack), BLK>>>(d_f_adv, fh, SoA0, SoA1, SoA2);
|
||||||
|
kern_lopsided<<<grid(all), BLK>>>(fh, d_f_rhs, d_Sfx, d_Sfy, d_Sfz);
|
||||||
|
|
||||||
|
if (eps_val > 0.0) {
|
||||||
|
if (d_f_ko != d_f_adv) {
|
||||||
|
kern_symbd_pack_ord3<<<grid(w_pack), BLK>>>(d_f_ko, fh, SoA0, SoA1, SoA2);
|
||||||
|
}
|
||||||
kern_kodis<<<grid(all), BLK>>>(fh, d_f_rhs, eps_val);
|
kern_kodis<<<grid(all), BLK>>>(fh, d_f_rhs, eps_val);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ================================================================== */
|
/* ================================================================== */
|
||||||
/* C. Point-wise computation kernels */
|
/* C. Point-wise computation kernels */
|
||||||
@@ -2466,62 +2462,32 @@ int f_compute_rhs_bssn(int *ex, double &T,
|
|||||||
D(S_f_arr), D(S_S_arr));
|
D(S_f_arr), D(S_S_arr));
|
||||||
|
|
||||||
/* ============================================================ */
|
/* ============================================================ */
|
||||||
/* Phase 16: 23x lopsided (advection) */
|
/* Phase 16/17: advection + KO dissipation (shared ord=3 pack) */
|
||||||
/* ============================================================ */
|
/* ============================================================ */
|
||||||
gpu_lopsided(D(S_gxx), D(S_gxx_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_gxx), D(S_dxx), D(S_gxx_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Gamz), D(S_Gamz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, all);
|
gpu_lopsided_kodis(D(S_Gamz), D(S_Gamz), D(S_Gamz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_gxy), D(S_gxy_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,ANTI,SYM, all);
|
gpu_lopsided_kodis(D(S_gxy), D(S_gxy), D(S_gxy_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,ANTI,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Lap), D(S_Lap_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_Lap), D(S_Lap), D(S_Lap_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_gxz), D(S_gxz_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,ANTI, all);
|
gpu_lopsided_kodis(D(S_gxz), D(S_gxz), D(S_gxz_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_betax), D(S_betax_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_betax), D(S_betax), D(S_betax_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_gyy), D(S_gyy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_gyy), D(S_dyy), D(S_gyy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_betay), D(S_betay_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, all);
|
gpu_lopsided_kodis(D(S_betay), D(S_betay), D(S_betay_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_gyz), D(S_gyz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,ANTI, all);
|
gpu_lopsided_kodis(D(S_gyz), D(S_gyz), D(S_gyz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_betaz), D(S_betaz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, all);
|
gpu_lopsided_kodis(D(S_betaz), D(S_betaz), D(S_betaz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_gzz), D(S_gzz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_gzz), D(S_dzz), D(S_gzz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_dtSfx), D(S_dtSfx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_dtSfx), D(S_dtSfx), D(S_dtSfx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Axx), D(S_Axx_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_Axx), D(S_Axx), D(S_Axx_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_dtSfy), D(S_dtSfy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, all);
|
gpu_lopsided_kodis(D(S_dtSfy), D(S_dtSfy), D(S_dtSfy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Axy), D(S_Axy_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,ANTI,SYM, all);
|
gpu_lopsided_kodis(D(S_Axy), D(S_Axy), D(S_Axy_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,ANTI,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_dtSfz), D(S_dtSfz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, all);
|
gpu_lopsided_kodis(D(S_dtSfz), D(S_dtSfz), D(S_dtSfz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_Axz), D(S_Axz_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,ANTI, all);
|
gpu_lopsided_kodis(D(S_Axz), D(S_Axz), D(S_Axz_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_Ayy), D(S_Ayy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_Ayy), D(S_Ayy), D(S_Ayy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Ayz), D(S_Ayz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,ANTI, all);
|
gpu_lopsided_kodis(D(S_Ayz), D(S_Ayz), D(S_Ayz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,ANTI, eps, all);
|
||||||
gpu_lopsided(D(S_Azz), D(S_Azz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_Azz), D(S_Azz), D(S_Azz_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_chi), D(S_chi_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_chi), D(S_chi), D(S_chi_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_trK), D(S_trK_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_trK), D(S_trK), D(S_trK_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Gamx), D(S_Gamx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, all);
|
gpu_lopsided_kodis(D(S_Gamx), D(S_Gamx), D(S_Gamx_rhs), D(S_betax),D(S_betay),D(S_betaz), ANTI,SYM,SYM, eps, all);
|
||||||
gpu_lopsided(D(S_Gamy), D(S_Gamy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, all);
|
gpu_lopsided_kodis(D(S_Gamy), D(S_Gamy), D(S_Gamy_rhs), D(S_betax),D(S_betay),D(S_betaz), SYM,ANTI,SYM, eps, all);
|
||||||
|
|
||||||
/* ============================================================ */
|
|
||||||
/* Phase 17: 24x KO dissipation (eps > 0) */
|
|
||||||
/* ============================================================ */
|
|
||||||
if (eps > 0) {
|
|
||||||
gpu_kodis(D(S_chi), D(S_chi_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_trK), D(S_trK_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_dxx), D(S_gxx_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_gxy), D(S_gxy_rhs), ANTI,ANTI,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_gxz), D(S_gxz_rhs), ANTI,SYM,ANTI, eps, all);
|
|
||||||
gpu_kodis(D(S_dyy), D(S_gyy_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_gyz), D(S_gyz_rhs), SYM,ANTI,ANTI, eps, all);
|
|
||||||
gpu_kodis(D(S_dzz), D(S_gzz_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Axx), D(S_Axx_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_dtSfz), D(S_dtSfz_rhs), SYM,SYM,ANTI, eps, all);
|
|
||||||
gpu_kodis(D(S_Axy), D(S_Axy_rhs), ANTI,ANTI,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_dtSfy), D(S_dtSfy_rhs), SYM,ANTI,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Axz), D(S_Axz_rhs), ANTI,SYM,ANTI, eps, all);
|
|
||||||
gpu_kodis(D(S_dtSfx), D(S_dtSfx_rhs), ANTI,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Ayy), D(S_Ayy_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_betaz), D(S_betaz_rhs), SYM,SYM,ANTI, eps, all);
|
|
||||||
gpu_kodis(D(S_Ayz), D(S_Ayz_rhs), SYM,ANTI,ANTI, eps, all);
|
|
||||||
gpu_kodis(D(S_betay), D(S_betay_rhs), SYM,ANTI,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Azz), D(S_Azz_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_betax), D(S_betax_rhs), ANTI,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Gamx), D(S_Gamx_rhs), ANTI,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Lap), D(S_Lap_rhs), SYM,SYM,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Gamy), D(S_Gamy_rhs), SYM,ANTI,SYM, eps, all);
|
|
||||||
gpu_kodis(D(S_Gamz), D(S_Gamz_rhs), SYM,SYM,ANTI, eps, all);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ============================================================ */
|
/* ============================================================ */
|
||||||
/* Phase 18: Hamilton & momentum constraints (co==0) */
|
/* Phase 18: Hamilton & momentum constraints (co==0) */
|
||||||
|
|||||||
Reference in New Issue
Block a user