Enable Z4C + Shell-Patch GPU coexistence (Phase 3)
Remove the compile-time #error that blocked USE_CUDA_Z4C + WithShell. Add GPU-to-CPU state sync at the start of both Z4C Step functions (non-CPBC and CPBC) so shell CPU consumers read valid field data after Cartesian GPU RHS with resident state. Move bssn_cuda_use_resident_sync and bssn_cuda_download_level_state_if_present from anonymous namespace to file scope in bssn_class.C so derived classes (Z4C) can call them. Declare both in bssn_rhs_cuda.h. Include bssn_rhs_cuda.h in Z4c_class.C. Z4C shell RHS remains on CPU (Fortran Z4c_rhs_ss.f90) pending future GPU kernel implementation. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,9 @@ using namespace std;
|
|||||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||||
#include "z4c_rhs_cuda.h"
|
#include "z4c_rhs_cuda.h"
|
||||||
#endif
|
#endif
|
||||||
|
#if USE_CUDA_BSSN
|
||||||
|
#include "bssn_rhs_cuda.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef With_AHF
|
#ifdef With_AHF
|
||||||
#include "derivatives.h"
|
#include "derivatives.h"
|
||||||
@@ -187,9 +190,6 @@ Z4c_class::~Z4c_class()
|
|||||||
// for sommerfeld boundary
|
// for sommerfeld boundary
|
||||||
|
|
||||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
#if USE_CUDA_Z4C && (ABEtype == 2)
|
||||||
#ifdef WithShell
|
|
||||||
#error "USE_CUDA_Z4C resident path currently supports Cartesian non-shell Z4C only"
|
|
||||||
#endif
|
|
||||||
#if (MRBD == 2)
|
#if (MRBD == 2)
|
||||||
#error "USE_CUDA_Z4C resident path does not support MRBD == 2"
|
#error "USE_CUDA_Z4C resident path does not support MRBD == 2"
|
||||||
#endif
|
#endif
|
||||||
@@ -490,6 +490,14 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
int pre = 0, cor = 1;
|
int pre = 0, cor = 1;
|
||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
|
#ifdef WithShell
|
||||||
|
if (bssn_cuda_use_resident_sync(lev))
|
||||||
|
{
|
||||||
|
for (int dl = 0; dl < GH->levels; dl++)
|
||||||
|
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
while (Pp)
|
while (Pp)
|
||||||
{
|
{
|
||||||
@@ -915,6 +923,13 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef WithShell
|
#ifdef WithShell
|
||||||
|
#if USE_CUDA_Z4C
|
||||||
|
if (bssn_cuda_use_resident_sync(lev))
|
||||||
|
{
|
||||||
|
for (int dl = 0; dl < GH->levels; dl++)
|
||||||
|
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
// evolve Shell Patches
|
// evolve Shell Patches
|
||||||
if (lev == 0)
|
if (lev == 0)
|
||||||
{
|
{
|
||||||
@@ -1622,9 +1637,7 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// for constraint preserving boundary (CPBC)
|
// for constraint preserving boundary (CPBC)
|
||||||
#if USE_CUDA_Z4C && (ABEtype == 2)
|
// Note: CPBC path uses CPU Fortran RHS; GPU resident sync is a no-op here.
|
||||||
#error "USE_CUDA_Z4C resident path does not support CPBC"
|
|
||||||
#endif
|
|
||||||
#ifndef WithShell
|
#ifndef WithShell
|
||||||
#error "CPBC only supports Shell"
|
#error "CPBC only supports Shell"
|
||||||
#endif
|
#endif
|
||||||
@@ -1654,6 +1667,14 @@ void Z4c_class::Step(int lev, int YN)
|
|||||||
int pre = 0, cor = 1;
|
int pre = 0, cor = 1;
|
||||||
int ERROR = 0;
|
int ERROR = 0;
|
||||||
|
|
||||||
|
#if USE_CUDA_Z4C && defined(WithShell)
|
||||||
|
if (bssn_cuda_use_resident_sync(lev))
|
||||||
|
{
|
||||||
|
for (int dl = 0; dl < GH->levels; dl++)
|
||||||
|
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
MyList<ss_patch> *sPp;
|
MyList<ss_patch> *sPp;
|
||||||
// Predictor
|
// Predictor
|
||||||
MyList<Patch> *Pp = GH->PatL[lev];
|
MyList<Patch> *Pp = GH->PatL[lev];
|
||||||
|
|||||||
@@ -548,6 +548,8 @@ bool fill_bssn_cuda_views_count(Block *cg, MyList<var> *vars,
|
|||||||
return idx == state_count && vars == 0;
|
return idx == state_count && vars == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
bool bssn_cuda_use_resident_sync(int lev)
|
bool bssn_cuda_use_resident_sync(int lev)
|
||||||
{
|
{
|
||||||
(void)lev;
|
(void)lev;
|
||||||
@@ -1032,7 +1034,6 @@ void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !USE_CUDA_BSSN
|
#if !USE_CUDA_BSSN
|
||||||
|
|||||||
@@ -404,6 +404,10 @@ void bssn_cuda_release_step_ctx(void *block_tag);
|
|||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
// C++-only helpers declared for derived equation classes (Z4C, etc.)
|
||||||
|
// Defined in bssn_class.C. Requires MyList, Patch, var from including TU.
|
||||||
|
bool bssn_cuda_use_resident_sync(int lev);
|
||||||
|
void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var> *vars, int myrank);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user