Enable Z4C + Shell-Patch GPU coexistence (Phase 3)

Remove the compile-time #error directives that blocked USE_CUDA_Z4C
together with WithShell and with the CPBC path.
Add a GPU-to-CPU state sync at the start of both Z4C Step functions
(non-CPBC and CPBC), and again just before the shell-patch evolution in
the non-CPBC Step, so that CPU-side shell consumers read valid field
data after the Cartesian GPU RHS has run with GPU-resident state.

Move bssn_cuda_use_resident_sync and
bssn_cuda_download_level_state_if_present out of the anonymous
namespace to file scope in bssn_class.C so that derived classes (Z4C)
can call them. Declare both in bssn_rhs_cuda.h, and include
bssn_rhs_cuda.h in Z4c_class.C (guarded by USE_CUDA_BSSN).

The Z4C shell RHS remains on the CPU (Fortran Z4c_rhs_ss.f90) pending
a future GPU kernel implementation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-10 12:08:02 +08:00
parent 0ca86afd41
commit c4194214c6
3 changed files with 645 additions and 619 deletions

View File

@@ -36,6 +36,9 @@ using namespace std;
#if USE_CUDA_Z4C && (ABEtype == 2) #if USE_CUDA_Z4C && (ABEtype == 2)
#include "z4c_rhs_cuda.h" #include "z4c_rhs_cuda.h"
#endif #endif
#if USE_CUDA_BSSN
#include "bssn_rhs_cuda.h"
#endif
#ifdef With_AHF #ifdef With_AHF
#include "derivatives.h" #include "derivatives.h"
@@ -187,9 +190,6 @@ Z4c_class::~Z4c_class()
// for sommerfeld boundary // for sommerfeld boundary
#if USE_CUDA_Z4C && (ABEtype == 2) #if USE_CUDA_Z4C && (ABEtype == 2)
#ifdef WithShell
#error "USE_CUDA_Z4C resident path currently supports Cartesian non-shell Z4C only"
#endif
#if (MRBD == 2) #if (MRBD == 2)
#error "USE_CUDA_Z4C resident path does not support MRBD == 2" #error "USE_CUDA_Z4C resident path does not support MRBD == 2"
#endif #endif
@@ -490,6 +490,14 @@ void Z4c_class::Step(int lev, int YN)
int pre = 0, cor = 1; int pre = 0, cor = 1;
int ERROR = 0; int ERROR = 0;
#ifdef WithShell
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
MyList<Patch> *Pp = GH->PatL[lev]; MyList<Patch> *Pp = GH->PatL[lev];
while (Pp) while (Pp)
{ {
@@ -915,6 +923,13 @@ void Z4c_class::Step(int lev, int YN)
} }
#ifdef WithShell #ifdef WithShell
#if USE_CUDA_Z4C
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
// evolve Shell Patches // evolve Shell Patches
if (lev == 0) if (lev == 0)
{ {
@@ -1622,9 +1637,7 @@ void Z4c_class::Step(int lev, int YN)
} }
#else #else
// for constraint preserving boundary (CPBC) // for constraint preserving boundary (CPBC)
#if USE_CUDA_Z4C && (ABEtype == 2) // Note: CPBC path uses CPU Fortran RHS; GPU resident sync is a no-op here.
#error "USE_CUDA_Z4C resident path does not support CPBC"
#endif
#ifndef WithShell #ifndef WithShell
#error "CPBC only supports Shell" #error "CPBC only supports Shell"
#endif #endif
@@ -1654,6 +1667,14 @@ void Z4c_class::Step(int lev, int YN)
int pre = 0, cor = 1; int pre = 0, cor = 1;
int ERROR = 0; int ERROR = 0;
#if USE_CUDA_Z4C && defined(WithShell)
if (bssn_cuda_use_resident_sync(lev))
{
for (int dl = 0; dl < GH->levels; dl++)
bssn_cuda_download_level_state_if_present(GH->PatL[dl], StateList, myrank);
}
#endif
MyList<ss_patch> *sPp; MyList<ss_patch> *sPp;
// Predictor // Predictor
MyList<Patch> *Pp = GH->PatL[lev]; MyList<Patch> *Pp = GH->PatL[lev];

View File

@@ -548,6 +548,8 @@ bool fill_bssn_cuda_views_count(Block *cg, MyList<var> *vars,
return idx == state_count && vars == 0; return idx == state_count && vars == 0;
} }
} // namespace
bool bssn_cuda_use_resident_sync(int lev) bool bssn_cuda_use_resident_sync(int lev)
{ {
(void)lev; (void)lev;
@@ -1032,7 +1034,6 @@ void bssn_cuda_sync_level_bh_fields(MyList<Patch> *PatL,
} }
} }
} // namespace
#endif #endif
#if !USE_CUDA_BSSN #if !USE_CUDA_BSSN

View File

@@ -404,6 +404,10 @@ void bssn_cuda_release_step_ctx(void *block_tag);
#ifdef __cplusplus #ifdef __cplusplus
} }
// C++-only helpers declared for derived equation classes (Z4C, etc.)
// Defined in bssn_class.C. Requires MyList, Patch, var from including TU.
bool bssn_cuda_use_resident_sync(int lev);
void bssn_cuda_download_level_state_if_present(MyList<Patch> *PatL, MyList<var> *vars, int myrank);
#endif #endif
#endif #endif