Save Z4C CUDA transfer progress

This commit is contained in:
2026-05-01 18:51:19 +08:00
parent db9383e439
commit 30b778daa3
3 changed files with 28 additions and 2 deletions

View File

@@ -31,7 +31,7 @@ GPU_Part = 0.0
## Setting the physical system and numerical method ## Setting the physical system and numerical method
Symmetry = "equatorial-symmetry" ## Symmetry of System: choose "equatorial-symmetry", "no-symmetry", "octant-symmetry" Symmetry = "equatorial-symmetry" ## Symmetry of System: choose "equatorial-symmetry", "no-symmetry", "octant-symmetry"
Equation_Class = "BSSN" ## Evolution Equation: choose "BSSN", "BSSN-EScalar", "BSSN-EM", "Z4C" Equation_Class = "Z4C" ## Evolution Equation: choose "BSSN", "BSSN-EScalar", "BSSN-EM", "Z4C"
## If "BSSN-EScalar" is chosen, it is necessary to set other parameters below ## If "BSSN-EScalar" is chosen, it is necessary to set other parameters below
Initial_Data_Method = "Ansorg-TwoPuncture" ## initial data method: choose "Ansorg-TwoPuncture", "Lousto-Analytical", "Cao-Analytical", "KerrSchild-Analytical" Initial_Data_Method = "Ansorg-TwoPuncture" ## initial data method: choose "Ansorg-TwoPuncture", "Lousto-Analytical", "Cao-Analytical", "KerrSchild-Analytical"
Time_Evolution_Method = "runge-kutta-45" ## time evolution method: choose "runge-kutta-45" Time_Evolution_Method = "runge-kutta-45" ## time evolution method: choose "runge-kutta-45"

View File

@@ -10,7 +10,7 @@
#define GaussInt #define GaussInt
#define ABEtype 0 #define ABEtype 2
//#define With_AHF //#define With_AHF
#define Psi4type 0 #define Psi4type 0
@@ -167,3 +167,4 @@
#define TINY 1e-10 #define TINY 1e-10
#endif /* MICRODEF_H */ #endif /* MICRODEF_H */

View File

@@ -4676,6 +4676,18 @@ static void compute_patch_boundary_flags(int *ex,
static void upload_state_inputs(double **state_host, size_t all) static void upload_state_inputs(double **state_host, size_t all)
{ {
const size_t bytes = all * sizeof(double); const size_t bytes = all * sizeof(double);
static int direct_upload = -1;
if (direct_upload < 0) {
const char *env = getenv("AMSS_CUDA_DIRECT_STATE_UPLOAD");
direct_upload = env ? ((atoi(env) != 0) ? 1 : 0) : 1;
}
if (direct_upload) {
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
CUDA_CHECK(cudaMemcpyAsync(g_buf.slot[k_state_input_slots[i]], state_host[i],
bytes, cudaMemcpyHostToDevice));
}
return;
}
for (int i = 0; i < BSSN_STATE_COUNT; ++i) { for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
std::memcpy(g_buf.h_stage + (size_t)i * all, state_host[i], bytes); std::memcpy(g_buf.h_stage + (size_t)i * all, state_host[i], bytes);
} }
@@ -5259,6 +5271,19 @@ static void download_resident_state(void *block_tag, int *ex, double **state_hos
const size_t all = (size_t)ex[0] * ex[1] * ex[2]; const size_t all = (size_t)ex[0] * ex[1] * ex[2];
const size_t bytes = all * sizeof(double); const size_t bytes = all * sizeof(double);
StepContext &ctx = ensure_step_ctx(block_tag, all); StepContext &ctx = ensure_step_ctx(block_tag, all);
static int direct_download = -1;
if (direct_download < 0) {
const char *env = getenv("AMSS_CUDA_DIRECT_STATE_DOWNLOAD");
direct_download = env ? ((atoi(env) != 0) ? 1 : 0) : 1;
}
if (direct_download) {
for (int i = 0; i < BSSN_STATE_COUNT; ++i) {
CUDA_CHECK(cudaMemcpyAsync(state_host_out[i], ctx.d_state_curr[i],
bytes, cudaMemcpyDeviceToHost));
}
CUDA_CHECK(cudaDeviceSynchronize());
return;
}
CUDA_CHECK(cudaMemcpy(g_buf.h_stage, ctx.d_state_curr_mem, CUDA_CHECK(cudaMemcpy(g_buf.h_stage, ctx.d_state_curr_mem,
(size_t)BSSN_STATE_COUNT * bytes, (size_t)BSSN_STATE_COUNT * bytes,
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));