From 30b778daa3283a5bc8ae35a2e8c4b3f5db5451fd Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Fri, 1 May 2026 18:51:19 +0800 Subject: [PATCH] Save Z4C CUDA transfer progress --- AMSS_NCKU_Input.py | 2 +- AMSS_NCKU_source/macrodef.h | 3 ++- AMSS_NCKU_source/z4c_rhs_cuda.cu | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/AMSS_NCKU_Input.py b/AMSS_NCKU_Input.py index 73af547..40d252b 100755 --- a/AMSS_NCKU_Input.py +++ b/AMSS_NCKU_Input.py @@ -31,7 +31,7 @@ GPU_Part = 0.0 ## Setting the physical system and numerical method Symmetry = "equatorial-symmetry" ## Symmetry of System: choose equatorial-symmetry、no-symmetry、octant-symmetry -Equation_Class = "BSSN" ## Evolution Equation: choose "BSSN", "BSSN-EScalar", "BSSN-EM", "Z4C" +Equation_Class = "Z4C" ## Evolution Equation: choose "BSSN", "BSSN-EScalar", "BSSN-EM", "Z4C" ## If "BSSN-EScalar" is chosen, it is necessary to set other parameters below Initial_Data_Method = "Ansorg-TwoPuncture" ## initial data method: choose "Ansorg-TwoPuncture", "Lousto-Analytical", "Cao-Analytical", "KerrSchild-Analytical" Time_Evolution_Method = "runge-kutta-45" ## time evolution method: choose "runge-kutta-45" diff --git a/AMSS_NCKU_source/macrodef.h b/AMSS_NCKU_source/macrodef.h index f55e4f3..7160d1c 100644 --- a/AMSS_NCKU_source/macrodef.h +++ b/AMSS_NCKU_source/macrodef.h @@ -10,7 +10,7 @@ #define GaussInt -#define ABEtype 0 +#define ABEtype 2 //#define With_AHF #define Psi4type 0 @@ -167,3 +167,4 @@ #define TINY 1e-10 #endif /* MICRODEF_H */ + diff --git a/AMSS_NCKU_source/z4c_rhs_cuda.cu b/AMSS_NCKU_source/z4c_rhs_cuda.cu index 8addc3e..d6a46d8 100644 --- a/AMSS_NCKU_source/z4c_rhs_cuda.cu +++ b/AMSS_NCKU_source/z4c_rhs_cuda.cu @@ -4676,6 +4676,18 @@ static void compute_patch_boundary_flags(int *ex, static void upload_state_inputs(double **state_host, size_t all) { const size_t bytes = all * sizeof(double); + static int direct_upload = -1; + if (direct_upload < 0) { + const char *env = getenv("AMSS_CUDA_DIRECT_STATE_UPLOAD"); + direct_upload = env ? ((atoi(env) != 0) ? 1 : 0) : 1; + } + if (direct_upload) { + for (int i = 0; i < BSSN_STATE_COUNT; ++i) { + CUDA_CHECK(cudaMemcpyAsync(g_buf.slot[k_state_input_slots[i]], state_host[i], + bytes, cudaMemcpyHostToDevice)); + } + return; + } for (int i = 0; i < BSSN_STATE_COUNT; ++i) { std::memcpy(g_buf.h_stage + (size_t)i * all, state_host[i], bytes); } @@ -5259,6 +5271,19 @@ static void download_resident_state(void *block_tag, int *ex, double **state_hos const size_t all = (size_t)ex[0] * ex[1] * ex[2]; const size_t bytes = all * sizeof(double); StepContext &ctx = ensure_step_ctx(block_tag, all); + static int direct_download = -1; + if (direct_download < 0) { + const char *env = getenv("AMSS_CUDA_DIRECT_STATE_DOWNLOAD"); + direct_download = env ? ((atoi(env) != 0) ? 1 : 0) : 1; + } + if (direct_download) { + for (int i = 0; i < BSSN_STATE_COUNT; ++i) { + CUDA_CHECK(cudaMemcpyAsync(state_host_out[i], ctx.d_state_curr[i], + bytes, cudaMemcpyDeviceToHost)); + } + CUDA_CHECK(cudaDeviceSynchronize()); + return; + } CUDA_CHECK(cudaMemcpy(g_buf.h_stage, ctx.d_state_curr_mem, (size_t)BSSN_STATE_COUNT * bytes, cudaMemcpyDeviceToHost));