From 284ab80bafc29bbac78b8e17be409857eb8b00f8 Mon Sep 17 00:00:00 2001 From: ianchb Date: Wed, 25 Feb 2026 13:15:24 +0000 Subject: [PATCH] Remove OpenMP from C rewrite kernel The C rewrite introduced OpenMP parallelism. Remove all OpenMP. --- AMSS_NCKU_source/bssn_rhs_c.C | 298 ++++++++++++---------------------- AMSS_NCKU_source/fdderivs_c.C | 4 +- AMSS_NCKU_source/fderivs_c.C | 4 +- AMSS_NCKU_source/makefile | 10 +- AMSS_NCKU_source/share_func.h | 1 - 5 files changed, 111 insertions(+), 206 deletions(-) diff --git a/AMSS_NCKU_source/bssn_rhs_c.C b/AMSS_NCKU_source/bssn_rhs_c.C index 99254a2..52995a8 100644 --- a/AMSS_NCKU_source/bssn_rhs_c.C +++ b/AMSS_NCKU_source/bssn_rhs_c.C @@ -34,7 +34,6 @@ int f_compute_rhs_bssn(int *ex, double &T, int &Symmetry, int &Lev, double &eps, int &co ) // return gont { - double t0 = omp_get_wtime(); int nx = ex[0], ny = ex[1], nz=ex[2]; int all = nx*ny*nz; // printf("nx=%d ny=%d nz=%d all=%d\n", nx, ny, nz, all); @@ -80,15 +79,8 @@ int f_compute_rhs_bssn(int *ex, double &T, dY = Y[1] - Y[0]; dZ = Z[1] - Z[0]; - #pragma omp parallel - { - int tid = omp_get_thread_num(); // 当前线程号(从 0 开始) - int nthr = omp_get_num_threads(); // 当前并行区里的总线程数 - int local = all / nthr; - int start = tid * local; - int end = (tid == nthr - 1) ? all : start + local; // 1ms // - for(int i=start;i0){ - for(int _task=tid; _task<16; _task+=nthr){ - switch(_task){ - case 0: kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps); break; - case 1: kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps); break; - case 2: kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps); break; - case 3: kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps); break; - case 4: kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps); break; - case 5: kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps); break; - case 6: kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps); break; - case 7: kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps); break; - case 8: - kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps); - break; - case 9: - kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps); - kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps); - break; - case 10: - kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps); - kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps); - break; - case 11: - kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps); - break; - case 12: - kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps); - kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps); - break; - case 13: - kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps); - break; - case 14: - kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps); - kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps); - break; - case 15: - kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps); - kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps); - break; - } - } + kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps); + kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps); + kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps); + kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps); + kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps); + kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps); + kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps); + kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps); + kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps); + kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps); + kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps); + kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps); + kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps); + kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps); + kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps); + kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps); } // 2ms // if(co==0){ - for (int i=start;i cap) { free(fh); diff --git a/AMSS_NCKU_source/fderivs_c.C b/AMSS_NCKU_source/fderivs_c.C index 47c9c6c..0637cba 100644 --- a/AMSS_NCKU_source/fderivs_c.C +++ b/AMSS_NCKU_source/fderivs_c.C @@ -50,8 +50,8 @@ void fderivs(const int ex[3], const size_t ny = (size_t)ex2 + 2; const size_t nz = (size_t)ex3 + 2; const size_t fh_size = nx * ny * nz; - static thread_local double *fh = NULL; - static thread_local size_t cap = 0; + static double *fh = NULL; + static size_t cap = 0; if (fh_size > cap) { free(fh); diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index c6d465f..57ce95e 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -39,19 +39,19 @@ endif # C rewrite of BSSN RHS kernel and helpers bssn_rhs_c.o: bssn_rhs_c.C - ${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@ + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ fderivs_c.o: fderivs_c.C - ${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@ + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ fdderivs_c.o: fdderivs_c.C - ${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@ + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ kodiss_c.o: kodiss_c.C - ${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@ + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ lopsided_c.o: lopsided_c.C - ${CXX} $(CXXAPPFLAGS) -qopenmp -c $< $(filein) -o $@ + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ## TwoPunctureABE uses fixed optimal flags, independent of CXXAPPFLAGS (which may be PGO-instrumented) TP_OPTFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo -Dfortran3 -Dnewc -I${MKLROOT}/include diff --git a/AMSS_NCKU_source/share_func.h b/AMSS_NCKU_source/share_func.h index f504a07..5051448 100644 --- a/AMSS_NCKU_source/share_func.h +++ b/AMSS_NCKU_source/share_func.h @@ -5,7 +5,6 @@ #include #include #include -#include /* 主网格:0-based -> 1D */ static inline size_t idx_ex(int i0, int j0, int k0, const int ex[3]) { const int ex1 = ex[0], ex2 = ex[1];