diff --git a/AMSS_NCKU_source/bssn_rhs_c.C b/AMSS_NCKU_source/bssn_rhs_c.C index ea88692..9dabd74 100644 --- a/AMSS_NCKU_source/bssn_rhs_c.C +++ b/AMSS_NCKU_source/bssn_rhs_c.C @@ -1059,58 +1059,31 @@ int f_compute_rhs_bssn(int *ex, double &T, dtSfz_rhs[i] = Gamz_rhs[i] - reta[i] * dtSfz[i]; #endif } - // 26ms // - lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA); - lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS); - lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA); - lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS); - lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS); - lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA); - lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA); - lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS); - lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS); - lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS); - lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA); - lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA); - lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA); - lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS); - lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS); - lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS); - // 20ms // - if(eps>0){ - kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps); - kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps); - kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps); - kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps); - kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps); - kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps); - kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps); - kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps); - kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps); - kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps); - kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps); - kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps); - kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps); - kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps); - kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps); - kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps); - } + // advection + KO dissipation with shared symmetry buffer + lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps); + lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps); + lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps); + lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps); + lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps); + lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps); + lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps); + lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps); + lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps); + lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps); + lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps); + lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps); + lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps); + lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps); + lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps); + lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps); // 2ms // if(co==0){ for (int i=0;i NO_SYMM && fabs(Z[0]) < dZ) kminF = -2; + if (Symmetry > EQ_SYMM && fabs(X[0]) < dX) iminF = -2; + if (Symmetry > EQ_SYMM && fabs(Y[0]) < dY) jminF = -2; + + // fh for Fortran-style domain (-2:ex1,-2:ex2,-2:ex3) + const size_t nx = (size_t)ex1 + 3; + const size_t ny = (size_t)ex2 + 3; + const size_t nz = (size_t)ex3 + 3; + const size_t fh_size = nx * ny * nz; + + double *fh = (double*)malloc(fh_size * sizeof(double)); + if (!fh) return; + + symmetry_bd(3, ex, f, fh, SoA); + + // Advection (same stencil logic as lopsided_c.C) + for (int k0 = 0; k0 <= ex3 - 2; ++k0) { + const int kF = k0 + 1; + for (int j0 = 0; j0 <= ex2 - 2; ++j0) { + const int jF = j0 + 1; + for (int i0 = 0; i0 <= ex1 - 2; ++i0) { + const int iF = i0 + 1; + const size_t p = idx_ex(i0, j0, k0, ex); + + const double sfx = Sfx[p]; + if (sfx > ZEO) { + if (i0 <= ex1 - 4) { + f_rhs[p] += sfx * d12dx * + (-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)] + -F10 * fh[idx_fh_F(iF , jF, kF, ex)] + +F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)] + -F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)] + + fh[idx_fh_F(iF + 3, jF, kF, ex)]); + } else if (i0 <= ex1 - 3) { + f_rhs[p] += sfx * d12dx * + ( fh[idx_fh_F(iF - 2, jF, kF, ex)] + -EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)] + +EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)] + - fh[idx_fh_F(iF + 2, jF, kF, ex)]); + } else if (i0 <= ex1 - 2) { + f_rhs[p] -= sfx * d12dx * + (-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)] + -F10 * fh[idx_fh_F(iF , jF, kF, ex)] + +F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)] + -F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)] + + fh[idx_fh_F(iF - 3, jF, kF, ex)]); + } + } else if (sfx < ZEO) { + if ((i0 - 2) >= iminF) { + f_rhs[p] -= sfx * d12dx * + (-F3 * fh[idx_fh_F(iF + 1, jF, kF, ex)] + -F10 * fh[idx_fh_F(iF , jF, kF, ex)] + +F18 * fh[idx_fh_F(iF - 1, jF, kF, ex)] + -F6 * fh[idx_fh_F(iF - 2, jF, kF, ex)] + + fh[idx_fh_F(iF - 3, jF, kF, ex)]); + } else if ((i0 - 1) >= iminF) { + f_rhs[p] += sfx * d12dx * + ( fh[idx_fh_F(iF - 2, jF, kF, ex)] + -EIT * fh[idx_fh_F(iF - 1, jF, kF, ex)] + +EIT * fh[idx_fh_F(iF + 1, jF, kF, ex)] + - fh[idx_fh_F(iF + 2, jF, kF, ex)]); + } else if (i0 >= iminF) { + f_rhs[p] += sfx * d12dx * + (-F3 * fh[idx_fh_F(iF - 1, jF, kF, ex)] + -F10 * fh[idx_fh_F(iF , jF, kF, ex)] + +F18 * fh[idx_fh_F(iF + 1, jF, kF, ex)] + -F6 * fh[idx_fh_F(iF + 2, jF, kF, ex)] + + fh[idx_fh_F(iF + 3, jF, kF, ex)]); + } + } + + const double sfy = Sfy[p]; + if (sfy > ZEO) { + if (j0 <= ex2 - 4) { + f_rhs[p] += sfy * d12dy * + (-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)] + -F10 * fh[idx_fh_F(iF, jF , kF, ex)] + +F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)] + -F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)] + + fh[idx_fh_F(iF, jF + 3, kF, ex)]); + } else if (j0 <= ex2 - 3) { + f_rhs[p] += sfy * d12dy * + ( fh[idx_fh_F(iF, jF - 2, kF, ex)] + -EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)] + +EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)] + - fh[idx_fh_F(iF, jF + 2, kF, ex)]); + } else if (j0 <= ex2 - 2) { + f_rhs[p] -= sfy * d12dy * + (-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)] + -F10 * fh[idx_fh_F(iF, jF , kF, ex)] + +F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)] + -F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)] + + fh[idx_fh_F(iF, jF - 3, kF, ex)]); + } + } else if (sfy < ZEO) { + if ((j0 - 2) >= jminF) { + f_rhs[p] -= sfy * d12dy * + (-F3 * fh[idx_fh_F(iF, jF + 1, kF, ex)] + -F10 * fh[idx_fh_F(iF, jF , kF, ex)] + +F18 * fh[idx_fh_F(iF, jF - 1, kF, ex)] + -F6 * fh[idx_fh_F(iF, jF - 2, kF, ex)] + + fh[idx_fh_F(iF, jF - 3, kF, ex)]); + } else if ((j0 - 1) >= jminF) { + f_rhs[p] += sfy * d12dy * + ( fh[idx_fh_F(iF, jF - 2, kF, ex)] + -EIT * fh[idx_fh_F(iF, jF - 1, kF, ex)] + +EIT * fh[idx_fh_F(iF, jF + 1, kF, ex)] + - fh[idx_fh_F(iF, jF + 2, kF, ex)]); + } else if (j0 >= jminF) { + f_rhs[p] += sfy * d12dy * + (-F3 * fh[idx_fh_F(iF, jF - 1, kF, ex)] + -F10 * fh[idx_fh_F(iF, jF , kF, ex)] + +F18 * fh[idx_fh_F(iF, jF + 1, kF, ex)] + -F6 * fh[idx_fh_F(iF, jF + 2, kF, ex)] + + fh[idx_fh_F(iF, jF + 3, kF, ex)]); + } + } + + const double sfz = Sfz[p]; + if (sfz > ZEO) { + if (k0 <= ex3 - 4) { + f_rhs[p] += sfz * d12dz * + (-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)] + -F10 * fh[idx_fh_F(iF, jF, kF , ex)] + +F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)] + -F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)] + + fh[idx_fh_F(iF, jF, kF + 3, ex)]); + } else if (k0 <= ex3 - 3) { + f_rhs[p] += sfz * d12dz * + ( fh[idx_fh_F(iF, jF, kF - 2, ex)] + -EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)] + +EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)] + - fh[idx_fh_F(iF, jF, kF + 2, ex)]); + } else if (k0 <= ex3 - 2) { + f_rhs[p] -= sfz * d12dz * + (-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)] + -F10 * fh[idx_fh_F(iF, jF, kF , ex)] + +F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)] + -F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)] + + fh[idx_fh_F(iF, jF, kF - 3, ex)]); + } + } else if (sfz < ZEO) { + if ((k0 - 2) >= kminF) { + f_rhs[p] -= sfz * d12dz * + (-F3 * fh[idx_fh_F(iF, jF, kF + 1, ex)] + -F10 * fh[idx_fh_F(iF, jF, kF , ex)] + +F18 * fh[idx_fh_F(iF, jF, kF - 1, ex)] + -F6 * fh[idx_fh_F(iF, jF, kF - 2, ex)] + + fh[idx_fh_F(iF, jF, kF - 3, ex)]); + } else if ((k0 - 1) >= kminF) { + f_rhs[p] += sfz * d12dz * + ( fh[idx_fh_F(iF, jF, kF - 2, ex)] + -EIT * fh[idx_fh_F(iF, jF, kF - 1, ex)] + +EIT * fh[idx_fh_F(iF, jF, kF + 1, ex)] + - fh[idx_fh_F(iF, jF, kF + 2, ex)]); + } else if (k0 >= kminF) { + f_rhs[p] += sfz * d12dz * + (-F3 * fh[idx_fh_F(iF, jF, kF - 1, ex)] + -F10 * fh[idx_fh_F(iF, jF, kF , ex)] + +F18 * fh[idx_fh_F(iF, jF, kF + 1, ex)] + -F6 * fh[idx_fh_F(iF, jF, kF + 2, ex)] + + fh[idx_fh_F(iF, jF, kF + 3, ex)]); + } + } + } + } + } + + // KO dissipation (same domain restriction as kodiss_c.C) + if (eps > ZEO) { + const int i0_lo = (iminF + 2 > 0) ? iminF + 2 : 0; + const int j0_lo = (jminF + 2 > 0) ? jminF + 2 : 0; + const int k0_lo = (kminF + 2 > 0) ? kminF + 2 : 0; + const int i0_hi = imaxF - 4; // inclusive + const int j0_hi = jmaxF - 4; + const int k0_hi = kmaxF - 4; + + if (!(i0_lo > i0_hi || j0_lo > j0_hi || k0_lo > k0_hi)) { + for (int k0 = k0_lo; k0 <= k0_hi; ++k0) { + const int kF = k0 + 1; + for (int j0 = j0_lo; j0 <= j0_hi; ++j0) { + const int jF = j0 + 1; + for (int i0 = i0_lo; i0 <= i0_hi; ++i0) { + const int iF = i0 + 1; + const size_t p = idx_ex(i0, j0, k0, ex); + + const double Dx_term = + ((fh[idx_fh_F(iF - 3, jF, kF, ex)] + fh[idx_fh_F(iF + 3, jF, kF, ex)]) - + SIX * (fh[idx_fh_F(iF - 2, jF, kF, ex)] + fh[idx_fh_F(iF + 2, jF, kF, ex)]) + + FIT * (fh[idx_fh_F(iF - 1, jF, kF, ex)] + fh[idx_fh_F(iF + 1, jF, kF, ex)]) - + TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dX; + + const double Dy_term = + ((fh[idx_fh_F(iF, jF - 3, kF, ex)] + fh[idx_fh_F(iF, jF + 3, kF, ex)]) - + SIX * (fh[idx_fh_F(iF, jF - 2, kF, ex)] + fh[idx_fh_F(iF, jF + 2, kF, ex)]) + + FIT * (fh[idx_fh_F(iF, jF - 1, kF, ex)] + fh[idx_fh_F(iF, jF + 1, kF, ex)]) - + TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dY; + + const double Dz_term = + ((fh[idx_fh_F(iF, jF, kF - 3, ex)] + fh[idx_fh_F(iF, jF, kF + 3, ex)]) - + SIX * (fh[idx_fh_F(iF, jF, kF - 2, ex)] + fh[idx_fh_F(iF, jF, kF + 2, ex)]) + + FIT * (fh[idx_fh_F(iF, jF, kF - 1, ex)] + fh[idx_fh_F(iF, jF, kF + 1, ex)]) - + TWT * fh[idx_fh_F(iF, jF, kF, ex)]) / dZ; + + f_rhs[p] += (eps / cof) * (Dx_term + Dy_term + Dz_term); + } + } + } + } + } + + free(fh); +} diff --git a/AMSS_NCKU_source/makefile b/AMSS_NCKU_source/makefile index 7849f37..157a689 100644 --- a/AMSS_NCKU_source/makefile +++ b/AMSS_NCKU_source/makefile @@ -16,10 +16,8 @@ f90appflags = -O3 -xHost -fma -fprofile-instr-generate -ipo \ else ## opt (default): maximum performance with PGO profile data CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -fprofile-instr-use=$(PROFDATA) \ -Dfortran3 -Dnewc -I${MKLROOT}/include $(INTERP_LB_FLAGS) f90appflags = -O3 -xHost -fp-model fast=2 -fma -ipo \ - -fprofile-instr-use=$(PROFDATA) \ -align array64byte -fpp -I${MKLROOT}/include endif @@ -50,11 +48,14 @@ fdderivs_c.o: fdderivs_c.C kodiss_c.o: kodiss_c.C ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ -lopsided_c.o: lopsided_c.C - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ - -interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h - ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ +lopsided_c.o: lopsided_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +lopsided_kodis_c.o: lopsided_kodis_c.C + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ + +interp_lb_profile.o: interp_lb_profile.C interp_lb_profile.h + ${CXX} $(CXXAPPFLAGS) -c $< $(filein) -o $@ ## TwoPunctureABE uses fixed optimal flags with its own PGO profile, independent of CXXAPPFLAGS TP_PROFDATA = /home/$(shell whoami)/AMSS-NCKU/pgo_profile/TwoPunctureABE.profdata @@ -75,9 +76,9 @@ ifeq ($(USE_CXX_KERNELS),0) # Fortran mode: no C rewrite files; bssn_rhs.o is included via F90FILES below CFILES = else -# C++ mode (default): C rewrite of bssn_rhs and helper kernels -CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o -endif +# C++ mode (default): C rewrite of bssn_rhs and helper kernels +CFILES = bssn_rhs_c.o fderivs_c.o fdderivs_c.o kodiss_c.o lopsided_c.o lopsided_kodis_c.o +endif C++FILES = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o\ cgh.o bssn_class.o surface_integral.o ShellPatch.o\ diff --git a/AMSS_NCKU_source/tool.h b/AMSS_NCKU_source/tool.h index 154b5ec..2c75748 100644 --- a/AMSS_NCKU_source/tool.h +++ b/AMSS_NCKU_source/tool.h @@ -24,4 +24,10 @@ void lopsided(const int ex[3], const double *X, const double *Y, const double *Z, const double *f, double *f_rhs, const double *Sfx, const double *Sfy, const double *Sfz, - int Symmetry, const double SoA[3]); \ No newline at end of file + int Symmetry, const double SoA[3]); + +void lopsided_kodis(const int ex[3], + const double *X, const double *Y, const double *Z, + const double *f, double *f_rhs, + const double *Sfx, const double *Sfy, const double *Sfz, + int Symmetry, const double SoA[3], double eps);