From 82339f5282ee3db7f5a4d5fb4acfea98eb017821 Mon Sep 17 00:00:00 2001 From: ianchb Date: Fri, 20 Feb 2026 09:45:37 +0800 Subject: [PATCH] Merge lopsided advection + kodis dissipation to share symmetry_bd buffer Cherry-picked from 38c2c30. --- AMSS_NCKU_source/bssn_rhs.f90 | 119 ++++++------------- AMSS_NCKU_source/lopsidediff.f90 | 195 +++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+), 81 deletions(-) diff --git a/AMSS_NCKU_source/bssn_rhs.f90 b/AMSS_NCKU_source/bssn_rhs.f90 index 246b219..c8c044e 100644 --- a/AMSS_NCKU_source/bssn_rhs.f90 +++ b/AMSS_NCKU_source/bssn_rhs.f90 @@ -945,103 +945,60 @@ SSA(2)=SYM SSA(3)=ANTI -!!!!!!!!!advection term part +!!!!!!!!!advection term + Kreiss-Oliger dissipation (merged for cache efficiency) +! lopsided_kodis shares the symmetry_bd buffer between advection and +! dissipation, eliminating redundant full-grid copies. For metric variables +! gxx/gyy/gzz (=dxx/dyy/dzz+1): kodis stencil coefficients sum to zero, +! so the constant offset has no effect on dissipation. - call lopsided(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS) - call lopsided(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS) - call lopsided(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA) - call lopsided(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS) - call lopsided(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA) - call lopsided(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS) + call lopsided_kodis(ex,X,Y,Z,gxx,gxx_rhs,betax,betay,betaz,Symmetry,SSS,eps) + call lopsided_kodis(ex,X,Y,Z,gxy,gxy_rhs,betax,betay,betaz,Symmetry,AAS,eps) + call lopsided_kodis(ex,X,Y,Z,gxz,gxz_rhs,betax,betay,betaz,Symmetry,ASA,eps) + call lopsided_kodis(ex,X,Y,Z,gyy,gyy_rhs,betax,betay,betaz,Symmetry,SSS,eps) + call lopsided_kodis(ex,X,Y,Z,gyz,gyz_rhs,betax,betay,betaz,Symmetry,SAA,eps) + call lopsided_kodis(ex,X,Y,Z,gzz,gzz_rhs,betax,betay,betaz,Symmetry,SSS,eps) - call lopsided(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS) - call lopsided(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS) - call lopsided(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA) - call lopsided(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS) - call lopsided(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA) - call lopsided(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS) + call lopsided_kodis(ex,X,Y,Z,Axx,Axx_rhs,betax,betay,betaz,Symmetry,SSS,eps) + call lopsided_kodis(ex,X,Y,Z,Axy,Axy_rhs,betax,betay,betaz,Symmetry,AAS,eps) + call lopsided_kodis(ex,X,Y,Z,Axz,Axz_rhs,betax,betay,betaz,Symmetry,ASA,eps) + call lopsided_kodis(ex,X,Y,Z,Ayy,Ayy_rhs,betax,betay,betaz,Symmetry,SSS,eps) + call lopsided_kodis(ex,X,Y,Z,Ayz,Ayz_rhs,betax,betay,betaz,Symmetry,SAA,eps) + call lopsided_kodis(ex,X,Y,Z,Azz,Azz_rhs,betax,betay,betaz,Symmetry,SSS,eps) - call lopsided(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS) - call lopsided(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS) + call lopsided_kodis(ex,X,Y,Z,chi,chi_rhs,betax,betay,betaz,Symmetry,SSS,eps) + call lopsided_kodis(ex,X,Y,Z,trK,trK_rhs,betax,betay,betaz,Symmetry,SSS,eps) - call lopsided(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS) - call lopsided(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS) - call lopsided(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA) -!! + call lopsided_kodis(ex,X,Y,Z,Gamx,Gamx_rhs,betax,betay,betaz,Symmetry,ASS,eps) + call lopsided_kodis(ex,X,Y,Z,Gamy,Gamy_rhs,betax,betay,betaz,Symmetry,SAS,eps) + call lopsided_kodis(ex,X,Y,Z,Gamz,Gamz_rhs,betax,betay,betaz,Symmetry,SSA,eps) + +#if 1 +!! bam does not apply dissipation on gauge variables + call lopsided_kodis(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS,eps) +#if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) + call lopsided_kodis(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS,eps) + call lopsided_kodis(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS,eps) + call lopsided_kodis(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA,eps) +#endif +#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) + call lopsided_kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS,eps) + call lopsided_kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS,eps) + call lopsided_kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA,eps) +#endif +#else +! No dissipation on gauge variables (advection only) call lopsided(ex,X,Y,Z,Lap,Lap_rhs,betax,betay,betaz,Symmetry,SSS) - #if (GAUGE == 0 || GAUGE == 1 || GAUGE == 2 || GAUGE == 3 || GAUGE == 4 || GAUGE == 5 || GAUGE == 6 || GAUGE == 7) call lopsided(ex,X,Y,Z,betax,betax_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,betay,betay_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,betaz,betaz_rhs,betax,betay,betaz,Symmetry,SSA) #endif - #if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) call lopsided(ex,X,Y,Z,dtSfx,dtSfx_rhs,betax,betay,betaz,Symmetry,ASS) call lopsided(ex,X,Y,Z,dtSfy,dtSfy_rhs,betax,betay,betaz,Symmetry,SAS) call lopsided(ex,X,Y,Z,dtSfz,dtSfz_rhs,betax,betay,betaz,Symmetry,SSA) #endif - - if(eps>0)then -! usual Kreiss-Oliger dissipation - call kodis(ex,X,Y,Z,chi,chi_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,trK,trK_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,dxx,gxx_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,gxy,gxy_rhs,AAS,Symmetry,eps) - call kodis(ex,X,Y,Z,gxz,gxz_rhs,ASA,Symmetry,eps) - call kodis(ex,X,Y,Z,dyy,gyy_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,gyz,gyz_rhs,SAA,Symmetry,eps) - call kodis(ex,X,Y,Z,dzz,gzz_rhs,SSS,Symmetry,eps) -#if 0 -#define i 42 -#define j 40 -#define k 40 -if(Lev == 1)then -write(*,*) X(i),Y(j),Z(k) -write(*,*) "before",Axx_rhs(i,j,k) -endif -#undef i -#undef j -#undef k -!!stop #endif - call kodis(ex,X,Y,Z,Axx,Axx_rhs,SSS,Symmetry,eps) -#if 0 -#define i 42 -#define j 40 -#define k 40 -if(Lev == 1)then -write(*,*) X(i),Y(j),Z(k) -write(*,*) "after",Axx_rhs(i,j,k) -endif -#undef i -#undef j -#undef k -!!stop -#endif - call kodis(ex,X,Y,Z,Axy,Axy_rhs,AAS,Symmetry,eps) - call kodis(ex,X,Y,Z,Axz,Axz_rhs,ASA,Symmetry,eps) - call kodis(ex,X,Y,Z,Ayy,Ayy_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,Ayz,Ayz_rhs,SAA,Symmetry,eps) - call kodis(ex,X,Y,Z,Azz,Azz_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,Gamx,Gamx_rhs,ASS,Symmetry,eps) - call kodis(ex,X,Y,Z,Gamy,Gamy_rhs,SAS,Symmetry,eps) - call kodis(ex,X,Y,Z,Gamz,Gamz_rhs,SSA,Symmetry,eps) - -#if 1 -!! bam does not apply dissipation on gauge variables - call kodis(ex,X,Y,Z,Lap,Lap_rhs,SSS,Symmetry,eps) - call kodis(ex,X,Y,Z,betax,betax_rhs,ASS,Symmetry,eps) - call kodis(ex,X,Y,Z,betay,betay_rhs,SAS,Symmetry,eps) - call kodis(ex,X,Y,Z,betaz,betaz_rhs,SSA,Symmetry,eps) -#if (GAUGE == 0 || GAUGE == 2 || GAUGE == 3 || GAUGE == 6 || GAUGE == 7) - call kodis(ex,X,Y,Z,dtSfx,dtSfx_rhs,ASS,Symmetry,eps) - call kodis(ex,X,Y,Z,dtSfy,dtSfy_rhs,SAS,Symmetry,eps) - call kodis(ex,X,Y,Z,dtSfz,dtSfz_rhs,SSA,Symmetry,eps) -#endif -#endif - - endif if(co == 0)then ! ham_Res = trR + 2/3 * K^2 - A_ij * A^ij - 16 * PI * rho diff --git a/AMSS_NCKU_source/lopsidediff.f90 b/AMSS_NCKU_source/lopsidediff.f90 index 2e97af5..f0af27b 100644 --- a/AMSS_NCKU_source/lopsidediff.f90 +++ b/AMSS_NCKU_source/lopsidediff.f90 @@ -487,6 +487,201 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA) end subroutine lopsided +!----------------------------------------------------------------------------- +! Combined advection (lopsided) + Kreiss-Oliger dissipation (kodis) +! Shares the symmetry_bd buffer fh, eliminating one full-grid copy per call. +! Mathematically identical to calling lopsided then kodis separately. +!----------------------------------------------------------------------------- +subroutine lopsided_kodis(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA,eps) + implicit none + +!~~~~~~> Input parameters: + + integer, intent(in) :: ex(1:3),Symmetry + real*8, intent(in) :: X(1:ex(1)),Y(1:ex(2)),Z(1:ex(3)) + real*8,dimension(ex(1),ex(2),ex(3)),intent(in) :: f,Sfx,Sfy,Sfz + + real*8,dimension(ex(1),ex(2),ex(3)),intent(inout):: f_rhs + real*8,dimension(3),intent(in) ::SoA + real*8,intent(in) :: eps + +!~~~~~~> local variables: +! note index -2,-1,0, so we have 3 extra points + real*8,dimension(-2:ex(1),-2:ex(2),-2:ex(3)) :: fh + integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k + real*8 :: dX,dY,dZ + real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz + real*8, parameter :: ZEO=0.d0,ONE=1.d0, F3=3.d0 + real*8, parameter :: TWO=2.d0,F6=6.0d0,F18=1.8d1 + real*8, parameter :: F12=1.2d1, F10=1.d1,EIT=8.d0 + integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 +! kodis parameters + real*8, parameter :: SIX=6.d0,FIT=1.5d1,TWT=2.d1 + real*8, parameter :: cof=6.4d1 ! 2^6 + + dX = X(2)-X(1) + dY = Y(2)-Y(1) + dZ = Z(2)-Z(1) + + d12dx = ONE/F12/dX + d12dy = ONE/F12/dY + d12dz = ONE/F12/dZ + + d2dx = ONE/TWO/dX + d2dy = ONE/TWO/dY + d2dz = ONE/TWO/dZ + + imax = ex(1) + jmax = ex(2) + kmax = ex(3) + + imin = 1 + jmin = 1 + kmin = 1 + if(Symmetry > NO_SYMM .and. dabs(Z(1)) < dZ) kmin = -2 + if(Symmetry > EQ_SYMM .and. dabs(X(1)) < dX) imin = -2 + if(Symmetry > EQ_SYMM .and. dabs(Y(1)) < dY) jmin = -2 + +! Single symmetry_bd call shared by both advection and dissipation + call symmetry_bd(3,ex,f,fh,SoA) + +! ---- Advection (lopsided) loop ---- +! upper bound set ex-1 only for efficiency, +! the loop body will set ex 0 also + do k=1,ex(3)-1 + do j=1,ex(2)-1 + do i=1,ex(1)-1 +! x direction + if(Sfx(i,j,k) > ZEO)then + if(i+3 <= imax)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) & + -F6*fh(i+2,j,k)+ fh(i+3,j,k)) + elseif(i+2 <= imax)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k)) + + elseif(i+1 <= imax)then + f_rhs(i,j,k)=f_rhs(i,j,k)- & + Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) & + -F6*fh(i-2,j,k)+ fh(i-3,j,k)) + endif + elseif(Sfx(i,j,k) < ZEO)then + if(i-3 >= imin)then + f_rhs(i,j,k)=f_rhs(i,j,k)- & + Sfx(i,j,k)*d12dx*(-F3*fh(i+1,j,k)-F10*fh(i,j,k)+F18*fh(i-1,j,k) & + -F6*fh(i-2,j,k)+ fh(i-3,j,k)) + elseif(i-2 >= imin)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfx(i,j,k)*d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k)) + + elseif(i-1 >= imin)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfx(i,j,k)*d12dx*(-F3*fh(i-1,j,k)-F10*fh(i,j,k)+F18*fh(i+1,j,k) & + -F6*fh(i+2,j,k)+ fh(i+3,j,k)) + endif + endif + +! y direction + if(Sfy(i,j,k) > ZEO)then + if(j+3 <= jmax)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) & + -F6*fh(i,j+2,k)+ fh(i,j+3,k)) + elseif(j+2 <= jmax)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k)) + + elseif(j+1 <= jmax)then + f_rhs(i,j,k)=f_rhs(i,j,k)- & + Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) & + -F6*fh(i,j-2,k)+ fh(i,j-3,k)) + endif + elseif(Sfy(i,j,k) < ZEO)then + if(j-3 >= jmin)then + f_rhs(i,j,k)=f_rhs(i,j,k)- & + Sfy(i,j,k)*d12dy*(-F3*fh(i,j+1,k)-F10*fh(i,j,k)+F18*fh(i,j-1,k) & + -F6*fh(i,j-2,k)+ fh(i,j-3,k)) + elseif(j-2 >= jmin)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfy(i,j,k)*d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k)) + + elseif(j-1 >= jmin)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfy(i,j,k)*d12dy*(-F3*fh(i,j-1,k)-F10*fh(i,j,k)+F18*fh(i,j+1,k) & + -F6*fh(i,j+2,k)+ fh(i,j+3,k)) + endif + endif + +! z direction + if(Sfz(i,j,k) > ZEO)then + if(k+3 <= kmax)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) & + -F6*fh(i,j,k+2)+ fh(i,j,k+3)) + elseif(k+2 <= kmax)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2)) + + elseif(k+1 <= kmax)then + f_rhs(i,j,k)=f_rhs(i,j,k)- & + Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) & + -F6*fh(i,j,k-2)+ fh(i,j,k-3)) + endif + elseif(Sfz(i,j,k) < ZEO)then + if(k-3 >= kmin)then + f_rhs(i,j,k)=f_rhs(i,j,k)- & + Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k+1)-F10*fh(i,j,k)+F18*fh(i,j,k-1) & + -F6*fh(i,j,k-2)+ fh(i,j,k-3)) + elseif(k-2 >= kmin)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfz(i,j,k)*d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2)) + + elseif(k-1 >= kmin)then + f_rhs(i,j,k)=f_rhs(i,j,k)+ & + Sfz(i,j,k)*d12dz*(-F3*fh(i,j,k-1)-F10*fh(i,j,k)+F18*fh(i,j,k+1) & + -F6*fh(i,j,k+2)+ fh(i,j,k+3)) + endif + endif + enddo + enddo + enddo + +! ---- Dissipation (kodis) loop ---- + if(eps > ZEO) then + do k=1,ex(3) + do j=1,ex(2) + do i=1,ex(1) + + if(i-3 >= imin .and. i+3 <= imax .and. & + j-3 >= jmin .and. j+3 <= jmax .and. & + k-3 >= kmin .and. k+3 <= kmax) then + f_rhs(i,j,k) = f_rhs(i,j,k) + eps/cof *( ( & + (fh(i-3,j,k)+fh(i+3,j,k)) - & + SIX*(fh(i-2,j,k)+fh(i+2,j,k)) + & + FIT*(fh(i-1,j,k)+fh(i+1,j,k)) - & + TWT* fh(i,j,k) )/dX + & + ( & + (fh(i,j-3,k)+fh(i,j+3,k)) - & + SIX*(fh(i,j-2,k)+fh(i,j+2,k)) + & + FIT*(fh(i,j-1,k)+fh(i,j+1,k)) - & + TWT* fh(i,j,k) )/dY + & + ( & + (fh(i,j,k-3)+fh(i,j,k+3)) - & + SIX*(fh(i,j,k-2)+fh(i,j,k+2)) + & + FIT*(fh(i,j,k-1)+fh(i,j,k+1)) - & + TWT* fh(i,j,k) )/dZ ) + endif + + enddo + enddo + enddo + endif + + return + + end subroutine lopsided_kodis + #elif (ghost_width == 4) ! sixth order code ! Compute advection terms in right hand sides of field equations