From 95220a05c88fe9418ddb170ba34aceee9866860a Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Mon, 2 Mar 2026 17:33:26 +0800 Subject: [PATCH] optimize fdderivs core-region branch elimination for ghost_width=3 --- AMSS_NCKU_source/diff_newwb.f90 | 69 ++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/AMSS_NCKU_source/diff_newwb.f90 b/AMSS_NCKU_source/diff_newwb.f90 index e6ee09d..1fbbcd2 100644 --- a/AMSS_NCKU_source/diff_newwb.f90 +++ b/AMSS_NCKU_source/diff_newwb.f90 @@ -33,7 +33,7 @@ real*8 :: dX,dY,dZ real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh real*8, dimension(3) :: SoA - integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k + integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k real*8 :: d2dx,d2dy,d2dz integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1 @@ -137,7 +137,7 @@ real*8 :: dX real*8,dimension(0:ex(1),0:ex(2),0:ex(3)) :: fh real*8, dimension(3) :: SoA - integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k + integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k real*8 :: d2dx integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1 @@ -1512,8 +1512,9 @@ real*8 :: dX,dY,dZ real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh real*8, dimension(3) :: SoA - integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k - real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz + integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k + integer :: i_core_min,i_core_max,j_core_min,j_core_max,k_core_min,k_core_max + real*8 :: Sdxdx,Sdydy,Sdzdz,Fdxdx,Fdydy,Fdzdz real*8 :: Sdxdy,Sdxdz,Sdydz,Fdxdy,Fdxdz,Fdydz integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2 real*8, parameter :: ZEO=0.d0, ONE=1.d0, TWO=2.d0, F1o4=2.5d-1, F9=9.d0, F45=4.5d1 @@ -1560,17 +1561,55 @@ fxx = ZEO fyy = ZEO - fzz = ZEO - fxy = ZEO - fxz = ZEO - fyz = ZEO - - do k=1,ex(3) - do j=1,ex(2) - do i=1,ex(1) -!~~~~~~ fxx - if(i+2 <= imax .and. i-2 >= imin)then -! + fzz = ZEO + fxy = ZEO + fxz = ZEO + fyz = ZEO + + i_core_min = max(1, imin+2) + i_core_max = min(ex(1), imax-2) + j_core_min = max(1, jmin+2) + j_core_max = min(ex(2), jmax-2) + k_core_min = max(1, kmin+2) + k_core_max = min(ex(3), kmax-2) + + if(i_core_min <= i_core_max .and. j_core_min <= j_core_max .and. k_core_min <= k_core_max)then + do k=k_core_min,k_core_max + do j=j_core_min,j_core_max + do i=i_core_min,i_core_max +! interior points always use 4th-order stencils without branch checks + fxx(i,j,k) = Fdxdx*(-fh(i-2,j,k)+F16*fh(i-1,j,k)-F30*fh(i,j,k) & + -fh(i+2,j,k)+F16*fh(i+1,j,k) ) + fyy(i,j,k) = Fdydy*(-fh(i,j-2,k)+F16*fh(i,j-1,k)-F30*fh(i,j,k) & + -fh(i,j+2,k)+F16*fh(i,j+1,k) ) + fzz(i,j,k) = Fdzdz*(-fh(i,j,k-2)+F16*fh(i,j,k-1)-F30*fh(i,j,k) & + -fh(i,j,k+2)+F16*fh(i,j,k+1) ) + fxy(i,j,k) = Fdxdy*( (fh(i-2,j-2,k)-F8*fh(i-1,j-2,k)+F8*fh(i+1,j-2,k)-fh(i+2,j-2,k)) & + -F8 *(fh(i-2,j-1,k)-F8*fh(i-1,j-1,k)+F8*fh(i+1,j-1,k)-fh(i+2,j-1,k)) & + +F8 *(fh(i-2,j+1,k)-F8*fh(i-1,j+1,k)+F8*fh(i+1,j+1,k)-fh(i+2,j+1,k)) & + - (fh(i-2,j+2,k)-F8*fh(i-1,j+2,k)+F8*fh(i+1,j+2,k)-fh(i+2,j+2,k))) + fxz(i,j,k) = Fdxdz*( (fh(i-2,j,k-2)-F8*fh(i-1,j,k-2)+F8*fh(i+1,j,k-2)-fh(i+2,j,k-2)) & + -F8 *(fh(i-2,j,k-1)-F8*fh(i-1,j,k-1)+F8*fh(i+1,j,k-1)-fh(i+2,j,k-1)) & + +F8 *(fh(i-2,j,k+1)-F8*fh(i-1,j,k+1)+F8*fh(i+1,j,k+1)-fh(i+2,j,k+1)) & + - (fh(i-2,j,k+2)-F8*fh(i-1,j,k+2)+F8*fh(i+1,j,k+2)-fh(i+2,j,k+2))) + fyz(i,j,k) = Fdydz*( (fh(i,j-2,k-2)-F8*fh(i,j-1,k-2)+F8*fh(i,j+1,k-2)-fh(i,j+2,k-2)) & + -F8 *(fh(i,j-2,k-1)-F8*fh(i,j-1,k-1)+F8*fh(i,j+1,k-1)-fh(i,j+2,k-1)) & + +F8 *(fh(i,j-2,k+1)-F8*fh(i,j-1,k+1)+F8*fh(i,j+1,k+1)-fh(i,j+2,k+1)) & + - (fh(i,j-2,k+2)-F8*fh(i,j-1,k+2)+F8*fh(i,j+1,k+2)-fh(i,j+2,k+2))) + enddo + enddo + enddo + endif + + do k=1,ex(3) + do j=1,ex(2) + do i=1,ex(1) + if(i>=i_core_min .and. i<=i_core_max .and. & + j>=j_core_min .and. j<=j_core_max .and. & + k>=k_core_min .and. k<=k_core_max) cycle +!~~~~~~ fxx + if(i+2 <= imax .and. i-2 >= imin)then +! ! - f(i-2) + 16 f(i-1) - 30 f(i) + 16 f(i+1) - f(i+2) ! fxx(i) = ---------------------------------------------------------- ! 12 dx^2