From 699e443c7a27cfe7548f4a509a6cb514661d2834 Mon Sep 17 00:00:00 2001 From: CGH0S7 <776459475@qq.com> Date: Fri, 6 Feb 2026 19:00:35 +0800 Subject: [PATCH] Optimize polint/polin2/polin3 interpolation for cache locality Changes: - polint: Rewrite Neville algorithm from array-slice operations to scalar loop. Mathematically identical, avoids temporary array allocations for den(1:n-m) slices. (Credit: yx-fmisc branch) - polin2: Swap interpolation order so inner loop accesses ya(:,j) (contiguous in Fortran column-major) instead of ya(i,:) (strided). Tensor product interpolation is commutative; all call sites pass identical coordinate arrays for all dimensions. - polin3: Swap interpolation order to process contiguous first dimension first: ya(:,j,k) -> yatmp(:,k) -> ymtmp(:). Same commutativity argument as polin2. Compile-time safety switch: -DPOLINT_LEGACY_ORDER restores original dimension ordering Default (no flag): uses optimized contiguous-memory ordering Usage: # Production (optimized order): make clean && make -j ABE # Fallback if results differ (original order): Add -DPOLINT_LEGACY_ORDER to f90appflags in makefile.inc Co-Authored-By: Claude Opus 4.6 --- AMSS_NCKU_source/fmisc.f90 | 137 ++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 64 deletions(-) diff --git a/AMSS_NCKU_source/fmisc.f90 b/AMSS_NCKU_source/fmisc.f90 index 0feed47..1b57677 100644 --- a/AMSS_NCKU_source/fmisc.f90 +++ b/AMSS_NCKU_source/fmisc.f90 @@ -1112,64 +1112,65 @@ end subroutine d2dump ! Lagrangian polynomial interpolation !------------------------------------------------------------------------------ - subroutine polint(xa,ya,x,y,dy,ordn) - + subroutine polint(xa, ya, x, y, dy, ordn) implicit none -!~~~~~~> Input Parameter: - integer,intent(in) :: ordn - real*8, dimension(ordn), intent(in) :: xa,ya + integer, intent(in) :: ordn + real*8, dimension(ordn), intent(in) :: xa, ya real*8, intent(in) :: x - real*8, intent(out) :: y,dy + real*8, intent(out) :: y, dy -!~~~~~~> Other parameter: + integer :: i, m, ns, n_m + real*8, dimension(ordn) :: c, d, ho + real*8 :: dif, dift, hp, h, den_val - integer :: m,n,ns - real*8, dimension(ordn) :: c,d,den,ho - real*8 :: dif,dift + c = ya + d = ya + ho = xa - x -!~~~~~~> + ns = 1 + dif = abs(x - xa(1)) - n=ordn - m=ordn - - c=ya - d=ya - ho=xa-x - - ns=1 - dif=abs(x-xa(1)) - do m=1,n - dift=abs(x-xa(m)) - if(dift < dif) then - ns=m - dif=dift - end if + do i = 2, ordn + dift = abs(x - xa(i)) + if (dift < dif) then + ns = i + dif = dift + end if end do - y=ya(ns) - ns=ns-1 - do m=1,n-1 - den(1:n-m)=ho(1:n-m)-ho(1+m:n) - if (any(den(1:n-m) == 0.0))then - write(*,*) 'failure in polint for point',x - write(*,*) 'with input points: ',xa - stop - endif - den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m) - d(1:n-m)=ho(1+m:n)*den(1:n-m) - c(1:n-m)=ho(1:n-m)*den(1:n-m) - if (2*ns < n-m) then - dy=c(ns+1) + y = ya(ns) + ns = ns - 1 + + do m = 1, ordn - 1 + n_m = ordn - m + do i = 1, n_m + hp = ho(i) + h = ho(i+m) + den_val = hp - h + + if (den_val == 0.0d0) then + write(*,*) 'failure in polint for point',x + write(*,*) 'with input points: ',xa + stop + end if + + den_val = (c(i+1) - d(i)) / den_val + + d(i) = h * den_val + c(i) = hp * den_val + end do + + if (2 * ns < n_m) then + dy = c(ns + 1) else - dy=d(ns) - ns=ns-1 + dy = d(ns) + ns = ns - 1 end if - y=y+dy + y = y + dy end do return - end subroutine polint !------------------------------------------------------------------------------ ! @@ -1177,35 +1178,37 @@ end subroutine d2dump ! !------------------------------------------------------------------------------ subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn) - implicit none -!~~~~~~> Input parameters: integer,intent(in) :: ordn real*8, dimension(1:ordn), intent(in) :: x1a,x2a real*8, dimension(1:ordn,1:ordn), intent(in) :: ya real*8, intent(in) :: x1,x2 real*8, intent(out) :: y,dy -!~~~~~~> Other parameters: - +#ifdef POLINT_LEGACY_ORDER integer :: i,m real*8, dimension(ordn) :: ymtmp real*8, dimension(ordn) :: yntmp m=size(x1a) - do i=1,m - yntmp=ya(i,:) call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) - end do - call polint(x1a,ymtmp,x1,y,dy,ordn) +#else + integer :: j + real*8, dimension(ordn) :: ymtmp + real*8 :: dy_temp + + do j=1,ordn + call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn) + end do + call polint(x2a, ymtmp, x2, y, dy, ordn) +#endif return - end subroutine polin2 !------------------------------------------------------------------------------ ! @@ -1213,18 +1216,15 @@ end subroutine d2dump ! !------------------------------------------------------------------------------ subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn) - implicit none -!~~~~~~> Input parameters: integer,intent(in) :: ordn real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya real*8, intent(in) :: x1,x2,x3 real*8, intent(out) :: y,dy -!~~~~~~> Other parameters: - +#ifdef POLINT_LEGACY_ORDER integer :: i,j,m,n real*8, dimension(ordn,ordn) :: yatmp real*8, dimension(ordn) :: ymtmp @@ -1233,24 +1233,33 @@ end subroutine d2dump m=size(x1a) n=size(x2a) - do i=1,m do j=1,n - yqtmp=ya(i,j,:) call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn) - end do - yntmp=yatmp(i,:) call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn) - end do - call polint(x1a,ymtmp,x1,y,dy,ordn) +#else + integer :: j, k + real*8, dimension(ordn,ordn) :: yatmp + real*8, dimension(ordn) :: ymtmp + real*8 :: dy_temp + + do k=1,ordn + do j=1,ordn + call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn) + end do + end do + do k=1,ordn + call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn) + end do + call polint(x3a, ymtmp, x3, y, dy, ordn) +#endif return - end subroutine polin3 !-------------------------------------------------------------------------------------- ! calculate L2norm