From 699e443c7a27cfe7548f4a509a6cb514661d2834 Mon Sep 17 00:00:00 2001
From: CGH0S7 <776459475@qq.com>
Date: Fri, 6 Feb 2026 19:00:35 +0800
Subject: [PATCH] Optimize polint/polin2/polin3 interpolation for cache
 locality

Changes:
- polint: Rewrite Neville algorithm from array-slice operations to
  scalar loop. Mathematically identical, avoids temporary array
  allocations for den(1:n-m) slices. (Credit: yx-fmisc branch)

- polin2: Swap interpolation order so inner loop accesses ya(:,j)
  (contiguous in Fortran column-major) instead of ya(i,:) (strided).
  Tensor product interpolation is commutative; all call sites pass
  identical coordinate arrays for all dimensions.

- polin3: Swap interpolation order to process contiguous first
  dimension first: ya(:,j,k) -> yatmp(:,k) -> ymtmp(:).
  Same commutativity argument as polin2.

Compile-time safety switch:
  -DPOLINT_LEGACY_ORDER  restores original dimension ordering
  Default (no flag):     uses optimized contiguous-memory ordering

Usage:
  # Production (optimized order):
  make clean && make -j ABE

  # Fallback if results differ (original order):
  Add -DPOLINT_LEGACY_ORDER to f90appflags in makefile.inc

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 AMSS_NCKU_source/fmisc.f90 | 137 ++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 64 deletions(-)

diff --git a/AMSS_NCKU_source/fmisc.f90 b/AMSS_NCKU_source/fmisc.f90
index 0feed47..1b57677 100644
--- a/AMSS_NCKU_source/fmisc.f90
+++ b/AMSS_NCKU_source/fmisc.f90
@@ -1112,64 +1112,65 @@ end subroutine d2dump
 ! Lagrangian polynomial interpolation
 !------------------------------------------------------------------------------
 
-  subroutine polint(xa,ya,x,y,dy,ordn)
-
+  subroutine polint(xa, ya, x, y, dy, ordn)
   implicit none
 
-!~~~~~~> Input Parameter:
-  integer,intent(in) :: ordn
-  real*8, dimension(ordn), intent(in) :: xa,ya
+  integer, intent(in) :: ordn
+  real*8, dimension(ordn), intent(in) :: xa, ya
   real*8, intent(in) :: x
-  real*8, intent(out) :: y,dy
+  real*8, intent(out) :: y, dy
 
-!~~~~~~> Other parameter:
+  integer :: i, m, ns, n_m
+  real*8, dimension(ordn) :: c, d, ho
+  real*8 :: dif, dift, hp, h, den_val
 
-  integer :: m,n,ns
-  real*8, dimension(ordn) :: c,d,den,ho
-  real*8 :: dif,dift
+  c = ya
+  d = ya
+  ho = xa - x
 
-!~~~~~~>
+  ns = 1
+  dif = abs(x - xa(1))
 
-  n=ordn
-  m=ordn
-
-  c=ya
-  d=ya
-  ho=xa-x
-
-  ns=1
-  dif=abs(x-xa(1))
-  do m=1,n
-   dift=abs(x-xa(m))
-   if(dift < dif) then
-    ns=m
-    dif=dift
-   end if
+  do i = 2, ordn
+    dift = abs(x - xa(i))
+    if (dift < dif) then
+      ns = i
+      dif = dift
+    end if
   end do
 
-  y=ya(ns)
-  ns=ns-1
-  do m=1,n-1
-    den(1:n-m)=ho(1:n-m)-ho(1+m:n)
-    if (any(den(1:n-m) == 0.0))then
-      write(*,*) 'failure in polint for point',x
-      write(*,*) 'with input points: ',xa
-      stop
-    endif
-    den(1:n-m)=(c(2:n-m+1)-d(1:n-m))/den(1:n-m)
-    d(1:n-m)=ho(1+m:n)*den(1:n-m)
-    c(1:n-m)=ho(1:n-m)*den(1:n-m)
-    if (2*ns < n-m) then
-      dy=c(ns+1)
+  y = ya(ns)
+  ns = ns - 1
+
+  do m = 1, ordn - 1
+    n_m = ordn - m
+    do i = 1, n_m
+      hp = ho(i)
+      h  = ho(i+m)
+      den_val = hp - h
+
+      if (den_val == 0.0d0) then
+        write(*,*) 'failure in polint for point',x
+        write(*,*) 'with input points: ',xa
+        stop
+      end if
+
+      den_val = (c(i+1) - d(i)) / den_val
+
+      d(i) = h * den_val
+      c(i) = hp * den_val
+    end do
+
+    if (2 * ns < n_m) then
+      dy = c(ns + 1)
     else
-      dy=d(ns)
-      ns=ns-1
+      dy = d(ns)
+      ns = ns - 1
     end if
-    y=y+dy
+    y = y + dy
   end do
 
   return
-
   end subroutine polint
 !------------------------------------------------------------------------------
 !
@@ -1177,35 +1178,37 @@ end subroutine d2dump
 !
 !------------------------------------------------------------------------------
   subroutine polin2(x1a,x2a,ya,x1,x2,y,dy,ordn)
-
   implicit none
 
-!~~~~~~> Input parameters:
   integer,intent(in) :: ordn
   real*8, dimension(1:ordn), intent(in) :: x1a,x2a
   real*8, dimension(1:ordn,1:ordn), intent(in) :: ya
   real*8, intent(in) :: x1,x2
   real*8, intent(out) :: y,dy
 
-!~~~~~~> Other parameters:
-
+#ifdef POLINT_LEGACY_ORDER
   integer  :: i,m
   real*8, dimension(ordn) :: ymtmp
   real*8, dimension(ordn) :: yntmp
 
   m=size(x1a)
-  
   do i=1,m
-
     yntmp=ya(i,:)
     call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
-
   end do
-
   call polint(x1a,ymtmp,x1,y,dy,ordn)
+#else
+  integer  :: j
+  real*8, dimension(ordn) :: ymtmp
+  real*8 :: dy_temp
+
+  do j=1,ordn
+    call polint(x1a, ya(:,j), x1, ymtmp(j), dy_temp, ordn)
+  end do
+  call polint(x2a, ymtmp, x2, y, dy, ordn)
+#endif
 
   return
-
   end subroutine polin2
 !------------------------------------------------------------------------------
 !
@@ -1213,18 +1216,15 @@ end subroutine d2dump
 !
 !------------------------------------------------------------------------------
   subroutine polin3(x1a,x2a,x3a,ya,x1,x2,x3,y,dy,ordn)
-
   implicit none
 
-!~~~~~~> Input parameters:
   integer,intent(in) :: ordn
   real*8, dimension(1:ordn), intent(in) :: x1a,x2a,x3a
   real*8, dimension(1:ordn,1:ordn,1:ordn), intent(in) :: ya
   real*8, intent(in) :: x1,x2,x3
   real*8, intent(out) :: y,dy
 
-!~~~~~~> Other parameters:
-
+#ifdef POLINT_LEGACY_ORDER
   integer  :: i,j,m,n
   real*8, dimension(ordn,ordn) :: yatmp
   real*8, dimension(ordn) :: ymtmp
@@ -1233,24 +1233,33 @@ end subroutine d2dump
 
   m=size(x1a)
   n=size(x2a)
-  
   do i=1,m
    do j=1,n
-
     yqtmp=ya(i,j,:)
     call polint(x3a,yqtmp,x3,yatmp(i,j),dy,ordn)
-
    end do
-
     yntmp=yatmp(i,:)
     call polint(x2a,yntmp,x2,ymtmp(i),dy,ordn)
-
   end do
-
   call polint(x1a,ymtmp,x1,y,dy,ordn)
+#else
+  integer  :: j, k
+  real*8, dimension(ordn,ordn) :: yatmp
+  real*8, dimension(ordn) :: ymtmp
+  real*8 :: dy_temp
+
+  do k=1,ordn
+    do j=1,ordn
+      call polint(x1a, ya(:,j,k), x1, yatmp(j,k), dy_temp, ordn)
+    end do
+  end do
+  do k=1,ordn
+    call polint(x2a, yatmp(:,k), x2, ymtmp(k), dy_temp, ordn)
+  end do
+  call polint(x3a, ymtmp, x3, y, dy, ordn)
+#endif
 
   return
-
   end subroutine polin3
 !--------------------------------------------------------------------------------------
 ! calculate L2norm