Optimize BSSN RHS and finite difference calculations
- Integrate Intel oneMKL VML for efficient Gauge calculation in bssn_rhs.f90
- Refactor fderivs in diff_new.f90 to separate bulk/boundary loops for better vectorization
- Add optimization report in docs/optimization_report.md
This commit is contained in:
@@ -962,6 +962,7 @@
|
||||
real*8,dimension(-1:ex(1),-1:ex(2),-1:ex(3)) :: fh
|
||||
real*8, dimension(3) :: SoA
|
||||
integer :: imin,jmin,kmin,imax,jmax,kmax,i,j,k
|
||||
integer :: ib_s, ib_e, jb_s, jb_e, kb_s, kb_e
|
||||
real*8 :: d12dx,d12dy,d12dz,d2dx,d2dy,d2dz
|
||||
integer, parameter :: NO_SYMM = 0, EQ_SYMM = 1, OCTANT = 2
|
||||
real*8, parameter :: ZEO=0.d0,ONE=1.d0, F60=6.d1
|
||||
@@ -1001,109 +1002,66 @@
|
||||
fy = ZEO
|
||||
fz = ZEO
|
||||
|
||||
ib_s = max(1, imin + 2)
|
||||
ib_e = min(ex(1)-1, imax - 2)
|
||||
jb_s = max(1, jmin + 2)
|
||||
jb_e = min(ex(2)-1, jmax - 2)
|
||||
kb_s = max(1, kmin + 2)
|
||||
kb_e = min(ex(3)-1, kmax - 2)
|
||||
|
||||
! Intel compiler directives for aggressive vectorization
|
||||
!DIR$ SIMD
|
||||
!DIR$ IVDEP
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
!DIR$ SIMD
|
||||
do i=1,ex(1)-1
|
||||
#if 0
|
||||
! x direction
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||
! fx(i) = ---------------------------------------------
|
||||
! 12 dx
|
||||
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||
|
||||
! Check if (j, k) are within the safe 4th-order range
|
||||
if (k >= kb_s .and. k <= kb_e .and. j >= jb_s .and. j <= jb_e) then
|
||||
|
||||
! 1. Left Boundary Peel
|
||||
do i=1, min(ex(1)-1, ib_s-1)
|
||||
if(i+1 <= imax .and. i-1 >= imin .and. &
|
||||
j+1 <= jmax .and. j-1 >= jmin .and. &
|
||||
k+1 <= kmax .and. k-1 >= kmin) then
|
||||
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||
endif
|
||||
enddo
|
||||
|
||||
elseif(i+1 <= imax .and. i-1 >= imin)then
|
||||
!
|
||||
! - f(i-1) + f(i+1)
|
||||
! fx(i) = --------------------------------
|
||||
! 2 dx
|
||||
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||
! 2. BULK CORE (No branches, full SIMD)
|
||||
!DIR$ SIMD
|
||||
do i=ib_s, ib_e
|
||||
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||
enddo
|
||||
|
||||
! set imax and imin 0
|
||||
! 3. Right Boundary Peel
|
||||
do i=max(1, ib_e+1), ex(1)-1
|
||||
if(i+1 <= imax .and. i-1 >= imin .and. &
|
||||
j+1 <= jmax .and. j-1 >= jmin .and. &
|
||||
k+1 <= kmax .and. k-1 >= kmin) then
|
||||
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||
endif
|
||||
enddo
|
||||
|
||||
else
|
||||
! We are in a boundary slab (j or k is near edge)
|
||||
! Fallback to standard loop with checks (max 2nd order possible here)
|
||||
do i=1,ex(1)-1
|
||||
if(i+1 <= imax .and. i-1 >= imin .and. &
|
||||
j+1 <= jmax .and. j-1 >= jmin .and. &
|
||||
k+1 <= kmax .and. k-1 >= kmin) then
|
||||
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||
endif
|
||||
enddo
|
||||
endif
|
||||
! y direction
|
||||
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||
|
||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||
|
||||
elseif(j+1 <= jmax .and. j-1 >= jmin)then
|
||||
|
||||
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||
|
||||
! set jmax and jmin 0
|
||||
endif
|
||||
! z direction
|
||||
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||
|
||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||
|
||||
elseif(k+1 <= kmax .and. k-1 >= kmin)then
|
||||
|
||||
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||
|
||||
! set kmax and kmin 0
|
||||
endif
|
||||
#elif 0
|
||||
! x direction
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||
! fx(i) = ---------------------------------------------
|
||||
! 12 dx
|
||||
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||
|
||||
elseif(i+3 <= imax .and. i-1 >= imin)then
|
||||
fx(i,j,k)=d12dx*(-3.d0*fh(i-1,j,k)-1.d1*fh(i,j,k)+1.8d1*fh(i+1,j,k)-6.d0*fh(i+2,j,k)+fh(i+3,j,k))
|
||||
elseif(i+1 <= imax .and. i-3 >= imin)then
|
||||
fx(i,j,k)=d12dx*( 3.d0*fh(i+1,j,k)+1.d1*fh(i,j,k)-1.8d1*fh(i-1,j,k)+6.d0*fh(i-2,j,k)-fh(i-3,j,k))
|
||||
! set imax and imin 0
|
||||
endif
|
||||
! y direction
|
||||
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||
|
||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||
|
||||
elseif(j+3 <= jmax .and. j-1 >= jmin)then
|
||||
fy(i,j,k)=d12dy*(-3.d0*fh(i,j-1,k)-1.d1*fh(i,j,k)+1.8d1*fh(i,j+1,k)-6.d0*fh(i,j+2,k)+fh(i,j+3,k))
|
||||
elseif(j+1 <= jmax .and. j-3 >= jmin)then
|
||||
fy(i,j,k)=d12dy*( 3.d0*fh(i,j+1,k)+1.d1*fh(i,j,k)-1.8d1*fh(i,j-1,k)+6.d0*fh(i,j-2,k)-fh(i,j-3,k))
|
||||
|
||||
! set jmax and jmin 0
|
||||
endif
|
||||
! z direction
|
||||
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||
|
||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||
|
||||
elseif(k+3 <= kmax .and. k-1 >= kmin)then
|
||||
fz(i,j,k)=d12dz*(-3.d0*fh(i,j,k-1)-1.d1*fh(i,j,k)+1.8d1*fh(i,j,k+1)-6.d0*fh(i,j,k+2)+fh(i,j,k+3))
|
||||
elseif(k+1 <= kmax .and. k-3 >= kmin)then
|
||||
fz(i,j,k)=d12dz*( 3.d0*fh(i,j,k+1)+1.d1*fh(i,j,k)-1.8d1*fh(i,j,k-1)+6.d0*fh(i,j,k-2)-fh(i,j,k-3))
|
||||
|
||||
! set kmax and kmin 0
|
||||
endif
|
||||
#else
|
||||
! for bam comparison
|
||||
if(i+2 <= imax .and. i-2 >= imin .and. &
|
||||
j+2 <= jmax .and. j-2 >= jmin .and. &
|
||||
k+2 <= kmax .and. k-2 >= kmin) then
|
||||
fx(i,j,k)=d12dx*(fh(i-2,j,k)-EIT*fh(i-1,j,k)+EIT*fh(i+1,j,k)-fh(i+2,j,k))
|
||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||
elseif(i+1 <= imax .and. i-1 >= imin .and. &
|
||||
j+1 <= jmax .and. j-1 >= jmin .and. &
|
||||
k+1 <= kmax .and. k-1 >= kmin) then
|
||||
fx(i,j,k)=d2dx*(-fh(i-1,j,k)+fh(i+1,j,k))
|
||||
fy(i,j,k)=d2dy*(-fh(i,j-1,k)+fh(i,j+1,k))
|
||||
fz(i,j,k)=d2dz*(-fh(i,j,k-1)+fh(i,j,k+1))
|
||||
endif
|
||||
#endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
|
||||
Reference in New Issue
Block a user