Files
AMSS-NCKU/AMSS_NCKU_source/enforce_algebra.f90
CGH0S7 6738854a9d Compiler-level and hot-path optimizations for GW150914
- makefile.inc: add -ipo (interprocedural optimization) and
  -align array64byte (64-byte array alignment for vectorization)
- fmisc.f90: remove redundant funcc=0.d0 zeroing from symmetry_bd,
  symmetry_tbd, symmetry_stbd (~328+ full-array memsets eliminated
  per timestep)
- enforce_algebra.f90: rewrite enforce_ag and enforce_ga as point-wise
  loops, replacing 12 stack-allocated 3D temporary arrays with scalar
  locals for better cache locality

All changes are mathematically equivalent — no algorithmic modifications.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 17:13:39 +08:00

231 lines
7.1 KiB
Fortran

!-----------------------------------------------------------------------------
!
! remove the trace of Aij
! trace-free Aij and enforce the determinant of bssn metric to one
!-----------------------------------------------------------------------------
subroutine enforce_ag(ex, dxx, gxy, gxz, dyy, gyz, dzz, &
Axx, Axy, Axz, Ayy, Ayz, Azz)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3)
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable:
integer :: i,j,k
real*8 :: lgxx,lgyy,lgzz,ldetg
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
real*8 :: ltrA,lscale
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~>
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
lgxx = dxx(i,j,k) + ONE
lgyy = dyy(i,j,k) + ONE
lgzz = dzz(i,j,k) + ONE
ldetg = lgxx * lgyy * lgzz &
+ gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) &
+ gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
- gxz(i,j,k) * lgyy * gxz(i,j,k) &
- gxy(i,j,k) * gxy(i,j,k) * lgzz &
- lgxx * gyz(i,j,k) * gyz(i,j,k)
lgupxx = ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg
lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg
lgupxz = ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg
lgupyy = ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg
lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg
lgupzz = ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+ lgupyz * Ayz(i,j,k))
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA
Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
lscale = ONE / ( ldetg ** F1o3 )
dxx(i,j,k) = lgxx * lscale - ONE
gxy(i,j,k) = gxy(i,j,k) * lscale
gxz(i,j,k) = gxz(i,j,k) * lscale
dyy(i,j,k) = lgyy * lscale - ONE
gyz(i,j,k) = gyz(i,j,k) * lscale
dzz(i,j,k) = lgzz * lscale - ONE
enddo
enddo
enddo
return
end subroutine enforce_ag
#if 1
!----------------------------------------------------------------------------------
! swap the turn of a and g
!----------------------------------------------------------------------------------
subroutine enforce_ga(ex, dxx, gxy, gxz, dyy, gyz, dzz, &
Axx, Axy, Axz, Ayy, Ayz, Azz)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3)
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable:
integer :: i,j,k
real*8 :: lgxx,lgyy,lgzz,lscale
real*8 :: lgxy,lgxz,lgyz
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
real*8 :: ltrA
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~>
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
! for g: normalize determinant first
lgxx = dxx(i,j,k) + ONE
lgyy = dyy(i,j,k) + ONE
lgzz = dzz(i,j,k) + ONE
lgxy = gxy(i,j,k)
lgxz = gxz(i,j,k)
lgyz = gyz(i,j,k)
lscale = lgxx * lgyy * lgzz + lgxy * lgyz * lgxz &
+ lgxz * lgxy * lgyz - lgxz * lgyy * lgxz &
- lgxy * lgxy * lgzz - lgxx * lgyz * lgyz
lscale = ONE / ( lscale ** F1o3 )
lgxx = lgxx * lscale
lgxy = lgxy * lscale
lgxz = lgxz * lscale
lgyy = lgyy * lscale
lgyz = lgyz * lscale
lgzz = lgzz * lscale
dxx(i,j,k) = lgxx - ONE
gxy(i,j,k) = lgxy
gxz(i,j,k) = lgxz
dyy(i,j,k) = lgyy - ONE
gyz(i,j,k) = lgyz
dzz(i,j,k) = lgzz - ONE
! for A: trace-free using normalized metric (det=1, no division needed)
lgupxx = ( lgyy * lgzz - lgyz * lgyz )
lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
lgupxz = ( lgxy * lgyz - lgyy * lgxz )
lgupyy = ( lgxx * lgzz - lgxz * lgxz )
lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
lgupzz = ( lgxx * lgyy - lgxy * lgxy )
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
+ lgupzz * Azz(i,j,k) &
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
+ lgupyz * Ayz(i,j,k))
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
enddo
enddo
enddo
return
end subroutine enforce_ga
#else
!----------------------------------------------------------------------------------
! duplicate bam
!----------------------------------------------------------------------------------
subroutine enforce_ga(ex, dxx, gxy, gxz, dyy, gyz, dzz, &
Axx, Axy, Axz, Ayy, Ayz, Azz)
implicit none
!~~~~~~> Input parameters:
integer, intent(in) :: ex(1:3)
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
!~~~~~~~> Local variable:
real*8, dimension(ex(1),ex(2),ex(3)) :: trA
real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
real*8, dimension(ex(1),ex(2),ex(3)) :: aux,detginv
real*8, parameter :: oot = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
!~~~~~~>
gxx = dxx + ONE
gyy = dyy + ONE
gzz = dzz + ONE
! for g
aux = (2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
- gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)**(-oot)
gxx = gxx * aux
gxy = gxy * aux
gxz = gxz * aux
gyy = gyy * aux
gyz = gyz * aux
gzz = gzz * aux
dxx = gxx - ONE
dyy = gyy - ONE
dzz = gzz - ONE
! for A
detginv = 1/(2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
- gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)
trA = detginv*(-2.d0*Ayz*gxx*gyz + Axx*gyy*gzz + &
gxx*(Azz*gyy + Ayy*gzz) + 2.d0*(gxz*(Ayz*gxy - Axz*gyy + &
Axy*gyz) + gxy*(Axz*gyz - Axy*gzz)) - Azz*gxy**2 - Ayy*gxz**2 - &
Axx*gyz**2)
aux = -(oot*trA)
Axx = Axx + aux * gxx
Axy = Axy + aux * gxy
Axz = Axz + aux * gxz
Ayy = Ayy + aux * gyy
Ayz = Ayz + aux * gyz
Azz = Azz + aux * gzz
return
end subroutine enforce_ga
#endif