Add !$omp parallel do collapse(2) directives to all triple-loop stencil kernels (fderivs, fdderivs, fdx/fdy/fdz, kodis, lopsided, enforce_ag/enforce_ga) across all ghost_width variants. Add !$omp parallel workshare to RK4/ICN/Euler whole-array update routines. Build system: add -qopenmp to compile and link flags, switch MKL from sequential to threaded (-lmkl_intel_thread -liomp5). Runtime: set OMP_NUM_THREADS=96, OMP_STACKSIZE=16M, OMP_PROC_BIND=close, OMP_PLACES=cores for 96-core server target. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
233 lines
7.4 KiB
Fortran
233 lines
7.4 KiB
Fortran
|
|
!-----------------------------------------------------------------------------
|
|
!
|
|
! remove the trace of Aij
|
|
! trace-free Aij and enforce the determinant of bssn metric to one
|
|
!-----------------------------------------------------------------------------
|
|
|
|
subroutine enforce_ag(ex, dxx, gxy, gxz, dyy, gyz, dzz, &
|
|
Axx, Axy, Axz, Ayy, Ayz, Azz)
|
|
implicit none
|
|
|
|
!~~~~~~> Input parameters:
|
|
|
|
integer, intent(in) :: ex(1:3)
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
|
|
|
|
!~~~~~~~> Local variable:
|
|
|
|
integer :: i,j,k
|
|
real*8 :: lgxx,lgyy,lgzz,ldetg
|
|
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
|
|
real*8 :: ltrA,lscale
|
|
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
|
|
|
|
!~~~~~~>
|
|
|
|
!$omp parallel do collapse(2) private(i,j,k,lgxx,lgyy,lgzz,ldetg,lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz,ltrA,lscale)
|
|
do k=1,ex(3)
|
|
do j=1,ex(2)
|
|
do i=1,ex(1)
|
|
|
|
lgxx = dxx(i,j,k) + ONE
|
|
lgyy = dyy(i,j,k) + ONE
|
|
lgzz = dzz(i,j,k) + ONE
|
|
|
|
ldetg = lgxx * lgyy * lgzz &
|
|
+ gxy(i,j,k) * gyz(i,j,k) * gxz(i,j,k) &
|
|
+ gxz(i,j,k) * gxy(i,j,k) * gyz(i,j,k) &
|
|
- gxz(i,j,k) * lgyy * gxz(i,j,k) &
|
|
- gxy(i,j,k) * gxy(i,j,k) * lgzz &
|
|
- lgxx * gyz(i,j,k) * gyz(i,j,k)
|
|
|
|
lgupxx = ( lgyy * lgzz - gyz(i,j,k) * gyz(i,j,k) ) / ldetg
|
|
lgupxy = - ( gxy(i,j,k) * lgzz - gyz(i,j,k) * gxz(i,j,k) ) / ldetg
|
|
lgupxz = ( gxy(i,j,k) * gyz(i,j,k) - lgyy * gxz(i,j,k) ) / ldetg
|
|
lgupyy = ( lgxx * lgzz - gxz(i,j,k) * gxz(i,j,k) ) / ldetg
|
|
lgupyz = - ( lgxx * gyz(i,j,k) - gxy(i,j,k) * gxz(i,j,k) ) / ldetg
|
|
lgupzz = ( lgxx * lgyy - gxy(i,j,k) * gxy(i,j,k) ) / ldetg
|
|
|
|
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
|
|
+ lgupzz * Azz(i,j,k) &
|
|
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
|
|
+ lgupyz * Ayz(i,j,k))
|
|
|
|
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
|
|
Axy(i,j,k) = Axy(i,j,k) - F1o3 * gxy(i,j,k) * ltrA
|
|
Axz(i,j,k) = Axz(i,j,k) - F1o3 * gxz(i,j,k) * ltrA
|
|
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
|
|
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * gyz(i,j,k) * ltrA
|
|
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
|
|
|
|
lscale = ONE / ( ldetg ** F1o3 )
|
|
|
|
dxx(i,j,k) = lgxx * lscale - ONE
|
|
gxy(i,j,k) = gxy(i,j,k) * lscale
|
|
gxz(i,j,k) = gxz(i,j,k) * lscale
|
|
dyy(i,j,k) = lgyy * lscale - ONE
|
|
gyz(i,j,k) = gyz(i,j,k) * lscale
|
|
dzz(i,j,k) = lgzz * lscale - ONE
|
|
|
|
enddo
|
|
enddo
|
|
enddo
|
|
|
|
return
|
|
|
|
end subroutine enforce_ag
|
|
#if 1
|
|
!----------------------------------------------------------------------------------
|
|
! swap the turn of a and g
|
|
!----------------------------------------------------------------------------------
|
|
subroutine enforce_ga(ex, dxx, gxy, gxz, dyy, gyz, dzz, &
|
|
Axx, Axy, Axz, Ayy, Ayz, Azz)
|
|
implicit none
|
|
|
|
!~~~~~~> Input parameters:
|
|
|
|
integer, intent(in) :: ex(1:3)
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
|
|
|
|
!~~~~~~~> Local variable:
|
|
|
|
integer :: i,j,k
|
|
real*8 :: lgxx,lgyy,lgzz,lscale
|
|
real*8 :: lgxy,lgxz,lgyz
|
|
real*8 :: lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz
|
|
real*8 :: ltrA
|
|
real*8, parameter :: F1o3 = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
|
|
|
|
!~~~~~~>
|
|
|
|
!$omp parallel do collapse(2) private(i,j,k,lgxx,lgyy,lgzz,lscale,lgxy,lgxz,lgyz,lgupxx,lgupxy,lgupxz,lgupyy,lgupyz,lgupzz,ltrA)
|
|
do k=1,ex(3)
|
|
do j=1,ex(2)
|
|
do i=1,ex(1)
|
|
|
|
! for g: normalize determinant first
|
|
lgxx = dxx(i,j,k) + ONE
|
|
lgyy = dyy(i,j,k) + ONE
|
|
lgzz = dzz(i,j,k) + ONE
|
|
lgxy = gxy(i,j,k)
|
|
lgxz = gxz(i,j,k)
|
|
lgyz = gyz(i,j,k)
|
|
|
|
lscale = lgxx * lgyy * lgzz + lgxy * lgyz * lgxz &
|
|
+ lgxz * lgxy * lgyz - lgxz * lgyy * lgxz &
|
|
- lgxy * lgxy * lgzz - lgxx * lgyz * lgyz
|
|
|
|
lscale = ONE / ( lscale ** F1o3 )
|
|
|
|
lgxx = lgxx * lscale
|
|
lgxy = lgxy * lscale
|
|
lgxz = lgxz * lscale
|
|
lgyy = lgyy * lscale
|
|
lgyz = lgyz * lscale
|
|
lgzz = lgzz * lscale
|
|
|
|
dxx(i,j,k) = lgxx - ONE
|
|
gxy(i,j,k) = lgxy
|
|
gxz(i,j,k) = lgxz
|
|
dyy(i,j,k) = lgyy - ONE
|
|
gyz(i,j,k) = lgyz
|
|
dzz(i,j,k) = lgzz - ONE
|
|
|
|
! for A: trace-free using normalized metric (det=1, no division needed)
|
|
lgupxx = ( lgyy * lgzz - lgyz * lgyz )
|
|
lgupxy = - ( lgxy * lgzz - lgyz * lgxz )
|
|
lgupxz = ( lgxy * lgyz - lgyy * lgxz )
|
|
lgupyy = ( lgxx * lgzz - lgxz * lgxz )
|
|
lgupyz = - ( lgxx * lgyz - lgxy * lgxz )
|
|
lgupzz = ( lgxx * lgyy - lgxy * lgxy )
|
|
|
|
ltrA = lgupxx * Axx(i,j,k) + lgupyy * Ayy(i,j,k) &
|
|
+ lgupzz * Azz(i,j,k) &
|
|
+ TWO * (lgupxy * Axy(i,j,k) + lgupxz * Axz(i,j,k) &
|
|
+ lgupyz * Ayz(i,j,k))
|
|
|
|
Axx(i,j,k) = Axx(i,j,k) - F1o3 * lgxx * ltrA
|
|
Axy(i,j,k) = Axy(i,j,k) - F1o3 * lgxy * ltrA
|
|
Axz(i,j,k) = Axz(i,j,k) - F1o3 * lgxz * ltrA
|
|
Ayy(i,j,k) = Ayy(i,j,k) - F1o3 * lgyy * ltrA
|
|
Ayz(i,j,k) = Ayz(i,j,k) - F1o3 * lgyz * ltrA
|
|
Azz(i,j,k) = Azz(i,j,k) - F1o3 * lgzz * ltrA
|
|
|
|
enddo
|
|
enddo
|
|
enddo
|
|
|
|
return
|
|
|
|
end subroutine enforce_ga
|
|
#else
|
|
!----------------------------------------------------------------------------------
|
|
! duplicate bam
|
|
!----------------------------------------------------------------------------------
|
|
subroutine enforce_ga(ex, dxx, gxy, gxz, dyy, gyz, dzz, &
|
|
Axx, Axy, Axz, Ayy, Ayz, Azz)
|
|
implicit none
|
|
|
|
!~~~~~~> Input parameters:
|
|
|
|
integer, intent(in) :: ex(1:3)
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: dxx,dyy,dzz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: gxy,gxz,gyz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Axx,Axy,Axz
|
|
real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Ayy,Ayz,Azz
|
|
|
|
!~~~~~~~> Local variable:
|
|
|
|
real*8, dimension(ex(1),ex(2),ex(3)) :: trA
|
|
real*8, dimension(ex(1),ex(2),ex(3)) :: gxx,gyy,gzz
|
|
real*8, dimension(ex(1),ex(2),ex(3)) :: aux,detginv
|
|
real*8, parameter :: oot = 1.D0 / 3.D0, ONE = 1.D0, TWO = 2.D0
|
|
|
|
!~~~~~~>
|
|
|
|
gxx = dxx + ONE
|
|
gyy = dyy + ONE
|
|
gzz = dzz + ONE
|
|
! for g
|
|
aux = (2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
|
|
- gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)**(-oot)
|
|
|
|
gxx = gxx * aux
|
|
gxy = gxy * aux
|
|
gxz = gxz * aux
|
|
gyy = gyy * aux
|
|
gyz = gyz * aux
|
|
gzz = gzz * aux
|
|
|
|
dxx = gxx - ONE
|
|
dyy = gyy - ONE
|
|
dzz = gzz - ONE
|
|
! for A
|
|
|
|
detginv = 1/(2.d0*gxy*gxz*gyz + gxx*gyy*gzz &
|
|
- gzz*gxy**2 - gyy*gxz**2 - gxx*gyz**2)
|
|
|
|
trA = detginv*(-2.d0*Ayz*gxx*gyz + Axx*gyy*gzz + &
|
|
gxx*(Azz*gyy + Ayy*gzz) + 2.d0*(gxz*(Ayz*gxy - Axz*gyy + &
|
|
Axy*gyz) + gxy*(Axz*gyz - Axy*gzz)) - Azz*gxy**2 - Ayy*gxz**2 - &
|
|
Axx*gyz**2)
|
|
|
|
aux = -(oot*trA)
|
|
|
|
Axx = Axx + aux * gxx
|
|
Axy = Axy + aux * gxy
|
|
Axz = Axz + aux * gxz
|
|
Ayy = Ayy + aux * gyy
|
|
Ayz = Ayz + aux * gyz
|
|
Azz = Azz + aux * gzz
|
|
|
|
return
|
|
|
|
end subroutine enforce_ga
|
|
#endif
|