- Implemented cache blocking (BLK=8) in bssn_rhs_opt.f90 to improve L1/L2 cache hit rate. - Introduced bssn_rhs_opt.f90 module with vectorized derivative and physics kernels. - Renamed original implementation to bssn_rhs_legacy.f90 for fallback. - Updated bssn_rhs.f90 to act as a dispatcher, using the optimized path for ghost_width=3. - Updated makefile to include new source files. - Added DEBUG_NAN_CHECK macro to optionally disable NaN checks in production.
118 lines
7.6 KiB
Fortran
118 lines
7.6 KiB
Fortran
|
|
#include "macrodef.fh"
|
|
|
|
!> Dispatcher for the BSSN right-hand-side evaluation.
!!
!! The implementation is selected at preprocessing time from the
!! `ghost_width` macro (presumably defined in macrodef.fh -- confirm):
!!
!!   * ghost_width == 3 : every argument is forwarded to the subroutine
!!     compute_rhs_bssn_opt (made visible through bssn_rhs_opt_mod), and
!!     the status `gont` is handed to it as the trailing actual argument.
!!   * any other value  : every argument is forwarded to the external
!!     integer function compute_rhs_bssn_legacy and its result is returned.
!!
!! This wrapper performs no computation of its own; all dummy arguments are
!! passed through unchanged and in the same order as they are received.
!!
!! @param ex        grid extents; sizes every array argument as
!!                  (ex(1), ex(2), ex(3)) and X, Y, Z as ex(1), ex(2), ex(3)
!! @param T         scalar input forwarded to the implementation
!! @param X, Y, Z   one-dimensional coordinate arrays
!! @param Symmetry, Lev, co  integer inputs forwarded to the implementation
!! @param eps       real input forwarded to the implementation
!! @return gont     integer status produced by the selected implementation
function compute_rhs_bssn(ex, T, X, Y, Z,                                  &
                          chi, trK,                                        &
                          dxx, gxy, gxz, dyy, gyz, dzz,                    &
                          Axx, Axy, Axz, Ayy, Ayz, Azz,                    &
                          Gamx, Gamy, Gamz,                                &
                          Lap, betax, betay, betaz,                        &
                          dtSfx, dtSfy, dtSfz,                             &
                          chi_rhs, trK_rhs,                                &
                          gxx_rhs, gxy_rhs, gxz_rhs,                       &
                          gyy_rhs, gyz_rhs, gzz_rhs,                       &
                          Axx_rhs, Axy_rhs, Axz_rhs,                       &
                          Ayy_rhs, Ayz_rhs, Azz_rhs,                       &
                          Gamx_rhs, Gamy_rhs, Gamz_rhs,                    &
                          Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,        &
                          dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,                 &
                          rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz,   &
                          Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,  &
                          Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,  &
                          Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,  &
                          Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,                    &
                          ham_Res, movx_Res, movy_Res, movz_Res,           &
                          Gmx_Res, Gmy_Res, Gmz_Res,                       &
                          Symmetry, Lev, eps, co) result(gont)

  ! Provides compute_rhs_bssn_opt, used by the ghost_width == 3 branch.
  use bssn_rhs_opt_mod

  implicit none

  ! ---- Arguments: kept identical to the original interface --------------
  ! Scalars and coordinate arrays
  integer, intent(in) :: ex(1:3), Symmetry, Lev, co
  real*8,  intent(in) :: T
  real*8,  intent(in) :: X(1:ex(1)), Y(1:ex(2)), Z(1:ex(3))
  real*8,  intent(in) :: eps

  ! Input fields (some are inout because the implementations may modify them)
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: chi, dxx, dyy, dzz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in)    :: trK
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in)    :: gxy, gxz, gyz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in)    :: Axx, Axy, Axz, Ayy, Ayz, Azz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in)    :: Gamx, Gamy, Gamz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Lap, betax, betay, betaz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in)    :: dtSfx, dtSfy, dtSfz

  ! Outputs carrying the _rhs suffix
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: chi_rhs, trK_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: gxx_rhs, gxy_rhs, gxz_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: gyy_rhs, gyz_rhs, gzz_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Axx_rhs, Axy_rhs, Axz_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Ayy_rhs, Ayz_rhs, Azz_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamx_rhs, Gamy_rhs, Gamz_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Lap_rhs, betax_rhs, betay_rhs, betaz_rhs
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: dtSfx_rhs, dtSfy_rhs, dtSfz_rhs

  ! Additional input fields (rho, S*)
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in) :: rho, Sx, Sy, Sz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in) :: Sxx, Sxy, Sxz, Syy, Syz, Szz

  ! Additional outputs (Gam***, R**)
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamxxx, Gamxxy, Gamxxz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamxyy, Gamxyz, Gamxzz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamyxx, Gamyxy, Gamyxz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamyyy, Gamyyz, Gamyzz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamzxx, Gamzxy, Gamzxz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Gamzyy, Gamzyz, Gamzzz
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: Rxx, Rxy, Rxz, Ryy, Ryz, Rzz

  ! Residual arrays (*_Res), declared inout as in the original interface
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: ham_Res, movx_Res, movy_Res, movz_Res
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: Gmx_Res, Gmy_Res, Gmz_Res

  ! Function result
  integer :: gont

  ! Legacy implementation: an external function with an implicit interface,
  ! only referenced when ghost_width /= 3.
  integer, external :: compute_rhs_bssn_legacy

  ! NOTE(review): an earlier comment at this point stated that the optimized
  ! path was disabled until the kernel in bssn_rhs_opt_mod was fully
  ! populated. The code below DOES dispatch to compute_rhs_bssn_opt whenever
  ! ghost_width == 3, so that statement no longer matches the code. Confirm
  ! that the optimized kernel is complete before relying on this branch.
#if (ghost_width == 3)
  ! Optimized blocked implementation (original comment: "for 4th order").
  ! It is a subroutine, so the status comes back through the last argument.
  call compute_rhs_bssn_opt(ex, T, X, Y, Z,                                  &
                            chi, trK,                                        &
                            dxx, gxy, gxz, dyy, gyz, dzz,                    &
                            Axx, Axy, Axz, Ayy, Ayz, Azz,                    &
                            Gamx, Gamy, Gamz,                                &
                            Lap, betax, betay, betaz,                        &
                            dtSfx, dtSfy, dtSfz,                             &
                            chi_rhs, trK_rhs,                                &
                            gxx_rhs, gxy_rhs, gxz_rhs,                       &
                            gyy_rhs, gyz_rhs, gzz_rhs,                       &
                            Axx_rhs, Axy_rhs, Axz_rhs,                       &
                            Ayy_rhs, Ayz_rhs, Azz_rhs,                       &
                            Gamx_rhs, Gamy_rhs, Gamz_rhs,                    &
                            Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,        &
                            dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,                 &
                            rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz,   &
                            Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,  &
                            Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,  &
                            Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,  &
                            Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,                    &
                            ham_Res, movx_Res, movy_Res, movz_Res,           &
                            Gmx_Res, Gmy_Res, Gmz_Res,                       &
                            Symmetry, Lev, eps, co, gont)
#else
  ! Legacy implementation: same argument list, status is the function value.
  gont = compute_rhs_bssn_legacy(ex, T, X, Y, Z,                                  &
                                 chi, trK,                                        &
                                 dxx, gxy, gxz, dyy, gyz, dzz,                    &
                                 Axx, Axy, Axz, Ayy, Ayz, Azz,                    &
                                 Gamx, Gamy, Gamz,                                &
                                 Lap, betax, betay, betaz,                        &
                                 dtSfx, dtSfy, dtSfz,                             &
                                 chi_rhs, trK_rhs,                                &
                                 gxx_rhs, gxy_rhs, gxz_rhs,                       &
                                 gyy_rhs, gyz_rhs, gzz_rhs,                       &
                                 Axx_rhs, Axy_rhs, Axz_rhs,                       &
                                 Ayy_rhs, Ayz_rhs, Azz_rhs,                       &
                                 Gamx_rhs, Gamy_rhs, Gamz_rhs,                    &
                                 Lap_rhs, betax_rhs, betay_rhs, betaz_rhs,        &
                                 dtSfx_rhs, dtSfy_rhs, dtSfz_rhs,                 &
                                 rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz,   &
                                 Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz,  &
                                 Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz,  &
                                 Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz,  &
                                 Rxx, Rxy, Rxz, Ryy, Ryz, Rzz,                    &
                                 ham_Res, movx_Res, movy_Res, movz_Res,           &
                                 Gmx_Res, Gmy_Res, Gmz_Res,                       &
                                 Symmetry, Lev, eps, co)
#endif

end function compute_rhs_bssn
|