Files
AMSS-NCKU/AMSS_NCKU_source/bssn_rhs.f90
CGH0S7 4472d89a9f Optimize bssn_rhs calculation with cache blocking and vectorization
- Implemented cache blocking (BLK=8) in bssn_rhs_opt.f90 to improve L1/L2 cache hit rate.
- Introduced bssn_rhs_opt.f90 module with vectorized derivative and physics kernels.
- Renamed original implementation to bssn_rhs_legacy.f90 for fallback.
- Updated bssn_rhs.f90 to act as a dispatcher, using the optimized path for ghost_width=3.
- Updated makefile to include new source files.
- Added DEBUG_NAN_CHECK macro to optionally disable NaN checks in production.
2026-01-19 16:39:24 +08:00

118 lines
7.6 KiB
Fortran

#include "macrodef.fh"
! Wrapper function to select implementation based on ghost_width
function compute_rhs_bssn(ex, T, X, Y, Z, &
    chi, trK, &
    dxx, gxy, gxz, dyy, gyz, dzz, &
    Axx, Axy, Axz, Ayy, Ayz, Azz, &
    Gamx, Gamy, Gamz, &
    Lap, betax, betay, betaz, &
    dtSfx, dtSfy, dtSfz, &
    chi_rhs, trK_rhs, &
    gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, &
    Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, &
    Gamx_rhs, Gamy_rhs, Gamz_rhs, &
    Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, &
    dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, &
    rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz, &
    Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, &
    Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, &
    Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, &
    Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, &
    ham_Res, movx_Res, movy_Res, movz_Res, &
    Gmx_Res, Gmy_Res, Gmz_Res, &
    Symmetry, Lev, eps, co) result(gont)
  ! Compile-time dispatcher for the BSSN right-hand-side evaluation.
  !
  ! This routine performs no computation of its own. Depending on the
  ! preprocessor symbol ghost_width it forwards every argument, in the
  ! same order, to one of two implementations:
  !   * ghost_width == 3 : subroutine compute_rhs_bssn_opt, which receives
  !                        gont as an extra trailing argument;
  !   * any other value  : external function compute_rhs_bssn_legacy,
  !                        whose return value becomes gont.
  !
  ! Arguments:
  !   ex           - integer triple giving the extents of every 3-D array
  !   T, eps       - real scalars, passed through unchanged
  !   X, Y, Z      - 1-D real arrays of length ex(1), ex(2), ex(3)
  !   Symmetry, Lev, co - integer scalars, passed through unchanged
  !   all other arguments are ex(1) x ex(2) x ex(3) real arrays with the
  !   intents declared below.
  ! Result:
  !   gont         - integer status produced by the selected implementation
  !                  (its meaning is defined there, not in this wrapper).

  ! Pulled in unconditionally, also when the legacy branch is compiled.
  ! Presumably this module provides compute_rhs_bssn_opt - TODO confirm.
  use bssn_rhs_opt_mod
  implicit none

  ! ---- scalars and coordinate arrays (same interface as the legacy code)
  integer, intent(in) :: ex(1:3)
  integer, intent(in) :: Symmetry, Lev, co
  real*8, intent(in) :: T
  real*8, intent(in) :: eps
  real*8, intent(in) :: X(1:ex(1)), Y(1:ex(2)), Z(1:ex(3))

  ! ---- grid functions that the callee is allowed to modify
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: &
      chi, dxx, dyy, dzz, &
      Lap, betax, betay, betaz

  ! ---- read-only grid functions
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in) :: &
      trK, &
      gxy, gxz, gyz, &
      Axx, Axy, Axz, Ayy, Ayz, Azz, &
      Gamx, Gamy, Gamz, &
      dtSfx, dtSfy, dtSfz

  ! ---- read-only source terms
  real*8, dimension(ex(1),ex(2),ex(3)), intent(in) :: &
      rho, Sx, Sy, Sz, &
      Sxx, Sxy, Sxz, Syy, Syz, Szz

  ! ---- right-hand sides written by the callee
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: &
      chi_rhs, trK_rhs, &
      gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, &
      Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, &
      Gamx_rhs, Gamy_rhs, Gamz_rhs, &
      Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, &
      dtSfx_rhs, dtSfy_rhs, dtSfz_rhs

  ! ---- further output arrays written by the callee
  real*8, dimension(ex(1),ex(2),ex(3)), intent(out) :: &
      Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, &
      Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, &
      Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, &
      Rxx, Rxy, Rxz, Ryy, Ryz, Rzz

  ! ---- residual arrays, updated in place
  real*8, dimension(ex(1),ex(2),ex(3)), intent(inout) :: &
      ham_Res, movx_Res, movy_Res, movz_Res, &
      Gmx_Res, Gmy_Res, Gmz_Res

  ! ---- function result
  integer :: gont

  ! The legacy implementation is an external function with an implicit
  ! interface; only its integer result type is declared here.
  integer, external :: compute_rhs_bssn_legacy

  ! NOTE(review): the optimized branch below is ACTIVE whenever
  ! ghost_width == 3. Confirm that the kernel in bssn_rhs_opt_mod
  ! implements the complete BSSN physics before relying on this path.
#if (ghost_width == 3)
  ! Optimized path: gont is filled in through the last argument.
  call compute_rhs_bssn_opt(ex, T, X, Y, Z, &
      chi, trK, &
      dxx, gxy, gxz, dyy, gyz, dzz, &
      Axx, Axy, Axz, Ayy, Ayz, Azz, &
      Gamx, Gamy, Gamz, &
      Lap, betax, betay, betaz, &
      dtSfx, dtSfy, dtSfz, &
      chi_rhs, trK_rhs, &
      gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, &
      Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, &
      Gamx_rhs, Gamy_rhs, Gamz_rhs, &
      Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, &
      dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, &
      rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz, &
      Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, &
      Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, &
      Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, &
      Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, &
      ham_Res, movx_Res, movy_Res, movz_Res, &
      Gmx_Res, Gmy_Res, Gmz_Res, &
      Symmetry, Lev, eps, co, gont)
#else
  ! Legacy path: the function value is returned as gont.
  gont = compute_rhs_bssn_legacy(ex, T, X, Y, Z, &
      chi, trK, &
      dxx, gxy, gxz, dyy, gyz, dzz, &
      Axx, Axy, Axz, Ayy, Ayz, Azz, &
      Gamx, Gamy, Gamz, &
      Lap, betax, betay, betaz, &
      dtSfx, dtSfy, dtSfz, &
      chi_rhs, trK_rhs, &
      gxx_rhs, gxy_rhs, gxz_rhs, gyy_rhs, gyz_rhs, gzz_rhs, &
      Axx_rhs, Axy_rhs, Axz_rhs, Ayy_rhs, Ayz_rhs, Azz_rhs, &
      Gamx_rhs, Gamy_rhs, Gamz_rhs, &
      Lap_rhs, betax_rhs, betay_rhs, betaz_rhs, &
      dtSfx_rhs, dtSfy_rhs, dtSfz_rhs, &
      rho, Sx, Sy, Sz, Sxx, Sxy, Sxz, Syy, Syz, Szz, &
      Gamxxx, Gamxxy, Gamxxz, Gamxyy, Gamxyz, Gamxzz, &
      Gamyxx, Gamyxy, Gamyxz, Gamyyy, Gamyyz, Gamyzz, &
      Gamzxx, Gamzxy, Gamzxz, Gamzyy, Gamzyz, Gamzzz, &
      Rxx, Rxy, Rxz, Ryy, Ryz, Rzz, &
      ham_Res, movx_Res, movy_Res, movz_Res, &
      Gmx_Res, Gmy_Res, Gmz_Res, &
      Symmetry, Lev, eps, co)
#endif
end function compute_rhs_bssn