diff --git a/AMSS_NCKU_source/bssn_rhs_opt.f90 b/AMSS_NCKU_source/bssn_rhs_opt.f90 index 87dbdc5..02f2e6c 100644 --- a/AMSS_NCKU_source/bssn_rhs_opt.f90 +++ b/AMSS_NCKU_source/bssn_rhs_opt.f90 @@ -147,6 +147,9 @@ contains real*8 :: alpn1, chin1, gdet, igdet, div_beta real*8 :: Gamxa, Gamya, Gamza, S_val real*8 :: val_gxx, val_gyy, val_gzz + real*8 :: gxy2, gxz2, gyz2 ! For optimized matrix inversion + real*8 :: term_xx1, term_xx2, term_yy1, term_yy2, term_zz1, term_zz2 ! For Christoffel CSE + real*8 :: term_xy, term_xz, term_yz nx = ie - is + 1 ny = je - js + 1 @@ -209,14 +212,21 @@ contains val_gxx*betaxz(ii,jj,kk) + gxy(ig,jg,kg)*betayz(ii,jj,kk) + & gyz(ig,jg,kg)*betayx(ii,jj,kk) + val_gzz*betazx(ii,jj,kk) - gxz(ig,jg,kg)*betayy(ii,jj,kk) - ! Inverse Metric + ! Inverse Metric (Optimization 4: Optimized 3x3 matrix inversion) + ! Precompute squared terms to avoid redundant multiplications + gxy2 = gxy(ig,jg,kg)**2 + gxz2 = gxz(ig,jg,kg)**2 + gyz2 = gyz(ig,jg,kg)**2 + + ! Compute determinant using precomputed squares gdet = val_gxx*val_gyy*val_gzz + TWO*gxy(ig,jg,kg)*gxz(ig,jg,kg)*gyz(ig,jg,kg) - & - val_gxx*gyz(ig,jg,kg)**2 - val_gyy*gxz(ig,jg,kg)**2 - val_gzz*gxy(ig,jg,kg)**2 + val_gxx*gyz2 - val_gyy*gxz2 - val_gzz*gxy2 igdet = ONE / gdet - - gupxx(ii,jj,kk) = (val_gyy*val_gzz - gyz(ig,jg,kg)**2) * igdet - gupyy(ii,jj,kk) = (val_gxx*val_gzz - gxz(ig,jg,kg)**2) * igdet - gupzz(ii,jj,kk) = (val_gxx*val_gyy - gxy(ig,jg,kg)**2) * igdet + + ! Compute cofactors and multiply by inverse determinant + gupxx(ii,jj,kk) = (val_gyy*val_gzz - gyz2) * igdet + gupyy(ii,jj,kk) = (val_gxx*val_gzz - gxz2) * igdet + gupzz(ii,jj,kk) = (val_gxx*val_gyy - gxy2) * igdet gupxy(ii,jj,kk) = (gxz(ig,jg,kg)*gyz(ig,jg,kg) - gxy(ig,jg,kg)*val_gzz) * igdet gupxz(ii,jj,kk) = (gxy(ig,jg,kg)*gyz(ig,jg,kg) - gxz(ig,jg,kg)*val_gyy) * igdet gupyz(ii,jj,kk) = (gxy(ig,jg,kg)*gxz(ig,jg,kg) - gyz(ig,jg,kg)*val_gxx) * igdet @@ -236,34 +246,47 @@ contains end if end do; end do; end do - ! === 3. Christoffel Symbols (Optimization 5) === + ! === 3. Christoffel Symbols (Optimization 5: CSE) === + ! Extract common subexpressions to reduce redundant computation !DIR$ SIMD do kk=1,nz; do jj=1,ny; do ii=1,nx ig=is+ii-1; jg=js+jj-1; kg=ks+kk-1 - - Gamxxx(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*gxxx(ii,jj,kk) + gupxy(ii,jj,kk)*(TWO*gxyx(ii,jj,kk)-gxxy(ii,jj,kk)) + gupxz(ii,jj,kk)*(TWO*gxzx(ii,jj,kk)-gxxz(ii,jj,kk))) - Gamyxx(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*gxxx(ii,jj,kk) + gupyy(ii,jj,kk)*(TWO*gxyx(ii,jj,kk)-gxxy(ii,jj,kk)) + gupyz(ii,jj,kk)*(TWO*gxzx(ii,jj,kk)-gxxz(ii,jj,kk))) - Gamzxx(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*gxxx(ii,jj,kk) + gupyz(ii,jj,kk)*(TWO*gxyx(ii,jj,kk)-gxxy(ii,jj,kk)) + gupzz(ii,jj,kk)*(TWO*gxzx(ii,jj,kk)-gxxz(ii,jj,kk))) - Gamxyy(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*(TWO*gxyy(ii,jj,kk)-gyyx(ii,jj,kk)) + gupxy(ii,jj,kk)*gyyy(ii,jj,kk) + gupxz(ii,jj,kk)*(TWO*gyzy(ii,jj,kk)-gyyz(ii,jj,kk))) - Gamyyy(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*(TWO*gxyy(ii,jj,kk)-gyyx(ii,jj,kk)) + gupyy(ii,jj,kk)*gyyy(ii,jj,kk) + gupyz(ii,jj,kk)*(TWO*gyzy(ii,jj,kk)-gyyz(ii,jj,kk))) - Gamzyy(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*(TWO*gxyy(ii,jj,kk)-gyyx(ii,jj,kk)) + gupyz(ii,jj,kk)*gyyy(ii,jj,kk) + gupzz(ii,jj,kk)*(TWO*gyzy(ii,jj,kk)-gyyz(ii,jj,kk))) + ! Precompute common terms (Optimization 5) + term_xx1 = TWO*gxyx(ii,jj,kk) - gxxy(ii,jj,kk) + term_xx2 = TWO*gxzx(ii,jj,kk) - gxxz(ii,jj,kk) + term_yy1 = TWO*gxyy(ii,jj,kk) - gyyx(ii,jj,kk) + term_yy2 = TWO*gyzy(ii,jj,kk) - gyyz(ii,jj,kk) + term_zz1 = TWO*gxzz(ii,jj,kk) - gzzx(ii,jj,kk) + term_zz2 = TWO*gyzz(ii,jj,kk) - gzzy(ii,jj,kk) + term_xy = gxzy(ii,jj,kk) + gyzx(ii,jj,kk) - gxyz(ii,jj,kk) + term_xz = gxyz(ii,jj,kk) + gyzx(ii,jj,kk) - gxzy(ii,jj,kk) + term_yz = gxyz(ii,jj,kk) + gxzy(ii,jj,kk) - gyzx(ii,jj,kk) - Gamxzz(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*(TWO*gxzz(ii,jj,kk)-gzzx(ii,jj,kk)) + gupxy(ii,jj,kk)*(TWO*gyzz(ii,jj,kk)-gzzy(ii,jj,kk)) + gupxz(ii,jj,kk)*gzzz(ii,jj,kk)) - Gamyzz(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*(TWO*gxzz(ii,jj,kk)-gzzx(ii,jj,kk)) + gupyy(ii,jj,kk)*(TWO*gyzz(ii,jj,kk)-gzzy(ii,jj,kk)) + gupyz(ii,jj,kk)*gzzz(ii,jj,kk)) - Gamzzz(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*(TWO*gxzz(ii,jj,kk)-gzzx(ii,jj,kk)) + gupyz(ii,jj,kk)*(TWO*gyzz(ii,jj,kk)-gzzy(ii,jj,kk)) + gupzz(ii,jj,kk)*gzzz(ii,jj,kk)) + ! Christoffel symbols using precomputed terms + Gamxxx(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*gxxx(ii,jj,kk) + gupxy(ii,jj,kk)*term_xx1 + gupxz(ii,jj,kk)*term_xx2) + Gamyxx(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*gxxx(ii,jj,kk) + gupyy(ii,jj,kk)*term_xx1 + gupyz(ii,jj,kk)*term_xx2) + Gamzxx(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*gxxx(ii,jj,kk) + gupyz(ii,jj,kk)*term_xx1 + gupzz(ii,jj,kk)*term_xx2) - Gamxxy(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*gxxy(ii,jj,kk) + gupxy(ii,jj,kk)*gyyx(ii,jj,kk) + gupxz(ii,jj,kk)*(gxzy(ii,jj,kk)+gyzx(ii,jj,kk)-gxyz(ii,jj,kk))) - Gamyxy(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*gxxy(ii,jj,kk) + gupyy(ii,jj,kk)*gyyx(ii,jj,kk) + gupyz(ii,jj,kk)*(gxzy(ii,jj,kk)+gyzx(ii,jj,kk)-gxyz(ii,jj,kk))) - Gamzxy(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*gxxy(ii,jj,kk) + gupyz(ii,jj,kk)*gyyx(ii,jj,kk) + gupzz(ii,jj,kk)*(gxzy(ii,jj,kk)+gyzx(ii,jj,kk)-gxyz(ii,jj,kk))) + Gamxyy(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*term_yy1 + gupxy(ii,jj,kk)*gyyy(ii,jj,kk) + gupxz(ii,jj,kk)*term_yy2) + Gamyyy(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*term_yy1 + gupyy(ii,jj,kk)*gyyy(ii,jj,kk) + gupyz(ii,jj,kk)*term_yy2) + Gamzyy(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*term_yy1 + gupyz(ii,jj,kk)*gyyy(ii,jj,kk) + gupzz(ii,jj,kk)*term_yy2) - Gamxxz(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*gxxz(ii,jj,kk) + gupxy(ii,jj,kk)*(gxyz(ii,jj,kk)+gyzx(ii,jj,kk)-gxzy(ii,jj,kk)) + gupxz(ii,jj,kk)*gzzx(ii,jj,kk)) - Gamyxz(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*gxxz(ii,jj,kk) + gupyy(ii,jj,kk)*(gxyz(ii,jj,kk)+gyzx(ii,jj,kk)-gxzy(ii,jj,kk)) + gupyz(ii,jj,kk)*gzzx(ii,jj,kk)) - Gamzxz(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*gxxz(ii,jj,kk) + gupyz(ii,jj,kk)*(gxyz(ii,jj,kk)+gyzx(ii,jj,kk)-gxzy(ii,jj,kk)) + gupzz(ii,jj,kk)*gzzx(ii,jj,kk)) + Gamxzz(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*term_zz1 + gupxy(ii,jj,kk)*term_zz2 + gupxz(ii,jj,kk)*gzzz(ii,jj,kk)) + Gamyzz(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*term_zz1 + gupyy(ii,jj,kk)*term_zz2 + gupyz(ii,jj,kk)*gzzz(ii,jj,kk)) + Gamzzz(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*term_zz1 + gupyz(ii,jj,kk)*term_zz2 + gupzz(ii,jj,kk)*gzzz(ii,jj,kk)) - Gamxyz(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*(gxyz(ii,jj,kk)+gxzy(ii,jj,kk)-gyzx(ii,jj,kk)) + gupxy(ii,jj,kk)*gyyz(ii,jj,kk) + gupxz(ii,jj,kk)*gzzy(ii,jj,kk)) - Gamyyz(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*(gxyz(ii,jj,kk)+gxzy(ii,jj,kk)-gyzx(ii,jj,kk)) + gupyy(ii,jj,kk)*gyyz(ii,jj,kk) + gupyz(ii,jj,kk)*gzzy(ii,jj,kk)) - Gamzyz(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*(gxyz(ii,jj,kk)+gxzy(ii,jj,kk)-gyzx(ii,jj,kk)) + gupyz(ii,jj,kk)*gyyz(ii,jj,kk) + gupzz(ii,jj,kk)*gzzy(ii,jj,kk)) + Gamxxy(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*gxxy(ii,jj,kk) + gupxy(ii,jj,kk)*gyyx(ii,jj,kk) + gupxz(ii,jj,kk)*term_xy) + Gamyxy(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*gxxy(ii,jj,kk) + gupyy(ii,jj,kk)*gyyx(ii,jj,kk) + gupyz(ii,jj,kk)*term_xy) + Gamzxy(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*gxxy(ii,jj,kk) + gupyz(ii,jj,kk)*gyyx(ii,jj,kk) + gupzz(ii,jj,kk)*term_xy) + + Gamxxz(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*gxxz(ii,jj,kk) + gupxy(ii,jj,kk)*term_xz + gupxz(ii,jj,kk)*gzzx(ii,jj,kk)) + Gamyxz(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*gxxz(ii,jj,kk) + gupyy(ii,jj,kk)*term_xz + gupyz(ii,jj,kk)*gzzx(ii,jj,kk)) + Gamzxz(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*gxxz(ii,jj,kk) + gupyz(ii,jj,kk)*term_xz + gupzz(ii,jj,kk)*gzzx(ii,jj,kk)) + + Gamxyz(ig,jg,kg) = HALF*(gupxx(ii,jj,kk)*term_yz + gupxy(ii,jj,kk)*gyyz(ii,jj,kk) + gupxz(ii,jj,kk)*gzzy(ii,jj,kk)) + Gamyyz(ig,jg,kg) = HALF*(gupxy(ii,jj,kk)*term_yz + gupyy(ii,jj,kk)*gyyz(ii,jj,kk) + gupyz(ii,jj,kk)*gzzy(ii,jj,kk)) + Gamzyz(ig,jg,kg) = HALF*(gupxz(ii,jj,kk)*term_yz + gupyz(ii,jj,kk)*gyyz(ii,jj,kk) + gupzz(ii,jj,kk)*gzzy(ii,jj,kk)) end do; end do; end do ! === 4. Ricci Tensor (Part 1 - Terms from A_ij) ===