Optimize BSSN RHS and finite difference calculations
- Integrate Intel oneMKL VML for efficient Gauge calculation in bssn_rhs.f90 - Refactor fderivs in diff_new.f90 to separate bulk/boundary loops for better vectorization - Add optimization report in docs/optimization_report.md
This commit is contained in:
@@ -24,6 +24,9 @@
|
||||
Gmx_Res, Gmy_Res, Gmz_Res, &
|
||||
Symmetry,Lev,eps,co) result(gont)
|
||||
! calculate constraint violation when co=0
|
||||
#if (GAUGE == 6 || GAUGE == 7)
|
||||
use mkl_vml
|
||||
#endif
|
||||
implicit none
|
||||
|
||||
!~~~~~~> Input parameters:
|
||||
@@ -97,11 +100,13 @@
|
||||
#endif
|
||||
|
||||
#if (GAUGE == 6 || GAUGE == 7)
|
||||
integer :: BHN,i,j,k
|
||||
integer :: BHN,i,j,k, idx, total_points
|
||||
real*8, dimension(9) :: Porg
|
||||
real*8, dimension(3) :: Mass
|
||||
real*8 :: r1,r2,M,A,w1,w2,C1,C2
|
||||
real*8 :: r1,r2,M,A,w1,w2,C1,C2,denom_r
|
||||
real*8, dimension(ex(1),ex(2),ex(3)) :: reta
|
||||
real*8, allocatable :: vml_r1(:), vml_r2(:)
|
||||
real*8, allocatable :: vml_res1(:), vml_res2(:)
|
||||
|
||||
call getpbh(BHN,Porg,Mass)
|
||||
#endif
|
||||
@@ -862,17 +867,41 @@
|
||||
C1 = 1.d0/Mass(1) - A
|
||||
C2 = 1.d0/Mass(2) - A
|
||||
|
||||
denom_r = ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2)
|
||||
total_points = ex(1)*ex(2)*ex(3)
|
||||
allocate(vml_r1(total_points), vml_r2(total_points))
|
||||
allocate(vml_res1(total_points), vml_res2(total_points))
|
||||
|
||||
idx = 0
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/ &
|
||||
((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2)
|
||||
r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/ &
|
||||
((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2)
|
||||
reta(i,j,k) = A + C1/(ONE+w1*r1) + C2/(ONE+w2*r2)
|
||||
idx = idx + 1
|
||||
r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/denom_r
|
||||
r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/denom_r
|
||||
|
||||
! Prepare for 1/(1+w*r) -> vdInv
|
||||
vml_r1(idx) = ONE + w1*r1
|
||||
vml_r2(idx) = ONE + w2*r2
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
call vdInv(total_points, vml_r1, vml_res1)
|
||||
call vdInv(total_points, vml_r2, vml_res2)
|
||||
|
||||
idx = 0
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
idx = idx + 1
|
||||
reta(i,j,k) = A + C1*vml_res1(idx) + C2*vml_res2(idx)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(vml_r1, vml_r2, vml_res1, vml_res2)
|
||||
|
||||
else
|
||||
write(*,*) "not support BH_num in Jason's form 1",BHN
|
||||
endif
|
||||
@@ -892,17 +921,41 @@
|
||||
C1 = 1.d0/Mass(1) - A
|
||||
C2 = 1.d0/Mass(2) - A
|
||||
|
||||
denom_r = ((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2)
|
||||
total_points = ex(1)*ex(2)*ex(3)
|
||||
allocate(vml_r1(total_points), vml_r2(total_points))
|
||||
allocate(vml_res1(total_points), vml_res2(total_points))
|
||||
|
||||
idx = 0
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/ &
|
||||
((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2)
|
||||
r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/ &
|
||||
((Porg(1)-Porg(4))**2+(Porg(2)-Porg(5))**2+(Porg(3)-Porg(6))**2)
|
||||
reta(i,j,k) = A + C1*dexp(-w1*r1) + C2*dexp(-w2*r2)
|
||||
idx = idx + 1
|
||||
r1 = ((Porg(1)-X(i))**2+(Porg(2)-Y(j))**2+(Porg(3)-Z(k))**2)/denom_r
|
||||
r2 = ((Porg(4)-X(i))**2+(Porg(5)-Y(j))**2+(Porg(6)-Z(k))**2)/denom_r
|
||||
|
||||
! Prepare for dexp -> vdExp
|
||||
vml_r1(idx) = -w1*r1
|
||||
vml_r2(idx) = -w2*r2
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
call vdExp(total_points, vml_r1, vml_res1)
|
||||
call vdExp(total_points, vml_r2, vml_res2)
|
||||
|
||||
idx = 0
|
||||
do k=1,ex(3)
|
||||
do j=1,ex(2)
|
||||
do i=1,ex(1)
|
||||
idx = idx + 1
|
||||
reta(i,j,k) = A + C1*vml_res1(idx) + C2*vml_res2(idx)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(vml_r1, vml_r2, vml_res1, vml_res2)
|
||||
|
||||
else
|
||||
write(*,*) "not support BH_num in Jason's form 2",BHN
|
||||
endif
|
||||
|
||||
Reference in New Issue
Block a user