Compare commits
5 Commits
cjy-oneapi
...
cjy-oneapi
| Author | SHA1 | Date | |
|---|---|---|---|
| | ed89bc029b | | |
| | 19274e93d1 | | |
| | ae1a474cca | | |
| | cbb8fb3a87 | | |
| | 4472d89a9f | | |
File diff suppressed because it is too large
Load Diff
1188
AMSS_NCKU_source/bssn_rhs_legacy.f90
Normal file
1188
AMSS_NCKU_source/bssn_rhs_legacy.f90
Normal file
File diff suppressed because it is too large
Load Diff
1125
AMSS_NCKU_source/bssn_rhs_opt.f90
Normal file
1125
AMSS_NCKU_source/bssn_rhs_opt.f90
Normal file
File diff suppressed because it is too large
Load Diff
@@ -997,11 +997,10 @@
|
|||||||
fy = ZEO
|
fy = ZEO
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
#if 0
|
#if 0
|
||||||
! x direction
|
! x direction
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
@@ -1152,11 +1151,10 @@
|
|||||||
|
|
||||||
fx = ZEO
|
fx = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
! x direction
|
! x direction
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||||
@@ -1229,11 +1227,10 @@
|
|||||||
|
|
||||||
fy = ZEO
|
fy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
! y direction
|
! y direction
|
||||||
if(j+2 <= jmax .and. j-2 >= jmin)then
|
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||||
|
|
||||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
@@ -1300,11 +1297,10 @@
|
|||||||
|
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
! z direction
|
! z direction
|
||||||
if(k+2 <= kmax .and. k-2 >= kmin)then
|
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
|
||||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
@@ -1405,11 +1401,10 @@
|
|||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
#if 0
|
#if 0
|
||||||
!~~~~~~ fxx
|
!~~~~~~ fxx
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
@@ -1581,7 +1576,6 @@
|
|||||||
|
|
||||||
fxx = ZEO
|
fxx = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
@@ -1649,7 +1643,6 @@
|
|||||||
|
|
||||||
fyy = ZEO
|
fyy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
@@ -1719,7 +1712,6 @@
|
|||||||
|
|
||||||
fzz = ZEO
|
fzz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
@@ -1789,7 +1781,6 @@
|
|||||||
|
|
||||||
fxy = ZEO
|
fxy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
@@ -1860,7 +1851,6 @@
|
|||||||
|
|
||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
@@ -1929,7 +1919,6 @@
|
|||||||
|
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
|
|||||||
@@ -1019,11 +1019,10 @@
|
|||||||
fy = ZEO
|
fy = ZEO
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! x direction
|
! x direction
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||||
@@ -1135,11 +1134,10 @@
|
|||||||
|
|
||||||
fx = ZEO
|
fx = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! x direction
|
! x direction
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||||
@@ -1229,11 +1227,10 @@
|
|||||||
|
|
||||||
fy = ZEO
|
fy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! y direction
|
! y direction
|
||||||
if(j+2 <= jmax .and. j-2 >= jmin)then
|
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||||
|
|
||||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
@@ -1317,11 +1314,10 @@
|
|||||||
|
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! z direction
|
! z direction
|
||||||
if(k+2 <= kmax .and. k-2 >= kmin)then
|
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
|
||||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
@@ -1434,7 +1430,6 @@
|
|||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1585,7 +1580,6 @@
|
|||||||
|
|
||||||
fxx = ZEO
|
fxx = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1665,7 +1659,6 @@
|
|||||||
|
|
||||||
fyy = ZEO
|
fyy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1747,7 +1740,6 @@
|
|||||||
|
|
||||||
fzz = ZEO
|
fzz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1829,7 +1821,6 @@
|
|||||||
|
|
||||||
fxy = ZEO
|
fxy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1912,7 +1903,6 @@
|
|||||||
|
|
||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1993,7 +1983,6 @@
|
|||||||
|
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
|||||||
@@ -1186,11 +1186,10 @@
|
|||||||
fy = ZEO
|
fy = ZEO
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! x direction
|
! x direction
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||||
@@ -1301,11 +1300,10 @@
|
|||||||
|
|
||||||
fx = ZEO
|
fx = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! x direction
|
! x direction
|
||||||
if(i+2 <= imax .and. i-2 >= imin)then
|
if(i+2 <= imax .and. i-2 >= imin)then
|
||||||
!
|
!
|
||||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||||
@@ -1383,11 +1381,10 @@
|
|||||||
|
|
||||||
fy = ZEO
|
fy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! y direction
|
! y direction
|
||||||
if(j+2 <= jmax .and. j-2 >= jmin)then
|
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||||
|
|
||||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||||
@@ -1459,11 +1456,10 @@
|
|||||||
|
|
||||||
fz = ZEO
|
fz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
! z direction
|
! z direction
|
||||||
if(k+2 <= kmax .and. k-2 >= kmin)then
|
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||||
|
|
||||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||||
@@ -1569,7 +1565,6 @@
|
|||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1786,7 +1781,6 @@
|
|||||||
|
|
||||||
fxx = ZEO
|
fxx = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1862,7 +1856,6 @@
|
|||||||
|
|
||||||
fyy = ZEO
|
fyy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -1940,7 +1933,6 @@
|
|||||||
|
|
||||||
fzz = ZEO
|
fzz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -2018,7 +2010,6 @@
|
|||||||
|
|
||||||
fxy = ZEO
|
fxy = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -2107,7 +2098,6 @@
|
|||||||
|
|
||||||
fxz = ZEO
|
fxz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
@@ -2194,7 +2184,6 @@
|
|||||||
|
|
||||||
fyz = ZEO
|
fyz = ZEO
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
|||||||
@@ -159,7 +159,6 @@ integer, parameter :: NO_SYMM=0, OCTANT=2
|
|||||||
|
|
||||||
call symmetry_bd(3,ex,f,fh,SoA)
|
call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
|||||||
@@ -369,12 +369,11 @@ integer, parameter :: NO_SYMM=0, EQ_SYMM=1, OCTANT=2
|
|||||||
|
|
||||||
call symmetry_stbd(3,ex,f,fh,SoA)
|
call symmetry_stbd(3,ex,f,fh,SoA)
|
||||||
|
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)
|
do k=1,ex(3)
|
||||||
do j=1,ex(2)
|
do j=1,ex(2)
|
||||||
do i=1,ex(1)
|
do i=1,ex(1)
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
if(i-3 >= imin .and. i+3 <= imax .and. &
|
if(i-3 >= imin .and. i+3 <= imax .and. &
|
||||||
j-3 >= jmin .and. j+3 <= jmax .and. &
|
j-3 >= jmin .and. j+3 <= jmax .and. &
|
||||||
k-3 >= kmin .and. k+3 <= kmax) then
|
k-3 >= kmin .and. k+3 <= kmax) then
|
||||||
|
|||||||
@@ -231,9 +231,8 @@ subroutine lopsided(ex,X,Y,Z,f,f_rhs,Sfx,Sfy,Sfz,Symmetry,SoA)
|
|||||||
|
|
||||||
call symmetry_bd(3,ex,f,fh,SoA)
|
call symmetry_bd(3,ex,f,fh,SoA)
|
||||||
|
|
||||||
! upper bound set ex-1 only for efficiency,
|
! upper bound set ex-1 only for efficiency,
|
||||||
! the loop body will set ex 0 also
|
! the loop body will set ex 0 also
|
||||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
|
||||||
do k=1,ex(3)-1
|
do k=1,ex(3)-1
|
||||||
do j=1,ex(2)-1
|
do j=1,ex(2)-1
|
||||||
do i=1,ex(1)-1
|
do i=1,ex(1)-1
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ C++FILES_GPU = ABE.o Ansorg.o Block.o misc.o monitor.o Parallel.o MPatch.o var.o
|
|||||||
|
|
||||||
F90FILES = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
F90FILES = enforce_algebra.o fmisc.o initial_puncture.o prolongrestrict.o\
|
||||||
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
prolongrestrict_cell.o prolongrestrict_vertex.o\
|
||||||
rungekutta4_rout.o bssn_rhs.o diff_new.o kodiss.o kodiss_sh.o\
|
rungekutta4_rout.o bssn_rhs_opt.o bssn_rhs.o bssn_rhs_legacy.o diff_new.o kodiss.o kodiss_sh.o\
|
||||||
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
lopsidediff.o sommerfeld_rout.o getnp4.o diff_new_sh.o\
|
||||||
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
shellfunctions.o bssn_rhs_ss.o Set_Rho_ADM.o\
|
||||||
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
getnp4EScalar.o bssnEScalar_rhs.o bssn_constraint.o ricci_gamma.o\
|
||||||
|
|||||||
@@ -7,18 +7,19 @@
|
|||||||
filein = -I/usr/include/ -I${MKLROOT}/include
|
filein = -I/usr/include/ -I${MKLROOT}/include
|
||||||
|
|
||||||
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
## Using sequential MKL (OpenMP disabled for better single-threaded performance)
|
||||||
## Added -lifcore for Intel Fortran runtime and -limf for Intel math library
|
LDLIBS = -L/usr/lib/x86_64-linux-gnu -L/usr/lib64 -lifcore -limf -lmpi \
|
||||||
LDLIBS = -L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lifcore -limf -lpthread -lm -ldl -qopenmp
|
-L${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
|
||||||
|
-lpthread -lm -ldl
|
||||||
|
|
||||||
## Aggressive optimization flags:
|
## Aggressive optimization flags:
|
||||||
## -O3: Maximum optimization
|
## -O3: Maximum optimization
|
||||||
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
## -xHost: Optimize for the host CPU architecture (Intel/AMD compatible)
|
||||||
## -fp-model fast=2: Aggressive floating-point optimizations
|
## -fp-model fast=2: Aggressive floating-point optimizations
|
||||||
## -fma: Enable fused multiply-add instructions
|
## -fma: Enable fused multiply-add instructions
|
||||||
## OpenMP re-enabled for MPI+OpenMP hybrid parallelism (MKL stays sequential to avoid nested parallelism)
|
## Note: OpenMP has been disabled (-qopenmp removed) due to performance issues
|
||||||
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma -qopenmp \
|
CXXAPPFLAGS = -O3 -xHost -fp-model fast=2 -fma \
|
||||||
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
-Dfortran3 -Dnewc -I${MKLROOT}/include
|
||||||
f90appflags = -O3 -xHost -fp-model fast=2 -fma -qopenmp \
|
f90appflags = -O3 -xHost -fp-model fast=2 -fma \
|
||||||
-fpp -I${MKLROOT}/include
|
-fpp -I${MKLROOT}/include
|
||||||
f90 = ifx
|
f90 = ifx
|
||||||
f77 = ifx
|
f77 = ifx
|
||||||
|
|||||||
@@ -13,9 +13,13 @@ import subprocess
|
|||||||
|
|
||||||
## CPU core binding configuration using taskset
|
## CPU core binding configuration using taskset
|
||||||
## taskset ensures all child processes inherit the CPU affinity mask
|
## taskset ensures all child processes inherit the CPU affinity mask
|
||||||
NUMACTL_CPU_BIND = "taskset -c 0-111"
|
## This forces make and all compiler processes to use only nohz_full cores (4-55, 60-111)
|
||||||
|
## Format: taskset -c 4-55,60-111 ensures processes only run on these cores
|
||||||
|
NUMACTL_CPU_BIND = "taskset -c 4-55,60-111"
|
||||||
|
|
||||||
## Build parallelism configuration
|
## Build parallelism configuration
|
||||||
|
## Use nohz_full cores (4-55, 60-111) for compilation: 52 + 52 = 104 cores
|
||||||
|
## Set make -j to utilize available cores for faster builds
|
||||||
BUILD_JOBS = 104
|
BUILD_JOBS = 104
|
||||||
|
|
||||||
|
|
||||||
@@ -110,18 +114,12 @@ def run_ABE():
|
|||||||
print( )
|
print( )
|
||||||
|
|
||||||
## Define the command to run; cast other values to strings as needed
|
## Define the command to run; cast other values to strings as needed
|
||||||
## MPI+OpenMP hybrid: compute threads per rank from total cores / MPI ranks
|
|
||||||
omp_threads = max(1, 96 // input_data.MPI_processes)
|
|
||||||
omp_env = (f" -genv OMP_NUM_THREADS={omp_threads}"
|
|
||||||
f" -genv OMP_PROC_BIND=close"
|
|
||||||
f" -genv OMP_PLACES=cores"
|
|
||||||
f" -genv I_MPI_PIN_DOMAIN=omp")
|
|
||||||
|
|
||||||
if (input_data.GPU_Calculation == "no"):
|
if (input_data.GPU_Calculation == "no"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + omp_env + " ./ABE"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABE"
|
||||||
mpi_command_outfile = "ABE_out.log"
|
mpi_command_outfile = "ABE_out.log"
|
||||||
elif (input_data.GPU_Calculation == "yes"):
|
elif (input_data.GPU_Calculation == "yes"):
|
||||||
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + omp_env + " ./ABEGPU"
|
mpi_command = NUMACTL_CPU_BIND + " mpirun -np " + str(input_data.MPI_processes) + " ./ABEGPU"
|
||||||
mpi_command_outfile = "ABEGPU_out.log"
|
mpi_command_outfile = "ABEGPU_out.log"
|
||||||
|
|
||||||
## Execute the MPI command and stream output
|
## Execute the MPI command and stream output
|
||||||
|
|||||||
Reference in New Issue
Block a user