Add MPI+OpenMP hybrid parallelism (48 ranks x 2 threads) for full 96-core utilization

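Enable OpenMP threading in the finite-difference kernels (diff_new, diff_new_sh, diff_newwb,
lopsidediff, kodiss, kodiss_sh) by adding `!$omp parallel do collapse(3)` directives to 36
triple-nested loops. The directives use an `if(ex(1)*ex(2)*ex(3) > 4096)` clause, so small
loop nests (4096 iterations or fewer) stay serial.
Update the build flags (-qopenmp), MPI process binding, and runtime configuration accordingly.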

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-06 15:53:15 +08:00
parent 223ec17a54
commit 4eb698f496
9 changed files with 65 additions and 27 deletions

View File

@@ -1019,10 +1019,11 @@
fy = ZEO
fz = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
! x direction
! x direction
if(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
@@ -1134,10 +1135,11 @@
fx = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
! x direction
! x direction
if(i+2 <= imax .and. i-2 >= imin)then
!
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
@@ -1227,10 +1229,11 @@
fy = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
! y direction
! y direction
if(j+2 <= jmax .and. j-2 >= jmin)then
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
@@ -1314,10 +1317,11 @@
fz = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
! z direction
! z direction
if(k+2 <= kmax .and. k-2 >= kmin)then
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
@@ -1430,6 +1434,7 @@
fxz = ZEO
fyz = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
@@ -1580,6 +1585,7 @@
fxx = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
@@ -1659,6 +1665,7 @@
fyy = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
@@ -1740,6 +1747,7 @@
fzz = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
@@ -1821,6 +1829,7 @@
fxy = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
@@ -1903,6 +1912,7 @@
fxz = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)
@@ -1983,6 +1993,7 @@
fyz = ZEO
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
do k=1,ex(3)
do j=1,ex(2)
do i=1,ex(1)