Add MPI+OpenMP hybrid parallelism (48 ranks x 2 threads) for full 96-core utilization
Enable OpenMP threading in the finite-difference kernels (diff_new, diff_new_sh, diff_newwb, lopsidediff, kodiss, kodiss_sh) by adding collapse(3) directives to 36 triple-nested loops. Update the build flags (-qopenmp), MPI process binding, and runtime configuration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -997,10 +997,11 @@
|
||||
fy = ZEO
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
#if 0
|
||||
! x direction
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
@@ -1151,10 +1152,11 @@
|
||||
|
||||
fx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
! x direction
|
||||
! x direction
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
! f(i-2) - 8 f(i-1) + 8 f(i+1) - f(i+2)
|
||||
@@ -1227,10 +1229,11 @@
|
||||
|
||||
fy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
! y direction
|
||||
! y direction
|
||||
if(j+2 <= jmax .and. j-2 >= jmin)then
|
||||
|
||||
fy(i,j,k)=d12dy*(fh(i,j-2,k)-EIT*fh(i,j-1,k)+EIT*fh(i,j+1,k)-fh(i,j+2,k))
|
||||
@@ -1297,10 +1300,11 @@
|
||||
|
||||
fz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
! z direction
|
||||
! z direction
|
||||
if(k+2 <= kmax .and. k-2 >= kmin)then
|
||||
|
||||
fz(i,j,k)=d12dz*(fh(i,j,k-2)-EIT*fh(i,j,k-1)+EIT*fh(i,j,k+1)-fh(i,j,k+2))
|
||||
@@ -1401,10 +1405,11 @@
|
||||
fxz = ZEO
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
#if 0
|
||||
!~~~~~~ fxx
|
||||
if(i+2 <= imax .and. i-2 >= imin)then
|
||||
!
|
||||
@@ -1576,6 +1581,7 @@
|
||||
|
||||
fxx = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1643,6 +1649,7 @@
|
||||
|
||||
fyy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1712,6 +1719,7 @@
|
||||
|
||||
fzz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1781,6 +1789,7 @@
|
||||
|
||||
fxy = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1851,6 +1860,7 @@
|
||||
|
||||
fxz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
@@ -1919,6 +1929,7 @@
|
||||
|
||||
fyz = ZEO
|
||||
|
||||
!$omp parallel do collapse(3) private(i,j,k) if(ex(1)*ex(2)*ex(3) > 4096)
|
||||
do k=1,ex(3)-1
|
||||
do j=1,ex(2)-1
|
||||
do i=1,ex(1)-1
|
||||
|
||||
Reference in New Issue
Block a user