Add OpenMP parallelization to Fortran compute kernels
Add !$omp parallel do collapse(2) directives to all triple-loop stencil kernels (fderivs, fdderivs, fdx/fdy/fdz, kodis, lopsided, enforce_ag/enforce_ga) across all ghost_width variants. Wrap the whole-array update statements in the RK4/ICN/Euler routines in !$omp parallel workshare regions. Build system: add -qopenmp to the compile and link flags, and switch MKL from the sequential to the threaded layer (-lmkl_intel_thread -liomp5). Runtime: set OMP_NUM_THREADS=96, OMP_STACKSIZE=16M, OMP_PROC_BIND=close, and OMP_PLACES=cores for the 96-core server target.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -109,23 +109,33 @@
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
!$omp parallel workshare
|
||||
f1 = f0 + HLF * dT * f_rhs
|
||||
!$omp end parallel workshare
|
||||
|
||||
elseif(RK4 == 1 ) then
|
||||
|
||||
!$omp parallel workshare
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
|
||||
!$omp end parallel workshare
|
||||
!$omp parallel workshare
|
||||
f1 = f0 + HLF * dT * f1
|
||||
!$omp end parallel workshare
|
||||
|
||||
elseif(RK4 == 2 ) then
|
||||
|
||||
!$omp parallel workshare
|
||||
f_rhs = f_rhs + TWO * f1
|
||||
|
||||
!$omp end parallel workshare
|
||||
!$omp parallel workshare
|
||||
f1 = f0 + dT * f1
|
||||
!$omp end parallel workshare
|
||||
|
||||
elseif( RK4 == 3 ) then
|
||||
|
||||
|
||||
!$omp parallel workshare
|
||||
f1 = f0 +F1o6 * dT *(f1 + f_rhs)
|
||||
!$omp end parallel workshare
|
||||
|
||||
else
|
||||
|
||||
@@ -134,7 +144,7 @@
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
return
|
||||
|
||||
end subroutine rungekutta4_rout
|
||||
!-----------------------------------------------------------------------------
|
||||
@@ -215,15 +225,19 @@
|
||||
|
||||
if( RK4 == 0 ) then
|
||||
|
||||
!$omp parallel workshare
|
||||
f1 = f0 + dT * f_rhs
|
||||
!$omp end parallel workshare
|
||||
|
||||
else
|
||||
|
||||
!$omp parallel workshare
|
||||
f1 = f0 + HLF * dT * (f1+f_rhs)
|
||||
!$omp end parallel workshare
|
||||
|
||||
endif
|
||||
|
||||
return
|
||||
return
|
||||
|
||||
end subroutine icn_rout
|
||||
!~~~~~~~~~~~~~~~~~~
|
||||
@@ -239,8 +253,10 @@
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(in) ::f_rhs
|
||||
real*8, dimension(ex(1),ex(2),ex(3)),intent(out) ::f1
|
||||
|
||||
!$omp parallel workshare
|
||||
f1 = f0 + dT * f_rhs
|
||||
!$omp end parallel workshare
|
||||
|
||||
return
|
||||
return
|
||||
|
||||
end subroutine euler_rout
|
||||
|
||||
Reference in New Issue
Block a user