Fix load explosion: use subprocess for binary data plots to avoid thread conflict

Co-authored-by: copilot-swe-agent[bot] <198982749+copilot@users.noreply.github.com>
This commit is contained in:
2026-02-09 21:40:27 +08:00
parent 2d7ba5c60c
commit dd2443c926
3 changed files with 69 additions and 7 deletions

View File

@@ -8,6 +8,14 @@
##
##################################################################
## Guard against re-execution by multiprocessing child processes.
## Without this, using 'spawn' or 'forkserver' context would cause every
## worker to re-run the entire script, spawning exponentially more
## workers (fork bomb).
if __name__ != '__main__':
import sys as _sys
_sys.exit(0)
##################################################################

View File

@@ -8,6 +8,17 @@
##
#################################################
## Restrict BLAS/OpenMP to one thread per process so that running
## many workers in parallel does not create an O(workers * BLAS_threads)
## thread explosion. These variables MUST be set before numpy/scipy
## are imported, because the BLAS library reads them only at load time.
import os
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("VECLIB_MAXIMUM_THREADS", "1")
os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")
import numpy
import scipy
import matplotlib
@@ -18,8 +29,6 @@ from mpl_toolkits.mplot3d import Axes3D
## import torch
import AMSS_NCKU_Input as input_data
import os
#########################################################################################
@@ -194,3 +203,19 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ):
####################################################################################
####################################################################################
## Allow this module to be run as a standalone script so that each
## binary-data plot can be executed in a fresh subprocess whose BLAS
## environment variables (set above) take effect before numpy loads.
##
## Usage: python3 plot_binary_data.py <filename> <binary_outdir> <figure_outdir>
####################################################################################
if __name__ == '__main__':
import sys
if len(sys.argv) != 4:
print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>")
sys.exit(1)
plot_binary_data(sys.argv[1], sys.argv[2], sys.argv[3])

View File

@@ -17,7 +17,9 @@ import os ## operating system utilities
import plot_binary_data
import AMSS_NCKU_Input as input_data
from parallel_plot_helper import run_plot_tasks_parallel
import subprocess
import sys
import multiprocessing
# plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots
@@ -53,13 +55,40 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ):
file_list.append(x)
print(x)
## Plot each file in the list (in parallel)
plot_tasks = []
## Plot each file in parallel using subprocesses.
## Each subprocess is a fresh Python process where the BLAS thread-count
## environment variables (set at the top of plot_binary_data.py) take
## effect before numpy is imported. This avoids the thread explosion
## that occurs when multiprocessing.Pool with 'fork' context inherits
## already-initialized multi-threaded BLAS from the parent.
script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" )
max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0
running = []
failed = []
for filename in file_list:
print(filename)
plot_tasks.append( ( plot_binary_data.plot_binary_data, (filename, binary_outdir, figure_outdir) ) )
proc = subprocess.Popen(
[sys.executable, script, filename, binary_outdir, figure_outdir],
)
running.append( (proc, filename) )
## Keep at most max_workers subprocesses active at a time
if len(running) >= max_workers:
p, fn = running.pop(0)
p.wait()
if p.returncode != 0:
failed.append(fn)
run_plot_tasks_parallel(plot_tasks)
## Wait for all remaining subprocesses to finish
for p, fn in running:
p.wait()
if p.returncode != 0:
failed.append(fn)
if failed:
print( " WARNING: the following binary data plots failed:" )
for fn in failed:
print( " ", fn )
print( )
print( " Binary Data Plot Has been Finished " )