From dd2443c9268d7b362662e46fe51b162fe13c690b Mon Sep 17 00:00:00 2001 From: ianchb Date: Mon, 9 Feb 2026 21:40:27 +0800 Subject: [PATCH] Fix load explosion: use subprocess for binary data plots to avoid thread conflict Co-authored-by: copilot-swe-agent[bot] <198982749+copilot@users.noreply.github.com> --- AMSS_NCKU_Program.py | 8 ++++++++ plot_binary_data.py | 29 +++++++++++++++++++++++++++-- plot_xiaoqu.py | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 69 insertions(+), 7 deletions(-) diff --git a/AMSS_NCKU_Program.py b/AMSS_NCKU_Program.py index 214f800..6a7952a 100755 --- a/AMSS_NCKU_Program.py +++ b/AMSS_NCKU_Program.py @@ -8,6 +8,14 @@ ## ################################################################## +## Guard against re-execution by multiprocessing child processes. +## Without this, using 'spawn' or 'forkserver' context would cause every +## worker to re-run the entire script, spawning exponentially more +## workers (fork bomb). +if __name__ != '__main__': + import sys as _sys + _sys.exit(0) + ################################################################## diff --git a/plot_binary_data.py b/plot_binary_data.py index 3ed947d..78b5a4c 100755 --- a/plot_binary_data.py +++ b/plot_binary_data.py @@ -8,6 +8,17 @@ ## ################################################# +## Restrict BLAS/OpenMP to one thread per process so that running +## many workers in parallel does not create an O(workers * BLAS_threads) +## thread explosion. These variables MUST be set before numpy/scipy +## are imported, because the BLAS library reads them only at load time. 
+import os +os.environ.setdefault("OMP_NUM_THREADS", "1") +os.environ.setdefault("OPENBLAS_NUM_THREADS", "1") +os.environ.setdefault("MKL_NUM_THREADS", "1") +os.environ.setdefault("VECLIB_MAXIMUM_THREADS", "1") +os.environ.setdefault("NUMEXPR_NUM_THREADS", "1") + import numpy import scipy import matplotlib @@ -18,8 +29,6 @@ from mpl_toolkits.mplot3d import Axes3D ## import torch import AMSS_NCKU_Input as input_data -import os - ######################################################################################### @@ -194,3 +203,19 @@ def get_data_xy( Rmin, Rmax, n, data0, time, figure_title, figure_outdir ): #################################################################################### + +#################################################################################### +## Allow this module to be run as a standalone script so that each +## binary-data plot can be executed in a fresh subprocess whose BLAS +## environment variables (set above) take effect before numpy loads. +## +## Usage: python3 plot_binary_data.py <filename> <binary_outdir> <figure_outdir> +#################################################################################### + +if __name__ == '__main__': + import sys + if len(sys.argv) != 4: + print(f"Usage: {sys.argv[0]} <filename> <binary_outdir> <figure_outdir>") + sys.exit(1) + plot_binary_data(sys.argv[1], sys.argv[2], sys.argv[3]) + diff --git a/plot_xiaoqu.py b/plot_xiaoqu.py index 6e843d0..47970cf 100755 --- a/plot_xiaoqu.py +++ b/plot_xiaoqu.py @@ -17,7 +17,9 @@ import os ## operating system utilities import plot_binary_data import AMSS_NCKU_Input as input_data -from parallel_plot_helper import run_plot_tasks_parallel +import subprocess +import sys +import multiprocessing # plt.rcParams['text.usetex'] = True ## enable LaTeX fonts in plots @@ -53,13 +55,40 @@ def generate_binary_data_plot( binary_outdir, figure_outdir ): file_list.append(x) print(x) - ## Plot each file in the list (in parallel) - plot_tasks = [] + ## Plot each file in parallel using subprocesses. 
+ ## Each subprocess is a fresh Python process where the BLAS thread-count + ## environment variables (set at the top of plot_binary_data.py) take + ## effect before numpy is imported. This avoids the thread explosion + ## that occurs when multiprocessing.Pool with 'fork' context inherits + ## already-initialized multi-threaded BLAS from the parent. + script = os.path.join( os.path.dirname(__file__), "plot_binary_data.py" ) + max_workers = min( multiprocessing.cpu_count(), len(file_list) ) if file_list else 0 + + running = [] + failed = [] for filename in file_list: print(filename) - plot_tasks.append( ( plot_binary_data.plot_binary_data, (filename, binary_outdir, figure_outdir) ) ) + proc = subprocess.Popen( + [sys.executable, script, filename, binary_outdir, figure_outdir], + ) + running.append( (proc, filename) ) + ## Keep at most max_workers subprocesses active at a time + if len(running) >= max_workers: + p, fn = running.pop(0) + p.wait() + if p.returncode != 0: + failed.append(fn) - run_plot_tasks_parallel(plot_tasks) + ## Wait for all remaining subprocesses to finish + for p, fn in running: + p.wait() + if p.returncode != 0: + failed.append(fn) + + if failed: + print( " WARNING: the following binary data plots failed:" ) + for fn in failed: + print( " ", fn ) print( ) print( " Binary Data Plot Has been Finished " )