"""Run the 34q/20L TN complex64 benchmark under torch.profiler briefly.""" from __future__ import annotations import argparse import os import signal import sys from pathlib import Path from mpi4py import MPI def main(): parser = argparse.ArgumentParser() parser.add_argument("--seconds", type=float, default=30.0) parser.add_argument("--out-dir", default="torch_profiles/tn_complex64") parser.add_argument("--torch-threads", type=int, default=48) args = parser.parse_args() repo_root = Path(__file__).resolve().parents[1] os.chdir(repo_root) sys.path.insert(0, str(repo_root)) import torch from torch.profiler import ProfilerActivity, profile comm = MPI.COMM_WORLD rank = comm.Get_rank() size = comm.Get_size() out_dir = Path(args.out_dir) if rank == 0: out_dir.mkdir(parents=True, exist_ok=True) comm.Barrier() torch.set_num_threads(args.torch_threads) def run_benchmark(): import benchmark_cpu_expectation sys.argv = [ "benchmark_cpu_expectation.py", "--mpi", "--ansatz", "tn", "--nqubits", "34", "--nlayers", "20", "--circuits", "rxx_rzz", "--pauli-pattern", "XZ", "--tn-load-tree", "trees/rxx_rzz_34q20l_s4.pkl", "--quimb-backend", "torch", "--torch-threads", str(args.torch_threads), "--dtype", "complex64", ] benchmark_cpu_expectation.main() trace_path = out_dir / f"rank{rank}_trace.json" stacks_path = out_dir / f"rank{rank}_stacks.txt" summary_path = out_dir / f"rank{rank}_summary.txt" prof = profile( activities=[ProfilerActivity.CPU], record_shapes=True, profile_memory=True, with_stack=True, ) class ProfileTimeout(Exception): pass def alarm_handler(signum, frame): raise ProfileTimeout() old_handler = signal.signal(signal.SIGALRM, alarm_handler) signal.setitimer(signal.ITIMER_REAL, args.seconds) try: with prof: try: run_benchmark() except ProfileTimeout: pass finally: signal.setitimer(signal.ITIMER_REAL, 0) signal.signal(signal.SIGALRM, old_handler) prof.export_chrome_trace(str(trace_path)) try: prof.export_stacks(str(stacks_path), "self_cpu_time_total") except Exception as exc: # pragma: no cover - diagnostic only stacks_path.write_text(f"export_stacks failed: {exc}\n", encoding="utf-8") summary = prof.key_averages(group_by_stack_n=5).table( sort_by="self_cpu_time_total", row_limit=40, ) summary_path.write_text(summary, encoding="utf-8") print( f"torch_profile_done rank={rank}/{size} " f"trace={trace_path} summary={summary_path}", flush=True, ) if __name__ == "__main__": main()