From bcad2882fab499975f88c64a89ee44a1240283c7 Mon Sep 17 00:00:00 2001
From: jaunatisblue
Date: Wed, 15 Apr 2026 21:15:10 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9E=84=E5=BB=BA=E5=9F=BA=E4=BA=8Eoneapi?=
 =?UTF-8?q?=E7=9A=84mpi4py=EF=BC=8Cquimb=E6=94=AF=E6=8C=81mpi=E5=A4=9A?=
 =?UTF-8?q?=E6=9C=BA=E5=B9=B6=E8=A1=8C=EF=BC=8C=E7=BC=A9=E7=9F=AD=E8=B7=AF?=
 =?UTF-8?q?=E5=BE=84=E6=89=BE=E5=AF=BB=E6=97=B6=E9=97=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer note: the file contents were revised during review, so the
abbreviated blob ids on the "index" lines below are stale; regenerate the
patch with git format-patch after applying it.

 tests/hostfile      |   2 +
 tests/quimb_mpi.py  |  92 ++++++++++++++++++++++++++++++++++++++
 tests/quimb_mpi2.py | 105 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 199 insertions(+)
 create mode 100644 tests/hostfile
 create mode 100644 tests/quimb_mpi.py
 create mode 100644 tests/quimb_mpi2.py

diff --git a/tests/hostfile b/tests/hostfile
new file mode 100644
index 0000000..4ac0300
--- /dev/null
+++ b/tests/hostfile
@@ -0,0 +1,2 @@
+192.168.20.102
+192.168.20.101
diff --git a/tests/quimb_mpi.py b/tests/quimb_mpi.py
new file mode 100644
index 0000000..d4d251d
--- /dev/null
+++ b/tests/quimb_mpi.py
@@ -0,0 +1,92 @@
+"""Single-node baseline: contraction-path search and contraction with quimb."""
+import argparse
+import os
+import sys
+import time
+
+import cotengra as ctg
+import numpy as np
+import psutil
+import quimb.tensor as qtn
+
+# NOTE: BLAS/OpenMP thread settings (OMP_NUM_THREADS, MKL_NUM_THREADS,
+# OPENBLAS_NUM_THREADS, KMP_AFFINITY, KMP_BLOCKTIME) are normally read when
+# the numeric libraries are first loaded, so export them in the launch
+# environment; assigning os.environ after the imports above is too late.
+
+
+def _peak_rss_gib():
+    """Return the peak resident set size of this process in GiB."""
+    try:
+        import resource
+    except ImportError:
+        # No `resource` module (e.g. Windows): report the current RSS instead.
+        return psutil.Process().memory_info().rss / 1024**3
+    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+    # ru_maxrss is expressed in KiB on Linux but in bytes on macOS.
+    unit = 1 if sys.platform == "darwin" else 1024
+    return peak * unit / 1024**3
+
+
+def run_baseline(n_qubits=50, depth=20, n_workers=96):
+    """Build an H/CZ circuit, search a contraction path, contract <psi|psi>.
+
+    Args:
+        n_qubits: Number of qubits in the circuit.
+        depth: Number of H + CZ layers applied.
+        n_workers: Value passed as ``parallel`` to the path optimizer.
+    """
+    print(f"🚀 {n_qubits} Qubits, Depth {depth}")
+    print(f"💻 Detected Logical Cores: {os.cpu_count()}")
+
+    # 1. Build the circuit (complex128 is required for accuracy).
+    circ = qtn.Circuit(n_qubits, dtype=np.complex128)
+    for _ in range(depth):
+        for i in range(n_qubits):
+            circ.apply_gate('H', i)
+        for i in range(0, n_qubits - 1, 2):
+            circ.apply_gate('CZ', i, i + 1)
+
+    psi = circ.psi
+
+    # 2. Build the closed network.
+    net = psi.conj() & psi
+
+    # 3. Path-search settings (Kahypar).
+    print("🔍 Searching path with Kahypar...")
+    opt = ctg.HyperOptimizer(
+        methods=['kahypar'],
+        max_repeats=128,
+        parallel=n_workers,
+        minimize='flops',
+        on_trial_error='ignore',
+    )
+
+    # 4. Stage 1: path search.
+    t0 = time.perf_counter()
+    tree = net.contraction_tree(optimize=opt)
+    t1 = time.perf_counter()
+    print(f"🔍 Path search done: {t1 - t0:.4f} s")
+
+    # 5. Stage 2: tensor contraction.
+    result = net.contract(optimize=tree, backend='numpy')
+    t2 = time.perf_counter()
+    peak_mem = _peak_rss_gib()
+
+    print("✅ Done!")
+    print(f"⏱️ Contract: {t2 - t1:.4f} s | Total: {t2 - t0:.4f} s")
+    print(f"💾 Peak Memory: {peak_mem:.2f} GB")
+    print(f"🔢 Result: {result:.10f}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--n_qubits", type=int, default=50)
+    parser.add_argument("--depth", type=int, default=20)
+    parser.add_argument("--n_workers", type=int, default=96)
+    args = parser.parse_args()
+    run_baseline(
+        n_qubits=args.n_qubits,
+        depth=args.depth,
+        n_workers=args.n_workers,
+    )
diff --git a/tests/quimb_mpi2.py b/tests/quimb_mpi2.py
new file mode 100644
index 0000000..40c36d4
--- /dev/null
+++ b/tests/quimb_mpi2.py
@@ -0,0 +1,105 @@
+"""MPI-parallel contraction-path search and sliced contraction with quimb."""
+import argparse
+import time
+
+import cotengra as ctg
+import numpy as np
+import quimb.tensor as qtn
+from mpi4py import MPI
+
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+size = comm.Get_size()
+
+TOTAL_REPEATS = 128  # path-search repeats shared out between all ranks
+CORES_PER_NODE = 96  # assumed number of search workers one host can run
+SLICE_TARGET_SIZE = 2**27  # passed as ``target_size`` when slicing the tree
+
+
+def _ranks_on_this_node():
+    """Return how many ranks of ``comm`` run on the same host as this one."""
+    node_comm = comm.Split_type(MPI.COMM_TYPE_SHARED)
+    try:
+        return node_comm.Get_size()
+    finally:
+        node_comm.Free()
+
+
+def run_mpi(n_qubits, depth):
+    """Search a path on every rank, then contract the slices in parallel.
+
+    Args:
+        n_qubits: Number of qubits in the circuit.
+        depth: Number of H + CZ layers applied.
+    """
+    if rank == 0:
+        print(f"MPI size: {size} ranks")
+        print(f"Circuit: {n_qubits} qubits, depth {depth}")
+
+    # 1. Every rank builds the circuit itself (no large broadcast needed).
+    circ = qtn.Circuit(n_qubits, dtype=np.complex128)
+    for _ in range(depth):
+        for i in range(n_qubits):
+            circ.apply_gate('H', i)
+        for i in range(0, n_qubits - 1, 2):
+            circ.apply_gate('CZ', i, i + 1)
+    psi = circ.psi
+    net = psi.conj() & psi
+
+    # Workers are sized per host: with ranks spread over several machines,
+    # dividing by the world size would leave most cores of each host idle.
+    workers = max(1, CORES_PER_NODE // _ranks_on_this_node())
+
+    # 2. All ranks search in parallel; rank 0 keeps the globally cheapest tree.
+    t0 = time.perf_counter()
+    # Hand the remainder to the lowest ranks so the full budget is used.
+    base, extra = divmod(TOTAL_REPEATS, size)
+    repeats_per_rank = max(1, base + int(rank < extra))
+    opt = ctg.HyperOptimizer(
+        methods=['kahypar'],
+        max_repeats=repeats_per_rank,
+        minimize='flops',
+        parallel=workers,
+    )
+    local_tree = net.contraction_tree(optimize=opt)
+
+    all_trees = comm.gather(local_tree, root=0)
+
+    # 3. Rank 0 slices the winner; only the sliced tree is broadcast because
+    # the other ranks never use the unsliced one.
+    if rank == 0:
+        tree = min(all_trees, key=lambda t: t.contraction_cost())
+        t1 = time.perf_counter()
+        print(f"[rank 0] Path search: {t1 - t0:.4f} s")
+        sliced_tree = tree.slice(target_size=SLICE_TARGET_SIZE)
+    else:
+        sliced_tree = None
+    sliced_tree = comm.bcast(sliced_tree, root=0)
+    n_slices = sliced_tree.nslices
+
+    if rank == 0:
+        print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size}")
+
+    arrays = [t.data for t in net.tensors]
+
+    # Each rank contracts its own round-robin share of the slices.
+    t2 = time.perf_counter()
+    local_result = 0.0 + 0.0j
+    for i in range(rank, n_slices, size):
+        local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
+    t3 = time.perf_counter()
+
+    # 4. Sum the partial results on rank 0.
+    total = comm.reduce(local_result, op=MPI.SUM, root=0)
+
+    if rank == 0:
+        print(f"[rank 0] Contract: {t3 - t2:.4f} s")
+        print(f"Result: {total:.10f}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--n_qubits", type=int, default=50)
+    parser.add_argument("--depth", type=int, default=20)
+    args = parser.parse_args()
+    run_mpi(args.n_qubits, args.depth)