构建基于oneapi的mpi4py,quimb支持mpi多机并行,缩短路径找寻时间
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
This commit is contained in:
2
tests/hostfile
Normal file
2
tests/hostfile
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
192.168.20.102
|
||||||
|
192.168.20.101
|
||||||
68
tests/quimb_mpi.py
Normal file
68
tests/quimb_mpi.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
import quimb.tensor as qtn
|
||||||
|
import cotengra as ctg
|
||||||
|
# --- Optional BLAS/OpenMP thread configuration (currently disabled) ---
# These settings are kept for reference only and are NOT applied.
# To have any effect they must run before numpy/quimb are imported, i.e.
# above the imports at the top of this file.
#
# NOTE: an earlier version of this note said the BLAS libraries
# (MKL/OpenBLAS) would use 96 threads; the values below actually limit
# each thread pool to a single thread.
#
# os.environ["OMP_NUM_THREADS"] = "1"
# os.environ["MKL_NUM_THREADS"] = "1"
# os.environ["OPENBLAS_NUM_THREADS"] = "1"
#
# Thread affinity: keep threads from migrating between CPU cores to
# improve the cache hit rate.
# os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
# os.environ["KMP_BLOCKTIME"] = "0"

# psutil is used by run_baseline for its memory report.
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
def _peak_rss_gib():
    """Return the peak resident set size of the current process in GiB.

    Uses ``resource.getrusage`` where available so the value is a true
    high-water mark. On platforms without the ``resource`` module the
    function falls back to the *current* RSS reported by psutil.
    """
    try:
        import resource
    except ImportError:
        # No `resource` module on this platform: best effort, current RSS.
        return psutil.Process().memory_info().rss / 1024**3

    import sys

    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in bytes on macOS and in kilobytes on Linux.
    unit = 1 if sys.platform == "darwin" else 1024
    return peak * unit / 1024**3


def run_baseline(n_qubits=50, depth=20, *, max_repeats=128, parallel=96):
    """Single-process baseline: build a circuit, search a path, contract <psi|psi>.

    The circuit applies, for each of ``depth`` layers, an H gate on every
    qubit followed by CZ gates on the pairs (0, 1), (2, 3), ...  The closed
    network ``psi.conj() & psi`` is then contracted with a path found by a
    cotengra ``HyperOptimizer`` restricted to the 'kahypar' method.
    Timings, memory usage and the contraction result are printed.

    Args:
        n_qubits: Number of qubits in the circuit.
        depth: Number of H/CZ layers.
        max_repeats: Number of optimizer trials (passed to HyperOptimizer).
        parallel: Value of the optimizer's ``parallel`` argument. The
            default of 96 matches the original hard-coded setting; lower it
            on machines with fewer cores.

    Returns:
        None. All output goes to stdout.
    """
    print(f"🚀 {n_qubits} Qubits, Depth {depth}")
    print(f"💻 Detected Logical Cores: {os.cpu_count()}")

    # 1. Build the circuit (complex128 to keep full precision).
    circ = qtn.Circuit(n_qubits, dtype=np.complex128)
    for _ in range(depth):
        for i in range(n_qubits):
            circ.apply_gate('H', i)
        for i in range(0, n_qubits - 1, 2):
            circ.apply_gate('CZ', i, i + 1)

    psi = circ.psi

    # 2. Build the closed network <psi|psi>.
    net = psi.conj() & psi

    # 3. Path-search configuration (Kahypar only).
    print("🔍 Searching path with Kahypar...")
    opt = ctg.HyperOptimizer(
        methods=['kahypar'],
        max_repeats=max_repeats,
        parallel=parallel,
        minimize='flops',
        on_trial_error='ignore',
    )

    # 4. Phase 1: path search.
    t0 = time.perf_counter()
    tree = net.contraction_tree(optimize=opt)
    t1 = time.perf_counter()
    print(f"🔍 Path search done: {t1 - t0:.4f} s")

    # 5. Phase 2: tensor contraction.
    result = net.contract(optimize=tree, backend='numpy')
    t2 = time.perf_counter()

    # Sample the high-water mark rather than the RSS left after the
    # contraction has finished.
    peak_mem = _peak_rss_gib()

    print("✅ Done!")
    print(f"⏱️ Contract: {t2 - t1:.4f} s | Total: {t2 - t0:.4f} s")
    print(f"💾 Peak Memory: {peak_mem:.2f} GB")
    print(f"🔢 Result: {result:.10f}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    # Command-line entry point: both options are integers with the same
    # defaults as run_baseline itself.
    cli = argparse.ArgumentParser()
    for flag, default in (("--n_qubits", 50), ("--depth", 20)):
        cli.add_argument(flag, type=int, default=default)
    opts = cli.parse_args()

    run_baseline(n_qubits=opts.n_qubits, depth=opts.depth)
|
||||||
81
tests/quimb_mpi2.py
Normal file
81
tests/quimb_mpi2.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
import quimb.tensor as qtn
|
||||||
|
import cotengra as ctg
|
||||||
|
from mpi4py import MPI
|
||||||
|
|
||||||
|
# Module-wide MPI handles: the world communicator, this process's rank in
# it, and the total number of ranks. run_mpi reads these as globals.
comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
|
||||||
|
|
||||||
|
def run_mpi(n_qubits, depth, *, total_repeats=128, total_workers=96,
            target_size=2**27):
    """Contract <psi|psi> for a layered H/CZ circuit across MPI ranks.

    Steps:
      1. Every rank builds the same circuit and closed network locally.
      2. Every rank runs its own Kahypar path search; the trees are
         gathered on rank 0, which keeps the one with the lowest
         ``contraction_cost()``.
      3. Rank 0 slices that tree and broadcasts the sliced tree.
      4. Slices are distributed round-robin (slice ``i`` goes to rank
         ``i % size``), contracted locally, and summed onto rank 0.

    Uses the module-level ``comm``, ``rank`` and ``size``.

    Args:
        n_qubits: Number of qubits in the circuit.
        depth: Number of H/CZ layers.
        total_repeats: Optimizer trials summed over all ranks; each rank
            runs ``max(1, total_repeats // size)``.
        total_workers: Optimizer ``parallel`` budget summed over all ranks;
            each rank uses ``max(1, total_workers // size)``.
        target_size: ``target_size`` passed to ``tree.slice``.

    Returns:
        None. Rank 0 prints timings and the result.
    """
    is_root = rank == 0

    if is_root:
        print(f"MPI size: {size} ranks")
        print(f"Circuit: {n_qubits} qubits, depth {depth}")

    # 1. Every rank builds the circuit itself (avoids broadcasting a large
    #    object).
    circ = qtn.Circuit(n_qubits, dtype=np.complex128)
    for _ in range(depth):
        for i in range(n_qubits):
            circ.apply_gate('H', i)
        for i in range(0, n_qubits - 1, 2):
            circ.apply_gate('CZ', i, i + 1)
    psi = circ.psi
    net = psi.conj() & psi

    # 2. All ranks search for a path in parallel; rank 0 picks the best.
    t0 = time.perf_counter()
    opt = ctg.HyperOptimizer(
        methods=['kahypar'],
        max_repeats=max(1, total_repeats // size),
        minimize='flops',
        parallel=max(1, total_workers // size),
    )
    local_tree = net.contraction_tree(optimize=opt)

    all_trees = comm.gather(local_tree, root=0)

    # 3. Rank 0 selects and slices the winning tree. Only the sliced tree
    #    is broadcast: the unsliced tree is never used on the other ranks,
    #    so sending it as well would be a wasted collective.
    sliced_tree = None
    if is_root:
        tree = min(all_trees, key=lambda t: t.contraction_cost())
        t1 = time.perf_counter()
        print(f"[rank 0] Path search: {t1 - t0:.4f} s")
        sliced_tree = tree.slice(target_size=target_size)
    sliced_tree = comm.bcast(sliced_tree, root=0)
    n_slices = sliced_tree.nslices

    if is_root:
        print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size}")

    # NOTE(review): the broadcast tree originates from one rank's network
    # but is applied to each rank's own arrays; this relies on every rank
    # producing `net.tensors` in the same order -- confirm.
    arrays = [t.data for t in net.tensors]

    # Each rank contracts the slices assigned to it (round-robin).
    t2 = time.perf_counter()
    local_result = 0.0 + 0.0j
    for i in range(rank, n_slices, size):
        local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
    t3 = time.perf_counter()

    # 4. Sum the partial results onto rank 0.
    total = comm.reduce(local_result, op=MPI.SUM, root=0)

    if is_root:
        # The reported time covers rank 0's own slice loop only.
        print(f"[rank 0] Contract: {t3 - t2:.4f} s")
        print(f"Result: {total:.10f}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    # Command-line entry point: circuit size and depth, both integers.
    cli = argparse.ArgumentParser()
    for flag, default in (("--n_qubits", 50), ("--depth", 20)):
        cli.add_argument(flag, type=int, default=default)
    opts = cli.parse_args()

    run_mpi(opts.n_qubits, opts.depth)
|
||||||
Reference in New Issue
Block a user